gcc/config/arm/arm.cc
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2025 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #define IN_TARGET_CODE 1
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "tm_p.h"
38 #include "stringpool.h"
39 #include "attribs.h"
40 #include "optabs.h"
41 #include "regs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "cgraph.h"
45 #include "diagnostic-core.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "stor-layout.h"
49 #include "calls.h"
50 #include "varasm.h"
51 #include "output.h"
52 #include "insn-attr.h"
53 #include "flags.h"
54 #include "reload.h"
55 #include "explow.h"
56 #include "expr.h"
57 #include "cfgrtl.h"
58 #include "sched-int.h"
59 #include "common/common-target.h"
60 #include "langhooks.h"
61 #include "intl.h"
62 #include "libfuncs.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "gimple-iterator.h"
73 #include "selftest.h"
74 #include "tree-vectorizer.h"
75 #include "opts.h"
76 #include "aarch-common.h"
77 #include "aarch-common-protos.h"
78 #include "machmode.h"
80 /* This file should be included last. */
81 #include "target-def.h"
83 /* Forward definitions of types. */
84 typedef struct minipool_node Mnode;
85 typedef struct minipool_fixup Mfix;
87 void (*arm_lang_output_object_attributes_hook)(void);
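/* Presumably this holds the up-to-four immediate values chosen by
   optimal_immediate_sequence / optimal_immediate_sequence_1 below when a
   constant is synthesised as a short sequence of data-processing
   instructions.  */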
  89 struct four_ints
  90 {
  91   int i[4];
  92 };
94 /* Forward function declarations. */
95 static bool arm_const_not_ok_for_debug_p (rtx);
96 static int arm_needs_doubleword_align (machine_mode, const_tree);
97 static int arm_compute_static_chain_stack_bytes (void);
98 static arm_stack_offsets *arm_get_frame_offsets (void);
99 static void arm_compute_frame_layout (void);
100 static void arm_add_gc_roots (void);
101 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
102 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
103 static unsigned bit_count (unsigned long);
104 static unsigned bitmap_popcount (const sbitmap);
105 static int arm_address_register_rtx_p (rtx, int);
106 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
107 static bool is_called_in_ARM_mode (tree);
108 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
109 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
110 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
111 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
112 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
113 inline static int thumb1_index_register_rtx_p (rtx, int);
114 static int thumb_far_jump_used_p (void);
115 static bool thumb_force_lr_save (void);
116 static unsigned arm_size_return_regs (void);
117 static bool arm_assemble_integer (rtx, unsigned int, int);
118 static void arm_print_operand (FILE *, rtx, int);
119 static void arm_print_operand_address (FILE *, machine_mode, rtx);
120 static bool arm_print_operand_punct_valid_p (unsigned char code);
121 static arm_cc get_arm_condition_code (rtx);
122 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
123 static const char *output_multi_immediate (rtx *, const char *, const char *,
124 int, HOST_WIDE_INT);
125 static const char *shift_op (rtx, HOST_WIDE_INT *);
126 static struct machine_function *arm_init_machine_status (void);
127 static void thumb_exit (FILE *, int);
128 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
129 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
130 static Mnode *add_minipool_forward_ref (Mfix *);
131 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
132 static Mnode *add_minipool_backward_ref (Mfix *);
133 static void assign_minipool_offsets (Mfix *);
134 static void arm_print_value (FILE *, rtx);
135 static void dump_minipool (rtx_insn *);
136 static int arm_barrier_cost (rtx_insn *);
137 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
138 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
139 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
140 machine_mode, rtx);
141 static void arm_reorg (void);
142 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
143 static unsigned long arm_compute_save_reg0_reg12_mask (void);
144 static unsigned long arm_compute_save_core_reg_mask (void);
145 static unsigned long arm_isr_value (tree);
146 static unsigned long arm_compute_func_type (void);
147 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
148 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
149 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
150 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
151 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
152 #endif
153 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
154 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
155 static void arm_output_function_epilogue (FILE *);
156 static void arm_output_function_prologue (FILE *);
157 static int arm_comp_type_attributes (const_tree, const_tree);
158 static void arm_set_default_type_attributes (tree);
159 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
160 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
161 static int optimal_immediate_sequence (enum rtx_code code,
162 unsigned HOST_WIDE_INT val,
163 struct four_ints *return_sequence);
164 static int optimal_immediate_sequence_1 (enum rtx_code code,
165 unsigned HOST_WIDE_INT val,
166 struct four_ints *return_sequence,
167 int i);
168 static int arm_get_strip_length (int);
169 static bool arm_function_ok_for_sibcall (tree, tree);
170 static machine_mode arm_promote_function_mode (const_tree,
171 machine_mode, int *,
172 const_tree, int);
173 static bool arm_return_in_memory (const_tree, const_tree);
174 static rtx arm_function_value (const_tree, const_tree, bool);
175 static rtx arm_libcall_value_1 (machine_mode);
176 static rtx arm_libcall_value (machine_mode, const_rtx);
177 static bool arm_function_value_regno_p (const unsigned int);
178 static void arm_internal_label (FILE *, const char *, unsigned long);
179 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
180 tree);
181 static bool arm_have_conditional_execution (void);
182 static bool arm_cannot_force_const_mem (machine_mode, rtx);
183 static bool arm_legitimate_constant_p (machine_mode, rtx);
184 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
185 static int arm_insn_cost (rtx_insn *, bool);
186 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
187 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
188 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
189 static void emit_constant_insn (rtx cond, rtx pattern);
190 static rtx_insn *emit_set_insn (rtx, rtx);
191 static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
192 static rtx emit_multi_reg_push (unsigned long, unsigned long);
193 static void arm_emit_multi_reg_pop (unsigned long);
194 static int vfp_emit_fstmd (int, int);
195 static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
196 static int arm_arg_partial_bytes (cumulative_args_t,
197 const function_arg_info &);
198 static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
199 static void arm_function_arg_advance (cumulative_args_t,
200 const function_arg_info &);
201 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
202 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
203 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
204 const_tree);
205 static rtx aapcs_libcall_value (machine_mode);
206 static int aapcs_select_return_coproc (const_tree, const_tree);
208 #ifdef OBJECT_FORMAT_ELF
209 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
210 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
211 #endif
212 static void arm_encode_section_info (tree, rtx, int);
214 static void arm_file_end (void);
215 static void arm_file_start (void);
216 static void arm_insert_attributes (tree, tree *);
218 static void arm_setup_incoming_varargs (cumulative_args_t,
219 const function_arg_info &, int *, int);
220 static bool arm_pass_by_reference (cumulative_args_t,
221 const function_arg_info &);
222 static bool arm_promote_prototypes (const_tree);
223 static bool arm_default_short_enums (void);
224 static bool arm_align_anon_bitfield (void);
225 static bool arm_return_in_msb (const_tree);
226 static bool arm_must_pass_in_stack (const function_arg_info &);
227 static bool arm_return_in_memory (const_tree, const_tree);
228 #if ARM_UNWIND_INFO
229 static void arm_unwind_emit (FILE *, rtx_insn *);
230 static bool arm_output_ttype (rtx);
231 static void arm_asm_emit_except_personality (rtx);
232 #endif
233 static void arm_asm_init_sections (void);
234 static rtx arm_dwarf_register_span (rtx);
236 static tree arm_cxx_guard_type (void);
237 static bool arm_cxx_guard_mask_bit (void);
238 static tree arm_get_cookie_size (tree);
239 static bool arm_cookie_has_size (void);
240 static bool arm_cxx_cdtor_returns_this (void);
241 static bool arm_cxx_key_method_may_be_inline (void);
242 static void arm_cxx_determine_class_data_visibility (tree);
243 static bool arm_cxx_class_data_always_comdat (void);
244 static bool arm_cxx_use_aeabi_atexit (void);
245 static void arm_init_libfuncs (void);
246 static tree arm_build_builtin_va_list (void);
247 static void arm_expand_builtin_va_start (tree, rtx);
248 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
249 static void arm_option_override (void);
250 static void arm_option_restore (struct gcc_options *, struct gcc_options *,
251 struct cl_target_option *);
252 static void arm_override_options_after_change (void);
253 static void arm_option_print (FILE *, int, struct cl_target_option *);
254 static void arm_set_current_function (tree);
255 static bool arm_can_inline_p (tree, tree);
256 static void arm_relayout_function (tree);
257 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
258 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
259 static bool arm_sched_can_speculate_insn (rtx_insn *);
260 static bool arm_macro_fusion_p (void);
261 static bool arm_cannot_copy_insn_p (rtx_insn *);
262 static int arm_issue_rate (void);
263 static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
264 static int arm_first_cycle_multipass_dfa_lookahead (void);
265 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
266 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
267 static bool arm_output_addr_const_extra (FILE *, rtx);
268 static bool arm_allocate_stack_slots_for_args (void);
269 static bool arm_warn_func_return (tree);
270 static tree arm_promoted_type (const_tree t);
271 static bool arm_scalar_mode_supported_p (scalar_mode);
272 static bool arm_frame_pointer_required (void);
273 static bool arm_can_eliminate (const int, const int);
274 static void arm_asm_trampoline_template (FILE *);
275 static void arm_trampoline_init (rtx, tree, rtx);
276 static rtx arm_trampoline_adjust_address (rtx);
277 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
278 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
279 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
280 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
281 static opt_machine_mode arm_array_mode (machine_mode, unsigned HOST_WIDE_INT);
282 static bool arm_array_mode_supported_p (machine_mode,
283 unsigned HOST_WIDE_INT);
284 static machine_mode arm_preferred_simd_mode (scalar_mode);
285 static bool arm_class_likely_spilled_p (reg_class_t);
286 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
287 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
288 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
289 const_tree type,
290 int misalignment,
291 bool is_packed);
292 static void arm_conditional_register_usage (void);
293 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
294 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
295 static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
296 static int arm_default_branch_cost (bool, bool);
297 static int arm_cortex_a5_branch_cost (bool, bool);
298 static int arm_cortex_m_branch_cost (bool, bool);
299 static int arm_cortex_m7_branch_cost (bool, bool);
301 static bool arm_vectorize_vec_perm_const (machine_mode, machine_mode, rtx, rtx,
302 rtx, const vec_perm_indices &);
304 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
306 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
307 tree vectype,
308 int misalign ATTRIBUTE_UNUSED);
310 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
311 bool op0_preserve_value);
312 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
314 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
315 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
316 const_tree);
317 static section *arm_function_section (tree, enum node_frequency, bool, bool);
318 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
319 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
320 int reloc);
321 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
322 static opt_scalar_float_mode arm_floatn_mode (int, bool);
323 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
324 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
325 static bool arm_modes_tieable_p (machine_mode, machine_mode);
326 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
327 static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
328 vec<machine_mode> &,
329 vec<const char *> &, vec<rtx> &,
330 vec<rtx> &, HARD_REG_SET &, location_t);
331 static const char *arm_identify_fpu_from_isa (sbitmap);
333 /* Table of machine attributes. */
 334 static const attribute_spec arm_gnu_attributes[] =
 335 {
336 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
337 affects_type_identity, handler, exclude } */
338 /* Function calls made to this symbol must be done indirectly, because
339 it may lie outside of the 26 bit addressing range of a normal function
340 call. */
341 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
342 /* Whereas these functions are always known to reside within the 26 bit
343 addressing range. */
344 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
345 /* Specify the procedure call conventions for a function. */
346 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
347 NULL },
348 /* Interrupt Service Routines have special prologue and epilogue requirements. */
349 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
350 NULL },
351 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
352 NULL },
353 { "naked", 0, 0, true, false, false, false,
354 arm_handle_fndecl_attribute, NULL },
355 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
356 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
357 NULL },
358 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
359 NULL },
360 { "notshared", 0, 0, false, true, false, false,
361 arm_handle_notshared_attribute, NULL },
362 #endif
363 /* ARMv8-M Security Extensions support. */
364 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
365 arm_handle_cmse_nonsecure_entry, NULL },
366 { "cmse_nonsecure_call", 0, 0, false, false, false, true,
367 arm_handle_cmse_nonsecure_call, NULL },
368 { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL }
371 static const scoped_attribute_specs arm_gnu_attribute_table =
373 "gnu", { arm_gnu_attributes }
376 static const scoped_attribute_specs *const arm_attribute_table[] =
378 &arm_gnu_attribute_table
381 /* Initialize the GCC target structure. */
382 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
383 #undef TARGET_MERGE_DECL_ATTRIBUTES
384 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
385 #endif
387 #undef TARGET_CHECK_BUILTIN_CALL
388 #define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
390 #undef TARGET_LEGITIMIZE_ADDRESS
391 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
393 #undef TARGET_ATTRIBUTE_TABLE
394 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
396 #undef TARGET_INSERT_ATTRIBUTES
397 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
399 #undef TARGET_ASM_FILE_START
400 #define TARGET_ASM_FILE_START arm_file_start
401 #undef TARGET_ASM_FILE_END
402 #define TARGET_ASM_FILE_END arm_file_end
404 #undef TARGET_ASM_ALIGNED_SI_OP
405 #define TARGET_ASM_ALIGNED_SI_OP NULL
406 #undef TARGET_ASM_INTEGER
407 #define TARGET_ASM_INTEGER arm_assemble_integer
409 #undef TARGET_PRINT_OPERAND
410 #define TARGET_PRINT_OPERAND arm_print_operand
411 #undef TARGET_PRINT_OPERAND_ADDRESS
412 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
413 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
414 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
416 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
417 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
419 #undef TARGET_ASM_FUNCTION_PROLOGUE
420 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
422 #undef TARGET_ASM_FUNCTION_EPILOGUE
423 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
425 #undef TARGET_CAN_INLINE_P
426 #define TARGET_CAN_INLINE_P arm_can_inline_p
428 #undef TARGET_RELAYOUT_FUNCTION
429 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
431 #undef TARGET_OPTION_OVERRIDE
432 #define TARGET_OPTION_OVERRIDE arm_option_override
434 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
435 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
437 #undef TARGET_OPTION_RESTORE
438 #define TARGET_OPTION_RESTORE arm_option_restore
440 #undef TARGET_OPTION_PRINT
441 #define TARGET_OPTION_PRINT arm_option_print
443 #undef TARGET_COMP_TYPE_ATTRIBUTES
444 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
446 #undef TARGET_SCHED_CAN_SPECULATE_INSN
447 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
449 #undef TARGET_SCHED_MACRO_FUSION_P
450 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
452 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
453 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
455 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
456 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
458 #undef TARGET_SCHED_ADJUST_COST
459 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
461 #undef TARGET_SET_CURRENT_FUNCTION
462 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
464 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
465 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
467 #undef TARGET_SCHED_REORDER
468 #define TARGET_SCHED_REORDER arm_sched_reorder
470 #undef TARGET_REGISTER_MOVE_COST
471 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
473 #undef TARGET_MEMORY_MOVE_COST
474 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
476 #undef TARGET_ENCODE_SECTION_INFO
477 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
479 #undef TARGET_STRIP_NAME_ENCODING
480 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
482 #undef TARGET_ASM_INTERNAL_LABEL
483 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
485 #undef TARGET_FLOATN_MODE
486 #define TARGET_FLOATN_MODE arm_floatn_mode
488 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
489 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
491 #undef TARGET_FUNCTION_VALUE
492 #define TARGET_FUNCTION_VALUE arm_function_value
494 #undef TARGET_LIBCALL_VALUE
495 #define TARGET_LIBCALL_VALUE arm_libcall_value
497 #undef TARGET_FUNCTION_VALUE_REGNO_P
498 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
500 #undef TARGET_GIMPLE_FOLD_BUILTIN
501 #define TARGET_GIMPLE_FOLD_BUILTIN arm_gimple_fold_builtin
503 #undef TARGET_ASM_OUTPUT_MI_THUNK
504 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
505 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
506 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
508 #undef TARGET_RTX_COSTS
509 #define TARGET_RTX_COSTS arm_rtx_costs
510 #undef TARGET_ADDRESS_COST
511 #define TARGET_ADDRESS_COST arm_address_cost
512 #undef TARGET_INSN_COST
513 #define TARGET_INSN_COST arm_insn_cost
515 #undef TARGET_SHIFT_TRUNCATION_MASK
516 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
517 #undef TARGET_VECTOR_MODE_SUPPORTED_P
518 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
519 #undef TARGET_ARRAY_MODE
520 #define TARGET_ARRAY_MODE arm_array_mode
521 #undef TARGET_ARRAY_MODE_SUPPORTED_P
522 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
523 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
524 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
525 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
526 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
527 arm_autovectorize_vector_modes
529 #undef TARGET_MACHINE_DEPENDENT_REORG
530 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
532 #undef TARGET_INIT_BUILTINS
533 #define TARGET_INIT_BUILTINS arm_init_builtins
534 #undef TARGET_EXPAND_BUILTIN
535 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
536 #undef TARGET_BUILTIN_DECL
537 #define TARGET_BUILTIN_DECL arm_builtin_decl
539 #undef TARGET_INIT_LIBFUNCS
540 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
542 #undef TARGET_PROMOTE_FUNCTION_MODE
543 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
544 #undef TARGET_PROMOTE_PROTOTYPES
545 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
546 #undef TARGET_PASS_BY_REFERENCE
547 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
548 #undef TARGET_ARG_PARTIAL_BYTES
549 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
550 #undef TARGET_FUNCTION_ARG
551 #define TARGET_FUNCTION_ARG arm_function_arg
552 #undef TARGET_FUNCTION_ARG_ADVANCE
553 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
554 #undef TARGET_FUNCTION_ARG_PADDING
555 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
556 #undef TARGET_FUNCTION_ARG_BOUNDARY
557 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
559 #undef TARGET_SETUP_INCOMING_VARARGS
560 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
562 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
563 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
565 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
566 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
567 #undef TARGET_TRAMPOLINE_INIT
568 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
569 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
570 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
572 #undef TARGET_WARN_FUNC_RETURN
573 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
575 #undef TARGET_DEFAULT_SHORT_ENUMS
576 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
578 #undef TARGET_ALIGN_ANON_BITFIELD
579 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
581 #undef TARGET_NARROW_VOLATILE_BITFIELD
582 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
584 #undef TARGET_CXX_GUARD_TYPE
585 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
587 #undef TARGET_CXX_GUARD_MASK_BIT
588 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
590 #undef TARGET_CXX_GET_COOKIE_SIZE
591 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
593 #undef TARGET_CXX_COOKIE_HAS_SIZE
594 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
596 #undef TARGET_CXX_CDTOR_RETURNS_THIS
597 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
599 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
600 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
602 #undef TARGET_CXX_USE_AEABI_ATEXIT
603 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
605 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
606 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
607 arm_cxx_determine_class_data_visibility
609 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
610 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
612 #undef TARGET_RETURN_IN_MSB
613 #define TARGET_RETURN_IN_MSB arm_return_in_msb
615 #undef TARGET_RETURN_IN_MEMORY
616 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
618 #undef TARGET_MUST_PASS_IN_STACK
619 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
621 #if ARM_UNWIND_INFO
622 #undef TARGET_ASM_UNWIND_EMIT
623 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
625 /* EABI unwinding tables use a different format for the typeinfo tables. */
626 #undef TARGET_ASM_TTYPE
627 #define TARGET_ASM_TTYPE arm_output_ttype
629 #undef TARGET_ARM_EABI_UNWINDER
630 #define TARGET_ARM_EABI_UNWINDER true
632 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
633 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
635 #endif /* ARM_UNWIND_INFO */
637 #undef TARGET_ASM_INIT_SECTIONS
638 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
640 #undef TARGET_DWARF_REGISTER_SPAN
641 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
643 #undef TARGET_CANNOT_COPY_INSN_P
644 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
646 #ifdef HAVE_AS_TLS
647 #undef TARGET_HAVE_TLS
648 #define TARGET_HAVE_TLS true
649 #endif
651 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
652 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
654 #undef TARGET_LOOP_UNROLL_ADJUST
655 #define TARGET_LOOP_UNROLL_ADJUST arm_loop_unroll_adjust
657 #undef TARGET_PREDICT_DOLOOP_P
658 #define TARGET_PREDICT_DOLOOP_P arm_predict_doloop_p
660 #undef TARGET_LEGITIMATE_CONSTANT_P
661 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
663 #undef TARGET_CANNOT_FORCE_CONST_MEM
664 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
666 #undef TARGET_MAX_ANCHOR_OFFSET
667 #define TARGET_MAX_ANCHOR_OFFSET 4095
669 /* The minimum is set such that the total size of the block
670 for a particular anchor is -4088 + 1 + 4095 bytes, which is
671 divisible by eight, ensuring natural spacing of anchors. */
672 #undef TARGET_MIN_ANCHOR_OFFSET
673 #define TARGET_MIN_ANCHOR_OFFSET -4088
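/* In other words, each anchor covers the byte range [-4088, +4095], i.e.
   4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023.  */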
675 #undef TARGET_SCHED_ISSUE_RATE
676 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
678 #undef TARGET_SCHED_VARIABLE_ISSUE
679 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
681 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
682 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
683 arm_first_cycle_multipass_dfa_lookahead
685 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
686 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
687 arm_first_cycle_multipass_dfa_lookahead_guard
689 #undef TARGET_MANGLE_TYPE
690 #define TARGET_MANGLE_TYPE arm_mangle_type
692 #undef TARGET_INVALID_CONVERSION
693 #define TARGET_INVALID_CONVERSION arm_invalid_conversion
695 #undef TARGET_INVALID_UNARY_OP
696 #define TARGET_INVALID_UNARY_OP arm_invalid_unary_op
698 #undef TARGET_INVALID_BINARY_OP
699 #define TARGET_INVALID_BINARY_OP arm_invalid_binary_op
701 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
702 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
704 #undef TARGET_BUILD_BUILTIN_VA_LIST
705 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
706 #undef TARGET_EXPAND_BUILTIN_VA_START
707 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
708 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
709 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
711 #ifdef HAVE_AS_TLS
712 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
713 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
714 #endif
716 #undef TARGET_LEGITIMATE_ADDRESS_P
717 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
719 #undef TARGET_PREFERRED_RELOAD_CLASS
720 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
722 #undef TARGET_PROMOTED_TYPE
723 #define TARGET_PROMOTED_TYPE arm_promoted_type
725 #undef TARGET_SCALAR_MODE_SUPPORTED_P
726 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
728 #undef TARGET_COMPUTE_FRAME_LAYOUT
729 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
731 #undef TARGET_FRAME_POINTER_REQUIRED
732 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
734 #undef TARGET_CAN_ELIMINATE
735 #define TARGET_CAN_ELIMINATE arm_can_eliminate
737 #undef TARGET_CONDITIONAL_REGISTER_USAGE
738 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
740 #undef TARGET_CLASS_LIKELY_SPILLED_P
741 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
743 #undef TARGET_VECTORIZE_BUILTINS
744 #define TARGET_VECTORIZE_BUILTINS
746 #undef TARGET_VECTOR_ALIGNMENT
747 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
749 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
750 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
751 arm_vector_alignment_reachable
753 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
754 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
755 arm_builtin_support_vector_misalignment
757 #undef TARGET_PREFERRED_RENAME_CLASS
758 #define TARGET_PREFERRED_RENAME_CLASS \
759 arm_preferred_rename_class
761 #undef TARGET_VECTORIZE_VEC_PERM_CONST
762 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
764 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
765 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
766 arm_builtin_vectorization_cost
768 #undef TARGET_CANONICALIZE_COMPARISON
769 #define TARGET_CANONICALIZE_COMPARISON \
770 arm_canonicalize_comparison
772 #undef TARGET_ASAN_SHADOW_OFFSET
773 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
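/* Presumably: a Thumb-2 IT block may cover at most four conditional
   instructions, but -mrestrict-it (matching ARMv8's deprecation of long IT
   blocks) limits it to a single instruction, hence the 1-vs-4 choice below.  */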
775 #undef MAX_INSN_PER_IT_BLOCK
776 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
778 #undef TARGET_CAN_USE_DOLOOP_P
779 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
781 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
782 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
784 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
785 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
787 #undef TARGET_SCHED_FUSION_PRIORITY
788 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
790 #undef TARGET_ASM_FUNCTION_SECTION
791 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
793 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
794 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
796 #undef TARGET_SECTION_TYPE_FLAGS
797 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
799 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
800 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
802 #undef TARGET_C_EXCESS_PRECISION
803 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
805 /* Although the architecture reserves bits 0 and 1, only the former is
806 used for ARM/Thumb ISA selection in v7 and earlier versions. */
807 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
808 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
810 #undef TARGET_FIXED_CONDITION_CODE_REGS
811 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
813 #undef TARGET_HARD_REGNO_NREGS
814 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
815 #undef TARGET_HARD_REGNO_MODE_OK
816 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
818 #undef TARGET_MODES_TIEABLE_P
819 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
821 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
822 #define TARGET_NOCE_CONVERSION_PROFITABLE_P arm_noce_conversion_profitable_p
824 #undef TARGET_CAN_CHANGE_MODE_CLASS
825 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
827 #undef TARGET_CONSTANT_ALIGNMENT
828 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
830 #undef TARGET_INVALID_WITHIN_DOLOOP
831 #define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop
833 #undef TARGET_MD_ASM_ADJUST
834 #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
836 #undef TARGET_STACK_PROTECT_GUARD
837 #define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard
839 #undef TARGET_VECTORIZE_GET_MASK_MODE
840 #define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
842 /* Obstack for minipool constant handling. */
843 static struct obstack minipool_obstack;
844 static char * minipool_startobj;
846 /* The maximum number of insns skipped which
847 will be conditionalised if possible. */
848 static int max_insns_skipped = 5;
850 /* True if we are currently building a constant table. */
851 int making_const_table;
853 /* The processor for which instructions should be scheduled. */
854 enum processor_type arm_tune = TARGET_CPU_arm_none;
856 /* The current tuning set. */
857 const struct tune_params *current_tune;
859 /* Which floating point hardware to schedule for. */
860 int arm_fpu_attr;
862 /* Used for Thumb call_via trampolines. */
863 rtx thumb_call_via_label[14];
864 static int thumb_call_reg_needed;
866 /* The bits in this mask specify which instruction scheduling options should
867 be used. */
868 unsigned int tune_flags = 0;
870 /* The highest ARM architecture version supported by the
871 target. */
872 enum base_architecture arm_base_arch = BASE_ARCH_0;
874 /* Active target architecture and tuning. */
876 struct arm_build_target arm_active_target;
878 /* The following are used in the arm.md file as equivalents to bits
879 in the above two flag variables. */
881 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
882 int arm_arch4 = 0;
884 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
885 int arm_arch4t = 0;
887 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
888 int arm_arch5t = 0;
890 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
891 int arm_arch5te = 0;
893 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
894 int arm_arch6 = 0;
896 /* Nonzero if this chip supports the ARM 6K extensions. */
897 int arm_arch6k = 0;
899 /* Nonzero if this chip supports the ARM 6KZ extensions. */
900 int arm_arch6kz = 0;
902 /* Nonzero if instructions present in ARMv6-M can be used. */
903 int arm_arch6m = 0;
905 /* Nonzero if this chip supports the ARM 7 extensions. */
906 int arm_arch7 = 0;
908 /* Nonzero if this chip supports the Large Physical Address Extension. */
909 int arm_arch_lpae = 0;
911 /* Nonzero if instructions not present in the 'M' profile can be used. */
912 int arm_arch_notm = 0;
914 /* Nonzero if instructions present in ARMv7E-M can be used. */
915 int arm_arch7em = 0;
917 /* Nonzero if instructions present in ARMv8 can be used. */
918 int arm_arch8 = 0;
920 /* Nonzero if this chip supports the ARMv8.1 extensions. */
921 int arm_arch8_1 = 0;
923 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
924 int arm_arch8_2 = 0;
926 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
927 int arm_arch8_3 = 0;
929 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
930 int arm_arch8_4 = 0;
932 /* Nonzero if this chip supports the ARM Architecture 8-M Mainline
933 extensions. */
934 int arm_arch8m_main = 0;
936 /* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
937 extensions. */
938 int arm_arch8_1m_main = 0;
940 /* Nonzero if this chip supports the FP16 instructions extension of ARM
941 Architecture 8.2. */
942 int arm_fp16_inst = 0;
944 /* Nonzero if this chip can benefit from load scheduling. */
945 int arm_ld_sched = 0;
947 /* Nonzero if this chip is a StrongARM. */
948 int arm_tune_strongarm = 0;
950 /* Nonzero if this chip supports Intel Wireless MMX technology. */
951 int arm_arch_iwmmxt = 0;
953 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
954 int arm_arch_iwmmxt2 = 0;
956 /* Nonzero if this chip is an XScale. */
957 int arm_arch_xscale = 0;
 959 /* Nonzero if tuning for XScale.  */
960 int arm_tune_xscale = 0;
962 /* Nonzero if we want to tune for stores that access the write-buffer.
963 This typically means an ARM6 or ARM7 with MMU or MPU. */
964 int arm_tune_wbuf = 0;
966 /* Nonzero if tuning for Cortex-A9. */
967 int arm_tune_cortex_a9 = 0;
969 /* Nonzero if we should define __THUMB_INTERWORK__ in the
970 preprocessor.
971 XXX This is a bit of a hack, it's intended to help work around
972 problems in GLD which doesn't understand that armv5t code is
973 interworking clean. */
974 int arm_cpp_interwork = 0;
976 /* Nonzero if chip supports Thumb 1. */
977 int arm_arch_thumb1;
979 /* Nonzero if chip supports Thumb 2. */
980 int arm_arch_thumb2;
982 /* Nonzero if chip supports integer division instruction. */
983 int arm_arch_arm_hwdiv;
984 int arm_arch_thumb_hwdiv;
986 /* Nonzero if chip disallows volatile memory access in IT block. */
987 int arm_arch_no_volatile_ce;
989 /* Nonzero if we shouldn't use literal pools. */
990 bool arm_disable_literal_pool = false;
992 /* The register number to be used for the PIC offset register. */
993 unsigned arm_pic_register = INVALID_REGNUM;
995 enum arm_pcs arm_pcs_default;
997 /* For an explanation of these variables, see final_prescan_insn below. */
998 int arm_ccfsm_state;
999 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
1000 enum arm_cond_code arm_current_cc;
1002 rtx arm_target_insn;
1003 int arm_target_label;
1004 /* The number of conditionally executed insns, including the current insn. */
1005 int arm_condexec_count = 0;
1006 /* A bitmask specifying the patterns for the IT block.
1007 Zero means do not output an IT block before this insn. */
1008 int arm_condexec_mask = 0;
1009 /* The number of bits used in arm_condexec_mask. */
1010 int arm_condexec_masklen = 0;
1012 /* Nonzero if chip supports the ARMv8 CRC instructions. */
1013 int arm_arch_crc = 0;
1015 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
1016 int arm_arch_dotprod = 0;
1018 /* Nonzero if chip supports the ARMv8-M security extensions. */
1019 int arm_arch_cmse = 0;
1021 /* Nonzero if the core has a very small, high-latency, multiply unit. */
1022 int arm_m_profile_small_mul = 0;
1024 /* Nonzero if chip supports the AdvSIMD I8MM instructions. */
1025 int arm_arch_i8mm = 0;
1027 /* Nonzero if chip supports the BFloat16 instructions. */
1028 int arm_arch_bf16 = 0;
1030 /* Nonzero if chip supports the Custom Datapath Extension. */
1031 int arm_arch_cde = 0;
1032 int arm_arch_cde_coproc = 0;
1033 const int arm_arch_cde_coproc_bits[] = {
1034   0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
1035 };
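/* Presumably arm_arch_cde_coproc_bits[N] is the bit recorded in
   arm_arch_cde_coproc when CDE is enabled for coprocessor N, e.g. +cdecp0
   would correspond to 0x1 and +cdecp7 to 0x80.  */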
1037 /* The condition codes of the ARM, and the inverse function. */
1038 static const char * const arm_condition_codes[] =
1039 {
1040   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
1041   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1042 };
1044 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1045 int arm_regs_in_sequence[] =
1046 {
1047   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1048 };
1050 #define DEF_FP_SYSREG(reg) #reg,
1051 const char *fp_sysreg_names[NB_FP_SYSREGS] = {
1052   FP_SYSREGS
1053 };
1054 #undef DEF_FP_SYSREG
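/* This is an X-macro expansion: FP_SYSREGS (defined elsewhere, presumably in
   arm.h) expands to one DEF_FP_SYSREG (...) entry per floating-point system
   register, and the temporary definition above stringizes each name, so the
   array ends up holding one printable register-name string per entry.  */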
1056 #define ARM_LSL_NAME "lsl"
1057 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1059 #define THUMB2_WORK_REGS \
1060 (0xff & ~((1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1061 | (1 << SP_REGNUM) \
1062 | (1 << PC_REGNUM) \
1063 | (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM \
1064 ? (1 << PIC_OFFSET_TABLE_REGNUM) \
1065 : 0)))
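/* Read: start from the low registers r0-r7 (0xff), then drop the Thumb hard
   frame pointer, SP, PC and, when a PIC register is reserved, that register
   too; what remains is presumably the pool of registers Thumb-2 code may use
   as temporaries.  */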
1067 /* Initialization code. */
1069 struct cpu_tune
1070 {
1071   enum processor_type scheduler;
1072   unsigned int tune_flags;
1073   const struct tune_params *tune;
1074 };
1076 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1077 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1078   {              \
1079     num_slots,   \
1080     l1_size,     \
1081     l1_line_size \
1082   }
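/* These presumably populate the { num_slots, l1_cache_size,
   l1_cache_line_size } prefetch-tuning fields of tune_params, with -1
   meaning "no useful value / use the default".  */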
1084 /* arm generic vectorizer costs. */
1085 static const
1086 struct cpu_vec_costs arm_default_vec_cost = {
1087 1, /* scalar_stmt_cost. */
1088 1, /* scalar load_cost. */
1089 1, /* scalar_store_cost. */
1090 1, /* vec_stmt_cost. */
1091 1, /* vec_to_scalar_cost. */
1092 1, /* scalar_to_vec_cost. */
1093 1, /* vec_align_load_cost. */
1094 1, /* vec_unalign_load_cost. */
1095 1, /* vec_unalign_store_cost. */
1096 1, /* vec_store_cost. */
1097 3, /* cond_taken_branch_cost. */
1098   1,	/* cond_not_taken_branch_cost.  */
1099 };
1101 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1102 #include "aarch-cost-tables.h"
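/* A reminder for the tables that follow: COSTS_N_INSNS (N) (from rtl.h)
   expresses a cost of N instruction-equivalents, and these are "extra cost"
   tables, so an entry of 0 means no cost beyond the baseline instruction.  */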
1106 const struct cpu_cost_table cortexa9_extra_costs =
1108 /* ALU */
1110 0, /* arith. */
1111 0, /* logical. */
1112 0, /* shift. */
1113 COSTS_N_INSNS (1), /* shift_reg. */
1114 COSTS_N_INSNS (1), /* arith_shift. */
1115 COSTS_N_INSNS (2), /* arith_shift_reg. */
1116 0, /* log_shift. */
1117 COSTS_N_INSNS (1), /* log_shift_reg. */
1118 COSTS_N_INSNS (1), /* extend. */
1119 COSTS_N_INSNS (2), /* extend_arith. */
1120 COSTS_N_INSNS (1), /* bfi. */
1121 COSTS_N_INSNS (1), /* bfx. */
1122 0, /* clz. */
1123 0, /* rev. */
1124 0, /* non_exec. */
1125 true /* non_exec_costs_exec. */
1128 /* MULT SImode */
1130 COSTS_N_INSNS (3), /* simple. */
1131 COSTS_N_INSNS (3), /* flag_setting. */
1132 COSTS_N_INSNS (2), /* extend. */
1133 COSTS_N_INSNS (3), /* add. */
1134 COSTS_N_INSNS (2), /* extend_add. */
1135 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1137 /* MULT DImode */
1139 0, /* simple (N/A). */
1140 0, /* flag_setting (N/A). */
1141 COSTS_N_INSNS (4), /* extend. */
1142 0, /* add (N/A). */
1143 COSTS_N_INSNS (4), /* extend_add. */
1144 0 /* idiv (N/A). */
1147 /* LD/ST */
1149 COSTS_N_INSNS (2), /* load. */
1150 COSTS_N_INSNS (2), /* load_sign_extend. */
1151 COSTS_N_INSNS (2), /* ldrd. */
1152 COSTS_N_INSNS (2), /* ldm_1st. */
1153 1, /* ldm_regs_per_insn_1st. */
1154 2, /* ldm_regs_per_insn_subsequent. */
1155 COSTS_N_INSNS (5), /* loadf. */
1156 COSTS_N_INSNS (5), /* loadd. */
1157 COSTS_N_INSNS (1), /* load_unaligned. */
1158 COSTS_N_INSNS (2), /* store. */
1159 COSTS_N_INSNS (2), /* strd. */
1160 COSTS_N_INSNS (2), /* stm_1st. */
1161 1, /* stm_regs_per_insn_1st. */
1162 2, /* stm_regs_per_insn_subsequent. */
1163 COSTS_N_INSNS (1), /* storef. */
1164 COSTS_N_INSNS (1), /* stored. */
1165 COSTS_N_INSNS (1), /* store_unaligned. */
1166 COSTS_N_INSNS (1), /* loadv. */
1167 COSTS_N_INSNS (1) /* storev. */
1170 /* FP SFmode */
1172 COSTS_N_INSNS (14), /* div. */
1173 COSTS_N_INSNS (4), /* mult. */
1174 COSTS_N_INSNS (7), /* mult_addsub. */
1175 COSTS_N_INSNS (30), /* fma. */
1176 COSTS_N_INSNS (3), /* addsub. */
1177 COSTS_N_INSNS (1), /* fpconst. */
1178 COSTS_N_INSNS (1), /* neg. */
1179 COSTS_N_INSNS (3), /* compare. */
1180 COSTS_N_INSNS (3), /* widen. */
1181 COSTS_N_INSNS (3), /* narrow. */
1182 COSTS_N_INSNS (3), /* toint. */
1183 COSTS_N_INSNS (3), /* fromint. */
1184 COSTS_N_INSNS (3) /* roundint. */
1186 /* FP DFmode */
1188 COSTS_N_INSNS (24), /* div. */
1189 COSTS_N_INSNS (5), /* mult. */
1190 COSTS_N_INSNS (8), /* mult_addsub. */
1191 COSTS_N_INSNS (30), /* fma. */
1192 COSTS_N_INSNS (3), /* addsub. */
1193 COSTS_N_INSNS (1), /* fpconst. */
1194 COSTS_N_INSNS (1), /* neg. */
1195 COSTS_N_INSNS (3), /* compare. */
1196 COSTS_N_INSNS (3), /* widen. */
1197 COSTS_N_INSNS (3), /* narrow. */
1198 COSTS_N_INSNS (3), /* toint. */
1199 COSTS_N_INSNS (3), /* fromint. */
1200 COSTS_N_INSNS (3) /* roundint. */
1203 /* Vector */
1205 COSTS_N_INSNS (1), /* alu. */
1206 COSTS_N_INSNS (4), /* mult. */
1207 COSTS_N_INSNS (1), /* movi. */
1208 COSTS_N_INSNS (2), /* dup. */
1209 COSTS_N_INSNS (2) /* extract. */
1213 const struct cpu_cost_table cortexa8_extra_costs =
1215 /* ALU */
1217 0, /* arith. */
1218 0, /* logical. */
1219 COSTS_N_INSNS (1), /* shift. */
1220 0, /* shift_reg. */
1221 COSTS_N_INSNS (1), /* arith_shift. */
1222 0, /* arith_shift_reg. */
1223 COSTS_N_INSNS (1), /* log_shift. */
1224 0, /* log_shift_reg. */
1225 0, /* extend. */
1226 0, /* extend_arith. */
1227 0, /* bfi. */
1228 0, /* bfx. */
1229 0, /* clz. */
1230 0, /* rev. */
1231 0, /* non_exec. */
1232 true /* non_exec_costs_exec. */
1235 /* MULT SImode */
1237 COSTS_N_INSNS (1), /* simple. */
1238 COSTS_N_INSNS (1), /* flag_setting. */
1239 COSTS_N_INSNS (1), /* extend. */
1240 COSTS_N_INSNS (1), /* add. */
1241 COSTS_N_INSNS (1), /* extend_add. */
1242 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1244 /* MULT DImode */
1246 0, /* simple (N/A). */
1247 0, /* flag_setting (N/A). */
1248 COSTS_N_INSNS (2), /* extend. */
1249 0, /* add (N/A). */
1250 COSTS_N_INSNS (2), /* extend_add. */
1251 0 /* idiv (N/A). */
1254 /* LD/ST */
1256 COSTS_N_INSNS (1), /* load. */
1257 COSTS_N_INSNS (1), /* load_sign_extend. */
1258 COSTS_N_INSNS (1), /* ldrd. */
1259 COSTS_N_INSNS (1), /* ldm_1st. */
1260 1, /* ldm_regs_per_insn_1st. */
1261 2, /* ldm_regs_per_insn_subsequent. */
1262 COSTS_N_INSNS (1), /* loadf. */
1263 COSTS_N_INSNS (1), /* loadd. */
1264 COSTS_N_INSNS (1), /* load_unaligned. */
1265 COSTS_N_INSNS (1), /* store. */
1266 COSTS_N_INSNS (1), /* strd. */
1267 COSTS_N_INSNS (1), /* stm_1st. */
1268 1, /* stm_regs_per_insn_1st. */
1269 2, /* stm_regs_per_insn_subsequent. */
1270 COSTS_N_INSNS (1), /* storef. */
1271 COSTS_N_INSNS (1), /* stored. */
1272 COSTS_N_INSNS (1), /* store_unaligned. */
1273 COSTS_N_INSNS (1), /* loadv. */
1274 COSTS_N_INSNS (1) /* storev. */
1277 /* FP SFmode */
1279 COSTS_N_INSNS (36), /* div. */
1280 COSTS_N_INSNS (11), /* mult. */
1281 COSTS_N_INSNS (20), /* mult_addsub. */
1282 COSTS_N_INSNS (30), /* fma. */
1283 COSTS_N_INSNS (9), /* addsub. */
1284 COSTS_N_INSNS (3), /* fpconst. */
1285 COSTS_N_INSNS (3), /* neg. */
1286 COSTS_N_INSNS (6), /* compare. */
1287 COSTS_N_INSNS (4), /* widen. */
1288 COSTS_N_INSNS (4), /* narrow. */
1289 COSTS_N_INSNS (8), /* toint. */
1290 COSTS_N_INSNS (8), /* fromint. */
1291 COSTS_N_INSNS (8) /* roundint. */
1293 /* FP DFmode */
1295 COSTS_N_INSNS (64), /* div. */
1296 COSTS_N_INSNS (16), /* mult. */
1297 COSTS_N_INSNS (25), /* mult_addsub. */
1298 COSTS_N_INSNS (30), /* fma. */
1299 COSTS_N_INSNS (9), /* addsub. */
1300 COSTS_N_INSNS (3), /* fpconst. */
1301 COSTS_N_INSNS (3), /* neg. */
1302 COSTS_N_INSNS (6), /* compare. */
1303 COSTS_N_INSNS (6), /* widen. */
1304 COSTS_N_INSNS (6), /* narrow. */
1305 COSTS_N_INSNS (8), /* toint. */
1306 COSTS_N_INSNS (8), /* fromint. */
1307 COSTS_N_INSNS (8) /* roundint. */
1310 /* Vector */
1312 COSTS_N_INSNS (1), /* alu. */
1313 COSTS_N_INSNS (4), /* mult. */
1314 COSTS_N_INSNS (1), /* movi. */
1315 COSTS_N_INSNS (2), /* dup. */
1316 COSTS_N_INSNS (2) /* extract. */
1320 const struct cpu_cost_table cortexa5_extra_costs =
1322 /* ALU */
1324 0, /* arith. */
1325 0, /* logical. */
1326 COSTS_N_INSNS (1), /* shift. */
1327 COSTS_N_INSNS (1), /* shift_reg. */
1328 COSTS_N_INSNS (1), /* arith_shift. */
1329 COSTS_N_INSNS (1), /* arith_shift_reg. */
1330 COSTS_N_INSNS (1), /* log_shift. */
1331 COSTS_N_INSNS (1), /* log_shift_reg. */
1332 COSTS_N_INSNS (1), /* extend. */
1333 COSTS_N_INSNS (1), /* extend_arith. */
1334 COSTS_N_INSNS (1), /* bfi. */
1335 COSTS_N_INSNS (1), /* bfx. */
1336 COSTS_N_INSNS (1), /* clz. */
1337 COSTS_N_INSNS (1), /* rev. */
1338 0, /* non_exec. */
1339 true /* non_exec_costs_exec. */
1343 /* MULT SImode */
1345 0, /* simple. */
1346 COSTS_N_INSNS (1), /* flag_setting. */
1347 COSTS_N_INSNS (1), /* extend. */
1348 COSTS_N_INSNS (1), /* add. */
1349 COSTS_N_INSNS (1), /* extend_add. */
1350 COSTS_N_INSNS (7) /* idiv. */
1352 /* MULT DImode */
1354 0, /* simple (N/A). */
1355 0, /* flag_setting (N/A). */
1356 COSTS_N_INSNS (1), /* extend. */
1357 0, /* add. */
1358 COSTS_N_INSNS (2), /* extend_add. */
1359 0 /* idiv (N/A). */
1362 /* LD/ST */
1364 COSTS_N_INSNS (1), /* load. */
1365 COSTS_N_INSNS (1), /* load_sign_extend. */
1366 COSTS_N_INSNS (6), /* ldrd. */
1367 COSTS_N_INSNS (1), /* ldm_1st. */
1368 1, /* ldm_regs_per_insn_1st. */
1369 2, /* ldm_regs_per_insn_subsequent. */
1370 COSTS_N_INSNS (2), /* loadf. */
1371 COSTS_N_INSNS (4), /* loadd. */
1372 COSTS_N_INSNS (1), /* load_unaligned. */
1373 COSTS_N_INSNS (1), /* store. */
1374 COSTS_N_INSNS (3), /* strd. */
1375 COSTS_N_INSNS (1), /* stm_1st. */
1376 1, /* stm_regs_per_insn_1st. */
1377 2, /* stm_regs_per_insn_subsequent. */
1378 COSTS_N_INSNS (2), /* storef. */
1379 COSTS_N_INSNS (2), /* stored. */
1380 COSTS_N_INSNS (1), /* store_unaligned. */
1381 COSTS_N_INSNS (1), /* loadv. */
1382 COSTS_N_INSNS (1) /* storev. */
1385 /* FP SFmode */
1387 COSTS_N_INSNS (15), /* div. */
1388 COSTS_N_INSNS (3), /* mult. */
1389 COSTS_N_INSNS (7), /* mult_addsub. */
1390 COSTS_N_INSNS (7), /* fma. */
1391 COSTS_N_INSNS (3), /* addsub. */
1392 COSTS_N_INSNS (3), /* fpconst. */
1393 COSTS_N_INSNS (3), /* neg. */
1394 COSTS_N_INSNS (3), /* compare. */
1395 COSTS_N_INSNS (3), /* widen. */
1396 COSTS_N_INSNS (3), /* narrow. */
1397 COSTS_N_INSNS (3), /* toint. */
1398 COSTS_N_INSNS (3), /* fromint. */
1399 COSTS_N_INSNS (3) /* roundint. */
1401 /* FP DFmode */
1403 COSTS_N_INSNS (30), /* div. */
1404 COSTS_N_INSNS (6), /* mult. */
1405 COSTS_N_INSNS (10), /* mult_addsub. */
1406 COSTS_N_INSNS (7), /* fma. */
1407 COSTS_N_INSNS (3), /* addsub. */
1408 COSTS_N_INSNS (3), /* fpconst. */
1409 COSTS_N_INSNS (3), /* neg. */
1410 COSTS_N_INSNS (3), /* compare. */
1411 COSTS_N_INSNS (3), /* widen. */
1412 COSTS_N_INSNS (3), /* narrow. */
1413 COSTS_N_INSNS (3), /* toint. */
1414 COSTS_N_INSNS (3), /* fromint. */
1415 COSTS_N_INSNS (3) /* roundint. */
1418 /* Vector */
1420 COSTS_N_INSNS (1), /* alu. */
1421 COSTS_N_INSNS (4), /* mult. */
1422 COSTS_N_INSNS (1), /* movi. */
1423 COSTS_N_INSNS (2), /* dup. */
1424 COSTS_N_INSNS (2) /* extract. */
1429 const struct cpu_cost_table cortexa7_extra_costs =
1431 /* ALU */
1433 0, /* arith. */
1434 0, /* logical. */
1435 COSTS_N_INSNS (1), /* shift. */
1436 COSTS_N_INSNS (1), /* shift_reg. */
1437 COSTS_N_INSNS (1), /* arith_shift. */
1438 COSTS_N_INSNS (1), /* arith_shift_reg. */
1439 COSTS_N_INSNS (1), /* log_shift. */
1440 COSTS_N_INSNS (1), /* log_shift_reg. */
1441 COSTS_N_INSNS (1), /* extend. */
1442 COSTS_N_INSNS (1), /* extend_arith. */
1443 COSTS_N_INSNS (1), /* bfi. */
1444 COSTS_N_INSNS (1), /* bfx. */
1445 COSTS_N_INSNS (1), /* clz. */
1446 COSTS_N_INSNS (1), /* rev. */
1447 0, /* non_exec. */
1448 true /* non_exec_costs_exec. */
1452 /* MULT SImode */
1454 0, /* simple. */
1455 COSTS_N_INSNS (1), /* flag_setting. */
1456 COSTS_N_INSNS (1), /* extend. */
1457 COSTS_N_INSNS (1), /* add. */
1458 COSTS_N_INSNS (1), /* extend_add. */
1459 COSTS_N_INSNS (7) /* idiv. */
1461 /* MULT DImode */
1463 0, /* simple (N/A). */
1464 0, /* flag_setting (N/A). */
1465 COSTS_N_INSNS (1), /* extend. */
1466 0, /* add. */
1467 COSTS_N_INSNS (2), /* extend_add. */
1468 0 /* idiv (N/A). */
1471 /* LD/ST */
1473 COSTS_N_INSNS (1), /* load. */
1474 COSTS_N_INSNS (1), /* load_sign_extend. */
1475 COSTS_N_INSNS (3), /* ldrd. */
1476 COSTS_N_INSNS (1), /* ldm_1st. */
1477 1, /* ldm_regs_per_insn_1st. */
1478 2, /* ldm_regs_per_insn_subsequent. */
1479 COSTS_N_INSNS (2), /* loadf. */
1480 COSTS_N_INSNS (2), /* loadd. */
1481 COSTS_N_INSNS (1), /* load_unaligned. */
1482 COSTS_N_INSNS (1), /* store. */
1483 COSTS_N_INSNS (3), /* strd. */
1484 COSTS_N_INSNS (1), /* stm_1st. */
1485 1, /* stm_regs_per_insn_1st. */
1486 2, /* stm_regs_per_insn_subsequent. */
1487 COSTS_N_INSNS (2), /* storef. */
1488 COSTS_N_INSNS (2), /* stored. */
1489 COSTS_N_INSNS (1), /* store_unaligned. */
1490 COSTS_N_INSNS (1), /* loadv. */
1491 COSTS_N_INSNS (1) /* storev. */
1494 /* FP SFmode */
1496 COSTS_N_INSNS (15), /* div. */
1497 COSTS_N_INSNS (3), /* mult. */
1498 COSTS_N_INSNS (7), /* mult_addsub. */
1499 COSTS_N_INSNS (7), /* fma. */
1500 COSTS_N_INSNS (3), /* addsub. */
1501 COSTS_N_INSNS (3), /* fpconst. */
1502 COSTS_N_INSNS (3), /* neg. */
1503 COSTS_N_INSNS (3), /* compare. */
1504 COSTS_N_INSNS (3), /* widen. */
1505 COSTS_N_INSNS (3), /* narrow. */
1506 COSTS_N_INSNS (3), /* toint. */
1507 COSTS_N_INSNS (3), /* fromint. */
1508 COSTS_N_INSNS (3) /* roundint. */
1510 /* FP DFmode */
1512 COSTS_N_INSNS (30), /* div. */
1513 COSTS_N_INSNS (6), /* mult. */
1514 COSTS_N_INSNS (10), /* mult_addsub. */
1515 COSTS_N_INSNS (7), /* fma. */
1516 COSTS_N_INSNS (3), /* addsub. */
1517 COSTS_N_INSNS (3), /* fpconst. */
1518 COSTS_N_INSNS (3), /* neg. */
1519 COSTS_N_INSNS (3), /* compare. */
1520 COSTS_N_INSNS (3), /* widen. */
1521 COSTS_N_INSNS (3), /* narrow. */
1522 COSTS_N_INSNS (3), /* toint. */
1523 COSTS_N_INSNS (3), /* fromint. */
1524 COSTS_N_INSNS (3) /* roundint. */
1527 /* Vector */
1529 COSTS_N_INSNS (1), /* alu. */
1530 COSTS_N_INSNS (4), /* mult. */
1531 COSTS_N_INSNS (1), /* movi. */
1532 COSTS_N_INSNS (2), /* dup. */
1533 COSTS_N_INSNS (2) /* extract. */
1537 const struct cpu_cost_table cortexa12_extra_costs =
1539 /* ALU */
1541 0, /* arith. */
1542 0, /* logical. */
1543 0, /* shift. */
1544 COSTS_N_INSNS (1), /* shift_reg. */
1545 COSTS_N_INSNS (1), /* arith_shift. */
1546 COSTS_N_INSNS (1), /* arith_shift_reg. */
1547 COSTS_N_INSNS (1), /* log_shift. */
1548 COSTS_N_INSNS (1), /* log_shift_reg. */
1549 0, /* extend. */
1550 COSTS_N_INSNS (1), /* extend_arith. */
1551 0, /* bfi. */
1552 COSTS_N_INSNS (1), /* bfx. */
1553 COSTS_N_INSNS (1), /* clz. */
1554 COSTS_N_INSNS (1), /* rev. */
1555 0, /* non_exec. */
1556 true /* non_exec_costs_exec. */
1558 /* MULT SImode */
1561 COSTS_N_INSNS (2), /* simple. */
1562 COSTS_N_INSNS (3), /* flag_setting. */
1563 COSTS_N_INSNS (2), /* extend. */
1564 COSTS_N_INSNS (3), /* add. */
1565 COSTS_N_INSNS (2), /* extend_add. */
1566 COSTS_N_INSNS (18) /* idiv. */
1568 /* MULT DImode */
1570 0, /* simple (N/A). */
1571 0, /* flag_setting (N/A). */
1572 COSTS_N_INSNS (3), /* extend. */
1573 0, /* add (N/A). */
1574 COSTS_N_INSNS (3), /* extend_add. */
1575 0 /* idiv (N/A). */
1578 /* LD/ST */
1580 COSTS_N_INSNS (3), /* load. */
1581 COSTS_N_INSNS (3), /* load_sign_extend. */
1582 COSTS_N_INSNS (3), /* ldrd. */
1583 COSTS_N_INSNS (3), /* ldm_1st. */
1584 1, /* ldm_regs_per_insn_1st. */
1585 2, /* ldm_regs_per_insn_subsequent. */
1586 COSTS_N_INSNS (3), /* loadf. */
1587 COSTS_N_INSNS (3), /* loadd. */
1588 0, /* load_unaligned. */
1589 0, /* store. */
1590 0, /* strd. */
1591 0, /* stm_1st. */
1592 1, /* stm_regs_per_insn_1st. */
1593 2, /* stm_regs_per_insn_subsequent. */
1594 COSTS_N_INSNS (2), /* storef. */
1595 COSTS_N_INSNS (2), /* stored. */
1596 0, /* store_unaligned. */
1597 COSTS_N_INSNS (1), /* loadv. */
1598 COSTS_N_INSNS (1) /* storev. */
1601 /* FP SFmode */
1603 COSTS_N_INSNS (17), /* div. */
1604 COSTS_N_INSNS (4), /* mult. */
1605 COSTS_N_INSNS (8), /* mult_addsub. */
1606 COSTS_N_INSNS (8), /* fma. */
1607 COSTS_N_INSNS (4), /* addsub. */
1608 COSTS_N_INSNS (2), /* fpconst. */
1609 COSTS_N_INSNS (2), /* neg. */
1610 COSTS_N_INSNS (2), /* compare. */
1611 COSTS_N_INSNS (4), /* widen. */
1612 COSTS_N_INSNS (4), /* narrow. */
1613 COSTS_N_INSNS (4), /* toint. */
1614 COSTS_N_INSNS (4), /* fromint. */
1615 COSTS_N_INSNS (4) /* roundint. */
1617 /* FP DFmode */
1619 COSTS_N_INSNS (31), /* div. */
1620 COSTS_N_INSNS (4), /* mult. */
1621 COSTS_N_INSNS (8), /* mult_addsub. */
1622 COSTS_N_INSNS (8), /* fma. */
1623 COSTS_N_INSNS (4), /* addsub. */
1624 COSTS_N_INSNS (2), /* fpconst. */
1625 COSTS_N_INSNS (2), /* neg. */
1626 COSTS_N_INSNS (2), /* compare. */
1627 COSTS_N_INSNS (4), /* widen. */
1628 COSTS_N_INSNS (4), /* narrow. */
1629 COSTS_N_INSNS (4), /* toint. */
1630 COSTS_N_INSNS (4), /* fromint. */
1631 COSTS_N_INSNS (4) /* roundint. */
1634 /* Vector */
1636 COSTS_N_INSNS (1), /* alu. */
1637 COSTS_N_INSNS (4), /* mult. */
1638 COSTS_N_INSNS (1), /* movi. */
1639 COSTS_N_INSNS (2), /* dup. */
1640 COSTS_N_INSNS (2) /* extract. */
1644 const struct cpu_cost_table cortexa15_extra_costs =
1646 /* ALU */
1648 0, /* arith. */
1649 0, /* logical. */
1650 0, /* shift. */
1651 0, /* shift_reg. */
1652 COSTS_N_INSNS (1), /* arith_shift. */
1653 COSTS_N_INSNS (1), /* arith_shift_reg. */
1654 COSTS_N_INSNS (1), /* log_shift. */
1655 COSTS_N_INSNS (1), /* log_shift_reg. */
1656 0, /* extend. */
1657 COSTS_N_INSNS (1), /* extend_arith. */
1658 COSTS_N_INSNS (1), /* bfi. */
1659 0, /* bfx. */
1660 0, /* clz. */
1661 0, /* rev. */
1662 0, /* non_exec. */
1663 true /* non_exec_costs_exec. */
1665 /* MULT SImode */
1668 COSTS_N_INSNS (2), /* simple. */
1669 COSTS_N_INSNS (3), /* flag_setting. */
1670 COSTS_N_INSNS (2), /* extend. */
1671 COSTS_N_INSNS (2), /* add. */
1672 COSTS_N_INSNS (2), /* extend_add. */
1673 COSTS_N_INSNS (18) /* idiv. */
1675 /* MULT DImode */
1677 0, /* simple (N/A). */
1678 0, /* flag_setting (N/A). */
1679 COSTS_N_INSNS (3), /* extend. */
1680 0, /* add (N/A). */
1681 COSTS_N_INSNS (3), /* extend_add. */
1682 0 /* idiv (N/A). */
1685 /* LD/ST */
1687 COSTS_N_INSNS (3), /* load. */
1688 COSTS_N_INSNS (3), /* load_sign_extend. */
1689 COSTS_N_INSNS (3), /* ldrd. */
1690 COSTS_N_INSNS (4), /* ldm_1st. */
1691 1, /* ldm_regs_per_insn_1st. */
1692 2, /* ldm_regs_per_insn_subsequent. */
1693 COSTS_N_INSNS (4), /* loadf. */
1694 COSTS_N_INSNS (4), /* loadd. */
1695 0, /* load_unaligned. */
1696 0, /* store. */
1697 0, /* strd. */
1698 COSTS_N_INSNS (1), /* stm_1st. */
1699 1, /* stm_regs_per_insn_1st. */
1700 2, /* stm_regs_per_insn_subsequent. */
1701 0, /* storef. */
1702 0, /* stored. */
1703 0, /* store_unaligned. */
1704 COSTS_N_INSNS (1), /* loadv. */
1705 COSTS_N_INSNS (1) /* storev. */
1708 /* FP SFmode */
1710 COSTS_N_INSNS (17), /* div. */
1711 COSTS_N_INSNS (4), /* mult. */
1712 COSTS_N_INSNS (8), /* mult_addsub. */
1713 COSTS_N_INSNS (8), /* fma. */
1714 COSTS_N_INSNS (4), /* addsub. */
1715 COSTS_N_INSNS (2), /* fpconst. */
1716 COSTS_N_INSNS (2), /* neg. */
1717 COSTS_N_INSNS (5), /* compare. */
1718 COSTS_N_INSNS (4), /* widen. */
1719 COSTS_N_INSNS (4), /* narrow. */
1720 COSTS_N_INSNS (4), /* toint. */
1721 COSTS_N_INSNS (4), /* fromint. */
1722 COSTS_N_INSNS (4) /* roundint. */
1724 /* FP DFmode */
1726 COSTS_N_INSNS (31), /* div. */
1727 COSTS_N_INSNS (4), /* mult. */
1728 COSTS_N_INSNS (8), /* mult_addsub. */
1729 COSTS_N_INSNS (8), /* fma. */
1730 COSTS_N_INSNS (4), /* addsub. */
1731 COSTS_N_INSNS (2), /* fpconst. */
1732 COSTS_N_INSNS (2), /* neg. */
1733 COSTS_N_INSNS (2), /* compare. */
1734 COSTS_N_INSNS (4), /* widen. */
1735 COSTS_N_INSNS (4), /* narrow. */
1736 COSTS_N_INSNS (4), /* toint. */
1737 COSTS_N_INSNS (4), /* fromint. */
1738 COSTS_N_INSNS (4) /* roundint. */
1741 /* Vector */
1743 COSTS_N_INSNS (1), /* alu. */
1744 COSTS_N_INSNS (4), /* mult. */
1745 COSTS_N_INSNS (1), /* movi. */
1746 COSTS_N_INSNS (2), /* dup. */
1747 COSTS_N_INSNS (2) /* extract. */
1751 const struct cpu_cost_table v7m_extra_costs =
1753 /* ALU */
1755 0, /* arith. */
1756 0, /* logical. */
1757 0, /* shift. */
1758 0, /* shift_reg. */
1759 0, /* arith_shift. */
1760 COSTS_N_INSNS (1), /* arith_shift_reg. */
1761 0, /* log_shift. */
1762 COSTS_N_INSNS (1), /* log_shift_reg. */
1763 0, /* extend. */
1764 COSTS_N_INSNS (1), /* extend_arith. */
1765 0, /* bfi. */
1766 0, /* bfx. */
1767 0, /* clz. */
1768 0, /* rev. */
1769 COSTS_N_INSNS (1), /* non_exec. */
1770 false /* non_exec_costs_exec. */
1773 /* MULT SImode */
1775 COSTS_N_INSNS (1), /* simple. */
1776 COSTS_N_INSNS (1), /* flag_setting. */
1777 COSTS_N_INSNS (2), /* extend. */
1778 COSTS_N_INSNS (1), /* add. */
1779 COSTS_N_INSNS (3), /* extend_add. */
1780 COSTS_N_INSNS (8) /* idiv. */
1782 /* MULT DImode */
1784 0, /* simple (N/A). */
1785 0, /* flag_setting (N/A). */
1786 COSTS_N_INSNS (2), /* extend. */
1787 0, /* add (N/A). */
1788 COSTS_N_INSNS (3), /* extend_add. */
1789 0 /* idiv (N/A). */
1792 /* LD/ST */
1794 COSTS_N_INSNS (2), /* load. */
1795 0, /* load_sign_extend. */
1796 COSTS_N_INSNS (3), /* ldrd. */
1797 COSTS_N_INSNS (2), /* ldm_1st. */
1798 1, /* ldm_regs_per_insn_1st. */
1799 1, /* ldm_regs_per_insn_subsequent. */
1800 COSTS_N_INSNS (2), /* loadf. */
1801 COSTS_N_INSNS (3), /* loadd. */
1802 COSTS_N_INSNS (1), /* load_unaligned. */
1803 COSTS_N_INSNS (2), /* store. */
1804 COSTS_N_INSNS (3), /* strd. */
1805 COSTS_N_INSNS (2), /* stm_1st. */
1806 1, /* stm_regs_per_insn_1st. */
1807 1, /* stm_regs_per_insn_subsequent. */
1808 COSTS_N_INSNS (2), /* storef. */
1809 COSTS_N_INSNS (3), /* stored. */
1810 COSTS_N_INSNS (1), /* store_unaligned. */
1811 COSTS_N_INSNS (1), /* loadv. */
1812 COSTS_N_INSNS (1) /* storev. */
1815 /* FP SFmode */
1817 COSTS_N_INSNS (7), /* div. */
1818 COSTS_N_INSNS (2), /* mult. */
1819 COSTS_N_INSNS (5), /* mult_addsub. */
1820 COSTS_N_INSNS (3), /* fma. */
1821 COSTS_N_INSNS (1), /* addsub. */
1822 0, /* fpconst. */
1823 0, /* neg. */
1824 0, /* compare. */
1825 0, /* widen. */
1826 0, /* narrow. */
1827 0, /* toint. */
1828 0, /* fromint. */
1829 0 /* roundint. */
1831 /* FP DFmode */
1833 COSTS_N_INSNS (15), /* div. */
1834 COSTS_N_INSNS (5), /* mult. */
1835 COSTS_N_INSNS (7), /* mult_addsub. */
1836 COSTS_N_INSNS (7), /* fma. */
1837 COSTS_N_INSNS (3), /* addsub. */
1838 0, /* fpconst. */
1839 0, /* neg. */
1840 0, /* compare. */
1841 0, /* widen. */
1842 0, /* narrow. */
1843 0, /* toint. */
1844 0, /* fromint. */
1845 0 /* roundint. */
1848 /* Vector */
1850 COSTS_N_INSNS (1), /* alu. */
1851 COSTS_N_INSNS (4), /* mult. */
1852 COSTS_N_INSNS (1), /* movi. */
1853 COSTS_N_INSNS (2), /* dup. */
1854 COSTS_N_INSNS (2) /* extract. */
1858 const struct addr_mode_cost_table generic_addr_mode_costs =
1860 /* int. */
1862 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1863 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1864 COSTS_N_INSNS (0) /* AMO_WB. */
1866 /* float. */
1868 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1869 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1870 COSTS_N_INSNS (0) /* AMO_WB. */
1872 /* vector. */
1874 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1875 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1876 COSTS_N_INSNS (0) /* AMO_WB. */
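/* A note on the cost units used in the tables above and below: with the
   usual rtl.h definition, COSTS_N_INSNS (N) expands to N times the cost of
   one simple instruction, so an entry such as COSTS_N_INSNS (30) for a
   DFmode "div" models the divide as thirty times as expensive as a single
   ALU operation, while a plain 0 marks the operation as free relative to
   that baseline.  The check below is only a sketch of that assumption and
   relies on nothing beyond the linearity of the macro.  */
static_assert (COSTS_N_INSNS (3) == 3 * COSTS_N_INSNS (1),
	       "COSTS_N_INSNS is expected to scale linearly");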
1880 const struct tune_params arm_slowmul_tune =
1882 &generic_extra_costs, /* Insn extra costs. */
1883 &generic_addr_mode_costs, /* Addressing mode costs. */
1884 NULL, /* Sched adj cost. */
1885 arm_default_branch_cost,
1886 &arm_default_vec_cost,
1887 3, /* Constant limit. */
1888 5, /* Max cond insns. */
1889 8, /* Memset max inline. */
1890 1, /* Issue rate. */
1891 ARM_PREFETCH_NOT_BENEFICIAL,
1892 tune_params::PREF_CONST_POOL_TRUE,
1893 tune_params::PREF_LDRD_FALSE,
1894 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1895 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1896 tune_params::DISPARAGE_FLAGS_NEITHER,
1897 tune_params::PREF_NEON_STRINGOPS_FALSE,
1898 tune_params::FUSE_NOTHING,
1899 tune_params::SCHED_AUTOPREF_OFF
1902 const struct tune_params arm_fastmul_tune =
1904 &generic_extra_costs, /* Insn extra costs. */
1905 &generic_addr_mode_costs, /* Addressing mode costs. */
1906 NULL, /* Sched adj cost. */
1907 arm_default_branch_cost,
1908 &arm_default_vec_cost,
1909 1, /* Constant limit. */
1910 5, /* Max cond insns. */
1911 8, /* Memset max inline. */
1912 1, /* Issue rate. */
1913 ARM_PREFETCH_NOT_BENEFICIAL,
1914 tune_params::PREF_CONST_POOL_TRUE,
1915 tune_params::PREF_LDRD_FALSE,
1916 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1917 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1918 tune_params::DISPARAGE_FLAGS_NEITHER,
1919 tune_params::PREF_NEON_STRINGOPS_FALSE,
1920 tune_params::FUSE_NOTHING,
1921 tune_params::SCHED_AUTOPREF_OFF
1924 /* StrongARM has early execution of branches, so a sequence that is worth
1925 skipping is shorter. Set max_insns_skipped to a lower value. */
1927 const struct tune_params arm_strongarm_tune =
1929 &generic_extra_costs, /* Insn extra costs. */
1930 &generic_addr_mode_costs, /* Addressing mode costs. */
1931 NULL, /* Sched adj cost. */
1932 arm_default_branch_cost,
1933 &arm_default_vec_cost,
1934 1, /* Constant limit. */
1935 3, /* Max cond insns. */
1936 8, /* Memset max inline. */
1937 1, /* Issue rate. */
1938 ARM_PREFETCH_NOT_BENEFICIAL,
1939 tune_params::PREF_CONST_POOL_TRUE,
1940 tune_params::PREF_LDRD_FALSE,
1941 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1942 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1943 tune_params::DISPARAGE_FLAGS_NEITHER,
1944 tune_params::PREF_NEON_STRINGOPS_FALSE,
1945 tune_params::FUSE_NOTHING,
1946 tune_params::SCHED_AUTOPREF_OFF
1949 const struct tune_params arm_xscale_tune =
1951 &generic_extra_costs, /* Insn extra costs. */
1952 &generic_addr_mode_costs, /* Addressing mode costs. */
1953 xscale_sched_adjust_cost,
1954 arm_default_branch_cost,
1955 &arm_default_vec_cost,
1956 2, /* Constant limit. */
1957 3, /* Max cond insns. */
1958 8, /* Memset max inline. */
1959 1, /* Issue rate. */
1960 ARM_PREFETCH_NOT_BENEFICIAL,
1961 tune_params::PREF_CONST_POOL_TRUE,
1962 tune_params::PREF_LDRD_FALSE,
1963 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1964 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1965 tune_params::DISPARAGE_FLAGS_NEITHER,
1966 tune_params::PREF_NEON_STRINGOPS_FALSE,
1967 tune_params::FUSE_NOTHING,
1968 tune_params::SCHED_AUTOPREF_OFF
1971 const struct tune_params arm_9e_tune =
1973 &generic_extra_costs, /* Insn extra costs. */
1974 &generic_addr_mode_costs, /* Addressing mode costs. */
1975 NULL, /* Sched adj cost. */
1976 arm_default_branch_cost,
1977 &arm_default_vec_cost,
1978 1, /* Constant limit. */
1979 5, /* Max cond insns. */
1980 8, /* Memset max inline. */
1981 1, /* Issue rate. */
1982 ARM_PREFETCH_NOT_BENEFICIAL,
1983 tune_params::PREF_CONST_POOL_TRUE,
1984 tune_params::PREF_LDRD_FALSE,
1985 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1986 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1987 tune_params::DISPARAGE_FLAGS_NEITHER,
1988 tune_params::PREF_NEON_STRINGOPS_FALSE,
1989 tune_params::FUSE_NOTHING,
1990 tune_params::SCHED_AUTOPREF_OFF
1993 const struct tune_params arm_marvell_pj4_tune =
1995 &generic_extra_costs, /* Insn extra costs. */
1996 &generic_addr_mode_costs, /* Addressing mode costs. */
1997 NULL, /* Sched adj cost. */
1998 arm_default_branch_cost,
1999 &arm_default_vec_cost,
2000 1, /* Constant limit. */
2001 5, /* Max cond insns. */
2002 8, /* Memset max inline. */
2003 2, /* Issue rate. */
2004 ARM_PREFETCH_NOT_BENEFICIAL,
2005 tune_params::PREF_CONST_POOL_TRUE,
2006 tune_params::PREF_LDRD_FALSE,
2007 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2008 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2009 tune_params::DISPARAGE_FLAGS_NEITHER,
2010 tune_params::PREF_NEON_STRINGOPS_FALSE,
2011 tune_params::FUSE_NOTHING,
2012 tune_params::SCHED_AUTOPREF_OFF
2015 const struct tune_params arm_v6t2_tune =
2017 &generic_extra_costs, /* Insn extra costs. */
2018 &generic_addr_mode_costs, /* Addressing mode costs. */
2019 NULL, /* Sched adj cost. */
2020 arm_default_branch_cost,
2021 &arm_default_vec_cost,
2022 1, /* Constant limit. */
2023 5, /* Max cond insns. */
2024 8, /* Memset max inline. */
2025 1, /* Issue rate. */
2026 ARM_PREFETCH_NOT_BENEFICIAL,
2027 tune_params::PREF_CONST_POOL_FALSE,
2028 tune_params::PREF_LDRD_FALSE,
2029 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2030 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2031 tune_params::DISPARAGE_FLAGS_NEITHER,
2032 tune_params::PREF_NEON_STRINGOPS_FALSE,
2033 tune_params::FUSE_NOTHING,
2034 tune_params::SCHED_AUTOPREF_OFF
2038 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
2039 const struct tune_params arm_cortex_tune =
2041 &generic_extra_costs,
2042 &generic_addr_mode_costs, /* Addressing mode costs. */
2043 NULL, /* Sched adj cost. */
2044 arm_default_branch_cost,
2045 &arm_default_vec_cost,
2046 1, /* Constant limit. */
2047 5, /* Max cond insns. */
2048 8, /* Memset max inline. */
2049 2, /* Issue rate. */
2050 ARM_PREFETCH_NOT_BENEFICIAL,
2051 tune_params::PREF_CONST_POOL_FALSE,
2052 tune_params::PREF_LDRD_FALSE,
2053 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2054 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2055 tune_params::DISPARAGE_FLAGS_NEITHER,
2056 tune_params::PREF_NEON_STRINGOPS_FALSE,
2057 tune_params::FUSE_NOTHING,
2058 tune_params::SCHED_AUTOPREF_OFF
2061 const struct tune_params arm_cortex_a8_tune =
2063 &cortexa8_extra_costs,
2064 &generic_addr_mode_costs, /* Addressing mode costs. */
2065 NULL, /* Sched adj cost. */
2066 arm_default_branch_cost,
2067 &arm_default_vec_cost,
2068 1, /* Constant limit. */
2069 5, /* Max cond insns. */
2070 8, /* Memset max inline. */
2071 2, /* Issue rate. */
2072 ARM_PREFETCH_NOT_BENEFICIAL,
2073 tune_params::PREF_CONST_POOL_FALSE,
2074 tune_params::PREF_LDRD_FALSE,
2075 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2076 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2077 tune_params::DISPARAGE_FLAGS_NEITHER,
2078 tune_params::PREF_NEON_STRINGOPS_TRUE,
2079 tune_params::FUSE_NOTHING,
2080 tune_params::SCHED_AUTOPREF_OFF
2083 const struct tune_params arm_cortex_a7_tune =
2085 &cortexa7_extra_costs,
2086 &generic_addr_mode_costs, /* Addressing mode costs. */
2087 NULL, /* Sched adj cost. */
2088 arm_default_branch_cost,
2089 &arm_default_vec_cost,
2090 1, /* Constant limit. */
2091 5, /* Max cond insns. */
2092 8, /* Memset max inline. */
2093 2, /* Issue rate. */
2094 ARM_PREFETCH_NOT_BENEFICIAL,
2095 tune_params::PREF_CONST_POOL_FALSE,
2096 tune_params::PREF_LDRD_FALSE,
2097 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2098 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2099 tune_params::DISPARAGE_FLAGS_NEITHER,
2100 tune_params::PREF_NEON_STRINGOPS_TRUE,
2101 tune_params::FUSE_NOTHING,
2102 tune_params::SCHED_AUTOPREF_OFF
2105 const struct tune_params arm_cortex_a15_tune =
2107 &cortexa15_extra_costs,
2108 &generic_addr_mode_costs, /* Addressing mode costs. */
2109 NULL, /* Sched adj cost. */
2110 arm_default_branch_cost,
2111 &arm_default_vec_cost,
2112 1, /* Constant limit. */
2113 2, /* Max cond insns. */
2114 8, /* Memset max inline. */
2115 3, /* Issue rate. */
2116 ARM_PREFETCH_NOT_BENEFICIAL,
2117 tune_params::PREF_CONST_POOL_FALSE,
2118 tune_params::PREF_LDRD_TRUE,
2119 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2120 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2121 tune_params::DISPARAGE_FLAGS_ALL,
2122 tune_params::PREF_NEON_STRINGOPS_TRUE,
2123 tune_params::FUSE_NOTHING,
2124 tune_params::SCHED_AUTOPREF_FULL
2127 const struct tune_params arm_cortex_a35_tune =
2129 &cortexa53_extra_costs,
2130 &generic_addr_mode_costs, /* Addressing mode costs. */
2131 NULL, /* Sched adj cost. */
2132 arm_default_branch_cost,
2133 &arm_default_vec_cost,
2134 1, /* Constant limit. */
2135 5, /* Max cond insns. */
2136 8, /* Memset max inline. */
2137 1, /* Issue rate. */
2138 ARM_PREFETCH_NOT_BENEFICIAL,
2139 tune_params::PREF_CONST_POOL_FALSE,
2140 tune_params::PREF_LDRD_FALSE,
2141 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2142 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2143 tune_params::DISPARAGE_FLAGS_NEITHER,
2144 tune_params::PREF_NEON_STRINGOPS_TRUE,
2145 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2146 tune_params::SCHED_AUTOPREF_OFF
2149 const struct tune_params arm_cortex_a53_tune =
2151 &cortexa53_extra_costs,
2152 &generic_addr_mode_costs, /* Addressing mode costs. */
2153 NULL, /* Sched adj cost. */
2154 arm_default_branch_cost,
2155 &arm_default_vec_cost,
2156 1, /* Constant limit. */
2157 5, /* Max cond insns. */
2158 8, /* Memset max inline. */
2159 2, /* Issue rate. */
2160 ARM_PREFETCH_NOT_BENEFICIAL,
2161 tune_params::PREF_CONST_POOL_FALSE,
2162 tune_params::PREF_LDRD_FALSE,
2163 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2164 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2165 tune_params::DISPARAGE_FLAGS_NEITHER,
2166 tune_params::PREF_NEON_STRINGOPS_TRUE,
2167 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2168 tune_params::SCHED_AUTOPREF_OFF
2171 const struct tune_params arm_cortex_a57_tune =
2173 &cortexa57_extra_costs,
2174   &generic_addr_mode_costs,		/* Addressing mode costs. */
2175 NULL, /* Sched adj cost. */
2176 arm_default_branch_cost,
2177 &arm_default_vec_cost,
2178 1, /* Constant limit. */
2179 2, /* Max cond insns. */
2180 8, /* Memset max inline. */
2181 3, /* Issue rate. */
2182 ARM_PREFETCH_NOT_BENEFICIAL,
2183 tune_params::PREF_CONST_POOL_FALSE,
2184 tune_params::PREF_LDRD_TRUE,
2185 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2186 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2187 tune_params::DISPARAGE_FLAGS_ALL,
2188 tune_params::PREF_NEON_STRINGOPS_TRUE,
2189 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2190 tune_params::SCHED_AUTOPREF_FULL
2193 const struct tune_params arm_exynosm1_tune =
2195 &exynosm1_extra_costs,
2196 &generic_addr_mode_costs, /* Addressing mode costs. */
2197 NULL, /* Sched adj cost. */
2198 arm_default_branch_cost,
2199 &arm_default_vec_cost,
2200 1, /* Constant limit. */
2201 2, /* Max cond insns. */
2202 8, /* Memset max inline. */
2203 3, /* Issue rate. */
2204 ARM_PREFETCH_NOT_BENEFICIAL,
2205 tune_params::PREF_CONST_POOL_FALSE,
2206 tune_params::PREF_LDRD_TRUE,
2207 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2208 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2209 tune_params::DISPARAGE_FLAGS_ALL,
2210 tune_params::PREF_NEON_STRINGOPS_TRUE,
2211 tune_params::FUSE_NOTHING,
2212 tune_params::SCHED_AUTOPREF_OFF
2215 const struct tune_params arm_xgene1_tune =
2217 &xgene1_extra_costs,
2218 &generic_addr_mode_costs, /* Addressing mode costs. */
2219 NULL, /* Sched adj cost. */
2220 arm_default_branch_cost,
2221 &arm_default_vec_cost,
2222 1, /* Constant limit. */
2223 2, /* Max cond insns. */
2224 32, /* Memset max inline. */
2225 4, /* Issue rate. */
2226 ARM_PREFETCH_NOT_BENEFICIAL,
2227 tune_params::PREF_CONST_POOL_FALSE,
2228 tune_params::PREF_LDRD_TRUE,
2229 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2230 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2231 tune_params::DISPARAGE_FLAGS_ALL,
2232 tune_params::PREF_NEON_STRINGOPS_FALSE,
2233 tune_params::FUSE_NOTHING,
2234 tune_params::SCHED_AUTOPREF_OFF
2237 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2238 less appealing. Set max_insns_skipped to a low value. */
2240 const struct tune_params arm_cortex_a5_tune =
2242 &cortexa5_extra_costs,
2243 &generic_addr_mode_costs, /* Addressing mode costs. */
2244 NULL, /* Sched adj cost. */
2245 arm_cortex_a5_branch_cost,
2246 &arm_default_vec_cost,
2247 1, /* Constant limit. */
2248 1, /* Max cond insns. */
2249 8, /* Memset max inline. */
2250 2, /* Issue rate. */
2251 ARM_PREFETCH_NOT_BENEFICIAL,
2252 tune_params::PREF_CONST_POOL_FALSE,
2253 tune_params::PREF_LDRD_FALSE,
2254 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2255 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2256 tune_params::DISPARAGE_FLAGS_NEITHER,
2257 tune_params::PREF_NEON_STRINGOPS_TRUE,
2258 tune_params::FUSE_NOTHING,
2259 tune_params::SCHED_AUTOPREF_OFF
2262 const struct tune_params arm_cortex_a9_tune =
2264 &cortexa9_extra_costs,
2265 &generic_addr_mode_costs, /* Addressing mode costs. */
2266 cortex_a9_sched_adjust_cost,
2267 arm_default_branch_cost,
2268 &arm_default_vec_cost,
2269 1, /* Constant limit. */
2270 5, /* Max cond insns. */
2271 8, /* Memset max inline. */
2272 2, /* Issue rate. */
2273 ARM_PREFETCH_BENEFICIAL(4,32,32),
2274 tune_params::PREF_CONST_POOL_FALSE,
2275 tune_params::PREF_LDRD_FALSE,
2276 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2277 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2278 tune_params::DISPARAGE_FLAGS_NEITHER,
2279 tune_params::PREF_NEON_STRINGOPS_FALSE,
2280 tune_params::FUSE_NOTHING,
2281 tune_params::SCHED_AUTOPREF_OFF
2284 const struct tune_params arm_cortex_a12_tune =
2286 &cortexa12_extra_costs,
2287 &generic_addr_mode_costs, /* Addressing mode costs. */
2288 NULL, /* Sched adj cost. */
2289 arm_default_branch_cost,
2290 &arm_default_vec_cost, /* Vectorizer costs. */
2291 1, /* Constant limit. */
2292 2, /* Max cond insns. */
2293 8, /* Memset max inline. */
2294 2, /* Issue rate. */
2295 ARM_PREFETCH_NOT_BENEFICIAL,
2296 tune_params::PREF_CONST_POOL_FALSE,
2297 tune_params::PREF_LDRD_TRUE,
2298 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2299 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2300 tune_params::DISPARAGE_FLAGS_ALL,
2301 tune_params::PREF_NEON_STRINGOPS_TRUE,
2302 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2303 tune_params::SCHED_AUTOPREF_OFF
2306 const struct tune_params arm_cortex_a73_tune =
2308 &cortexa57_extra_costs,
2309 &generic_addr_mode_costs, /* Addressing mode costs. */
2310 NULL, /* Sched adj cost. */
2311 arm_default_branch_cost,
2312 &arm_default_vec_cost, /* Vectorizer costs. */
2313 1, /* Constant limit. */
2314 2, /* Max cond insns. */
2315 8, /* Memset max inline. */
2316 2, /* Issue rate. */
2317 ARM_PREFETCH_NOT_BENEFICIAL,
2318 tune_params::PREF_CONST_POOL_FALSE,
2319 tune_params::PREF_LDRD_TRUE,
2320 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2321 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2322 tune_params::DISPARAGE_FLAGS_ALL,
2323 tune_params::PREF_NEON_STRINGOPS_TRUE,
2324 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2325 tune_params::SCHED_AUTOPREF_FULL
2328 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2329 cycle to execute each. An LDR from the constant pool also takes two cycles
2330 to execute, but mildly increases pipelining opportunity (consecutive
2331 loads/stores can be pipelined together, saving one cycle), and may also
2332 improve icache utilisation. Hence we prefer the constant pool for such
2333 processors. */
2335 const struct tune_params arm_v7m_tune =
2337 &v7m_extra_costs,
2338 &generic_addr_mode_costs, /* Addressing mode costs. */
2339 NULL, /* Sched adj cost. */
2340 arm_cortex_m_branch_cost,
2341 &arm_default_vec_cost,
2342 1, /* Constant limit. */
2343 2, /* Max cond insns. */
2344 8, /* Memset max inline. */
2345 1, /* Issue rate. */
2346 ARM_PREFETCH_NOT_BENEFICIAL,
2347 tune_params::PREF_CONST_POOL_TRUE,
2348 tune_params::PREF_LDRD_FALSE,
2349 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2350 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2351 tune_params::DISPARAGE_FLAGS_NEITHER,
2352 tune_params::PREF_NEON_STRINGOPS_FALSE,
2353 tune_params::FUSE_NOTHING,
2354 tune_params::SCHED_AUTOPREF_OFF
2357 /* Cortex-M7 tuning. */
2359 const struct tune_params arm_cortex_m7_tune =
2361 &v7m_extra_costs,
2362 &generic_addr_mode_costs, /* Addressing mode costs. */
2363 NULL, /* Sched adj cost. */
2364 arm_cortex_m7_branch_cost,
2365 &arm_default_vec_cost,
2366 0, /* Constant limit. */
2367 1, /* Max cond insns. */
2368 8, /* Memset max inline. */
2369 2, /* Issue rate. */
2370 ARM_PREFETCH_NOT_BENEFICIAL,
2371 tune_params::PREF_CONST_POOL_TRUE,
2372 tune_params::PREF_LDRD_FALSE,
2373 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2374 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2375 tune_params::DISPARAGE_FLAGS_NEITHER,
2376 tune_params::PREF_NEON_STRINGOPS_FALSE,
2377 tune_params::FUSE_NOTHING,
2378 tune_params::SCHED_AUTOPREF_OFF
2381 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2382 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2383 cortex-m23. */
2384 const struct tune_params arm_v6m_tune =
2386 &generic_extra_costs, /* Insn extra costs. */
2387 &generic_addr_mode_costs, /* Addressing mode costs. */
2388 NULL, /* Sched adj cost. */
2389 arm_default_branch_cost,
2390 &arm_default_vec_cost, /* Vectorizer costs. */
2391 1, /* Constant limit. */
2392 5, /* Max cond insns. */
2393 8, /* Memset max inline. */
2394 1, /* Issue rate. */
2395 ARM_PREFETCH_NOT_BENEFICIAL,
2396 tune_params::PREF_CONST_POOL_FALSE,
2397 tune_params::PREF_LDRD_FALSE,
2398 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2399 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2400 tune_params::DISPARAGE_FLAGS_NEITHER,
2401 tune_params::PREF_NEON_STRINGOPS_FALSE,
2402 tune_params::FUSE_NOTHING,
2403 tune_params::SCHED_AUTOPREF_OFF
2406 const struct tune_params arm_fa726te_tune =
2408 &generic_extra_costs, /* Insn extra costs. */
2409 &generic_addr_mode_costs, /* Addressing mode costs. */
2410 fa726te_sched_adjust_cost,
2411 arm_default_branch_cost,
2412 &arm_default_vec_cost,
2413 1, /* Constant limit. */
2414 5, /* Max cond insns. */
2415 8, /* Memset max inline. */
2416 2, /* Issue rate. */
2417 ARM_PREFETCH_NOT_BENEFICIAL,
2418 tune_params::PREF_CONST_POOL_TRUE,
2419 tune_params::PREF_LDRD_FALSE,
2420 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2421 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2422 tune_params::DISPARAGE_FLAGS_NEITHER,
2423 tune_params::PREF_NEON_STRINGOPS_FALSE,
2424 tune_params::FUSE_NOTHING,
2425 tune_params::SCHED_AUTOPREF_OFF
2428 /* Auto-generated CPU, FPU and architecture tables. */
2429 #include "arm-cpu-data.h"
2431 /* The name of the preprocessor macro to define for this architecture. PROFILE
2432 is replaced by the architecture name (eg. 8A) in arm_option_override () and
2433 is thus chosen to be big enough to hold the longest architecture name. */
2435 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2437 /* Supported TLS relocations. */
2439 enum tls_reloc {
2440 TLS_GD32,
2441 TLS_GD32_FDPIC,
2442 TLS_LDM32,
2443 TLS_LDM32_FDPIC,
2444 TLS_LDO32,
2445 TLS_IE32,
2446 TLS_IE32_FDPIC,
2447 TLS_LE32,
2448 TLS_DESCSEQ /* GNU scheme */
2451 /* The maximum number of insns to be used when loading a constant. */
2452 inline static int
2453 arm_constant_limit (bool size_p)
2455 return size_p ? 1 : current_tune->constant_limit;
2458 /* Emit an insn that's a simple single-set. Both the operands must be known
2459 to be valid. */
2460 inline static rtx_insn *
2461 emit_set_insn (rtx x, rtx y)
2463 return emit_insn (gen_rtx_SET (x, y));
2466 /* Return the number of bits set in VALUE. */
2467 static unsigned
2468 bit_count (unsigned long value)
2470 unsigned long count = 0;
2472 while (value)
2474 count++;
2475 value &= value - 1; /* Clear the least-significant set bit. */
2478 return count;
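/* As a concrete example of the loop above (Kernighan's trick), for
   value = 0b1011000 the successive values are 0b1010000, 0b1000000 and
   0b0000000, so bit_count returns 3: each "value &= value - 1" clears
   exactly one set bit, and the loop therefore iterates once per set bit
   rather than once per bit position.  */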
2481 /* Return the number of bits set in BMAP. */
2482 static unsigned
2483 bitmap_popcount (const sbitmap bmap)
2485 unsigned int count = 0;
2486 unsigned int n = 0;
2487 sbitmap_iterator sbi;
2489 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2490 count++;
2491 return count;
2494 typedef struct
2496 machine_mode mode;
2497 const char *name;
2498 } arm_fixed_mode_set;
2500 /* A small helper for setting the libfuncs for fixed-point operations. */
2502 static void
2503 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2504 const char *funcname, const char *modename,
2505 int num_suffix)
2507 char buffer[50];
2509 if (num_suffix == 0)
2510 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2511 else
2512 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2514 set_optab_libfunc (optable, mode, buffer);
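/* For example, following the sprintf format above, a call such as

     arm_set_fixed_optab_libfunc (add_optab, E_QQmode, "add", "qq", 3);

   registers the libcall name "__gnu_addqq3" for QQmode addition, while a
   num_suffix of 0 (not used by the callers below, which pass 2 or 3) would
   drop the trailing digit entirely.  */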
2517 static void
2518 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2519 machine_mode from, const char *funcname,
2520 const char *toname, const char *fromname)
2522 char buffer[50];
2523 const char *maybe_suffix_2 = "";
2525 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2526 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2527 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2528 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2529 maybe_suffix_2 = "2";
2531 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2532 maybe_suffix_2);
2534 set_conv_libfunc (optable, to, from, buffer);
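/* For example, a conversion from QQmode to HQmode keeps the signedness and
   stays within the fract modes, so the "2" suffix is selected and the
   registered name would be "__gnu_fractqqhq2"; a conversion from QQmode to
   SImode involves a non-fixed-point mode, so the plain "__gnu_fractqqsi" is
   used instead.  Names follow the sprintf format above: the function name,
   then the "from" mode, then the "to" mode.  */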
2537 static GTY(()) rtx speculation_barrier_libfunc;
2539 /* Record that we have no arithmetic or comparison libfuncs for
2540 machine mode MODE. */
2542 static void
2543 arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
2545 /* Arithmetic. */
2546 set_optab_libfunc (add_optab, mode, NULL);
2547 set_optab_libfunc (sdiv_optab, mode, NULL);
2548 set_optab_libfunc (smul_optab, mode, NULL);
2549 set_optab_libfunc (neg_optab, mode, NULL);
2550 set_optab_libfunc (sub_optab, mode, NULL);
2552 /* Comparisons. */
2553 set_optab_libfunc (eq_optab, mode, NULL);
2554 set_optab_libfunc (ne_optab, mode, NULL);
2555 set_optab_libfunc (lt_optab, mode, NULL);
2556 set_optab_libfunc (le_optab, mode, NULL);
2557 set_optab_libfunc (ge_optab, mode, NULL);
2558 set_optab_libfunc (gt_optab, mode, NULL);
2559 set_optab_libfunc (unord_optab, mode, NULL);
2562 /* Set up library functions unique to ARM. */
2563 static void
2564 arm_init_libfuncs (void)
2566 machine_mode mode_iter;
2568 /* For Linux, we have access to kernel support for atomic operations. */
2569 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2570 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2572 /* There are no special library functions unless we are using the
2573 ARM BPABI. */
2574 if (!TARGET_BPABI)
2575 return;
2577 /* The functions below are described in Section 4 of the "Run-Time
2578 ABI for the ARM architecture", Version 1.0. */
2580 /* Double-precision floating-point arithmetic. Table 2. */
2581 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2582 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2583 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2584 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2585 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2587 /* Double-precision comparisons. Table 3. */
2588 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2589 set_optab_libfunc (ne_optab, DFmode, NULL);
2590 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2591 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2592 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2593 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2594 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2596 /* Single-precision floating-point arithmetic. Table 4. */
2597 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2598 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2599 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2600 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2601 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2603 /* Single-precision comparisons. Table 5. */
2604 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2605 set_optab_libfunc (ne_optab, SFmode, NULL);
2606 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2607 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2608 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2609 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2610 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2612 /* Floating-point to integer conversions. Table 6. */
2613 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2614 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2615 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2616 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2617 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2618 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2619 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2620 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2622 /* Conversions between floating types. Table 7. */
2623 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2624 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2626 /* Integer to floating-point conversions. Table 8. */
2627 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2628 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2629 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2630 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2631 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2632 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2633 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2634 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2636 /* Long long. Table 9. */
2637 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2638 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2639 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2640 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2641 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2642 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2643 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2644 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2646 /* Integer (32/32->32) division. \S 4.3.1. */
2647 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2648 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2650 /* The divmod functions are designed so that they can be used for
2651 plain division, even though they return both the quotient and the
2652 remainder. The quotient is returned in the usual location (i.e.,
2653 r0 for SImode, {r0, r1} for DImode), just as would be expected
2654 for an ordinary division routine. Because the AAPCS calling
2655 conventions specify that all of { r0, r1, r2, r3 } are
2656 callee-saved registers, there is no need to tell the compiler
2657 explicitly that those registers are clobbered by these
2658 routines. */
2659 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2660 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2662 /* For SImode division the ABI provides div-without-mod routines,
2663 which are faster. */
2664 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2665 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2667 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2668 divmod libcalls instead. */
2669 set_optab_libfunc (smod_optab, DImode, NULL);
2670 set_optab_libfunc (umod_optab, DImode, NULL);
2671 set_optab_libfunc (smod_optab, SImode, NULL);
2672 set_optab_libfunc (umod_optab, SImode, NULL);
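/* To make the effect of the settings above concrete: on a core without a
   hardware divide instruction, a plain "a / b" in SImode expands to a call
   to "__aeabi_idiv", whereas "a % b" has no dedicated libcall and is
   instead implemented via "__aeabi_idivmod", which per the run-time ABI
   returns the quotient in r0 and the remainder in r1, so the compiler
   simply reads the remainder from r1.  */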
2674 /* Half-precision float operations. The compiler handles all operations
2675 with NULL libfuncs by converting to SFmode. */
2676 switch (arm_fp16_format)
2678 case ARM_FP16_FORMAT_IEEE:
2679 case ARM_FP16_FORMAT_ALTERNATIVE:
2681 /* Conversions. */
2682 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2683 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2684 ? "__gnu_f2h_ieee"
2685 : "__gnu_f2h_alternative"));
2686 set_conv_libfunc (sext_optab, SFmode, HFmode,
2687 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2688 ? "__gnu_h2f_ieee"
2689 : "__gnu_h2f_alternative"));
2691 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2692 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2693 ? "__gnu_d2h_ieee"
2694 : "__gnu_d2h_alternative"));
2696 arm_block_arith_comp_libfuncs_for_mode (HFmode);
2697 break;
2699 default:
2700 break;
2703 /* For all possible libcalls in BFmode, record NULL. */
2704 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
2706 set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
2707 set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
2708 set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
2709 set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
2711 arm_block_arith_comp_libfuncs_for_mode (BFmode);
2713 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2715 const arm_fixed_mode_set fixed_arith_modes[] =
2717 { E_QQmode, "qq" },
2718 { E_UQQmode, "uqq" },
2719 { E_HQmode, "hq" },
2720 { E_UHQmode, "uhq" },
2721 { E_SQmode, "sq" },
2722 { E_USQmode, "usq" },
2723 { E_DQmode, "dq" },
2724 { E_UDQmode, "udq" },
2725 { E_TQmode, "tq" },
2726 { E_UTQmode, "utq" },
2727 { E_HAmode, "ha" },
2728 { E_UHAmode, "uha" },
2729 { E_SAmode, "sa" },
2730 { E_USAmode, "usa" },
2731 { E_DAmode, "da" },
2732 { E_UDAmode, "uda" },
2733 { E_TAmode, "ta" },
2734 { E_UTAmode, "uta" }
2736 const arm_fixed_mode_set fixed_conv_modes[] =
2738 { E_QQmode, "qq" },
2739 { E_UQQmode, "uqq" },
2740 { E_HQmode, "hq" },
2741 { E_UHQmode, "uhq" },
2742 { E_SQmode, "sq" },
2743 { E_USQmode, "usq" },
2744 { E_DQmode, "dq" },
2745 { E_UDQmode, "udq" },
2746 { E_TQmode, "tq" },
2747 { E_UTQmode, "utq" },
2748 { E_HAmode, "ha" },
2749 { E_UHAmode, "uha" },
2750 { E_SAmode, "sa" },
2751 { E_USAmode, "usa" },
2752 { E_DAmode, "da" },
2753 { E_UDAmode, "uda" },
2754 { E_TAmode, "ta" },
2755 { E_UTAmode, "uta" },
2756 { E_QImode, "qi" },
2757 { E_HImode, "hi" },
2758 { E_SImode, "si" },
2759 { E_DImode, "di" },
2760 { E_TImode, "ti" },
2761 { E_SFmode, "sf" },
2762 { E_DFmode, "df" }
2764 unsigned int i, j;
2766 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2768 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2769 "add", fixed_arith_modes[i].name, 3);
2770 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2771 "ssadd", fixed_arith_modes[i].name, 3);
2772 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2773 "usadd", fixed_arith_modes[i].name, 3);
2774 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2775 "sub", fixed_arith_modes[i].name, 3);
2776 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2777 "sssub", fixed_arith_modes[i].name, 3);
2778 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2779 "ussub", fixed_arith_modes[i].name, 3);
2780 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2781 "mul", fixed_arith_modes[i].name, 3);
2782 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2783 "ssmul", fixed_arith_modes[i].name, 3);
2784 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2785 "usmul", fixed_arith_modes[i].name, 3);
2786 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2787 "div", fixed_arith_modes[i].name, 3);
2788 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2789 "udiv", fixed_arith_modes[i].name, 3);
2790 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2791 "ssdiv", fixed_arith_modes[i].name, 3);
2792 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2793 "usdiv", fixed_arith_modes[i].name, 3);
2794 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2795 "neg", fixed_arith_modes[i].name, 2);
2796 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2797 "ssneg", fixed_arith_modes[i].name, 2);
2798 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2799 "usneg", fixed_arith_modes[i].name, 2);
2800 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2801 "ashl", fixed_arith_modes[i].name, 3);
2802 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2803 "ashr", fixed_arith_modes[i].name, 3);
2804 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2805 "lshr", fixed_arith_modes[i].name, 3);
2806 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2807 "ssashl", fixed_arith_modes[i].name, 3);
2808 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2809 "usashl", fixed_arith_modes[i].name, 3);
2810 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2811 "cmp", fixed_arith_modes[i].name, 2);
2814 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2815 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2817 if (i == j
2818 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2819 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2820 continue;
2822 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2823 fixed_conv_modes[j].mode, "fract",
2824 fixed_conv_modes[i].name,
2825 fixed_conv_modes[j].name);
2826 arm_set_fixed_conv_libfunc (satfract_optab,
2827 fixed_conv_modes[i].mode,
2828 fixed_conv_modes[j].mode, "satfract",
2829 fixed_conv_modes[i].name,
2830 fixed_conv_modes[j].name);
2831 arm_set_fixed_conv_libfunc (fractuns_optab,
2832 fixed_conv_modes[i].mode,
2833 fixed_conv_modes[j].mode, "fractuns",
2834 fixed_conv_modes[i].name,
2835 fixed_conv_modes[j].name);
2836 arm_set_fixed_conv_libfunc (satfractuns_optab,
2837 fixed_conv_modes[i].mode,
2838 fixed_conv_modes[j].mode, "satfractuns",
2839 fixed_conv_modes[i].name,
2840 fixed_conv_modes[j].name);
2844 if (TARGET_AAPCS_BASED)
2845 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2847 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2850 /* Implement TARGET_GIMPLE_FOLD_BUILTIN. */
2851 static bool
2852 arm_gimple_fold_builtin (gimple_stmt_iterator *gsi)
2854 gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
2855 tree fndecl = gimple_call_fndecl (stmt);
2856 unsigned int code = DECL_MD_FUNCTION_CODE (fndecl);
2857 unsigned int subcode = code >> ARM_BUILTIN_SHIFT;
2858 gimple *new_stmt = NULL;
2859 switch (code & ARM_BUILTIN_CLASS)
2861 case ARM_BUILTIN_GENERAL:
2862 break;
2863 case ARM_BUILTIN_MVE:
2864 new_stmt = arm_mve::gimple_fold_builtin (subcode, stmt);
2866 if (!new_stmt)
2867 return false;
2869 gsi_replace (gsi, new_stmt, true);
2870 return true;
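/* The split above mirrors how ARM builtin function codes are packed: the
   low-order bits selected by ARM_BUILTIN_CLASS identify the builtin class
   (general, MVE, ...), and the remaining bits, obtained by shifting right
   by ARM_BUILTIN_SHIFT, give the per-class subcode handed to the
   class-specific folder.  Roughly, an MVE builtin's code behaves as if it
   were (subcode << ARM_BUILTIN_SHIFT) | ARM_BUILTIN_MVE.  */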
2873 /* On AAPCS systems, this is the "struct __va_list". */
2874 static GTY(()) tree va_list_type;
2876 /* Return the type to use as __builtin_va_list. */
2877 static tree
2878 arm_build_builtin_va_list (void)
2880 tree va_list_name;
2881 tree ap_field;
2883 if (!TARGET_AAPCS_BASED)
2884 return std_build_builtin_va_list ();
2886 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2887 defined as:
2889 struct __va_list
2891 void *__ap;
2894 The C Library ABI further reinforces this definition in \S
2895 4.1.
2897 We must follow this definition exactly. The structure tag
2898 name is visible in C++ mangled names, and thus forms a part
2899 of the ABI. The field name may be used by people who
2900 #include <stdarg.h>. */
2901 /* Create the type. */
2902 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2903 /* Give it the required name. */
2904 va_list_name = build_decl (BUILTINS_LOCATION,
2905 TYPE_DECL,
2906 get_identifier ("__va_list"),
2907 va_list_type);
2908 DECL_ARTIFICIAL (va_list_name) = 1;
2909 TREE_PUBLIC (va_list_name) = 1;
2910 TYPE_NAME (va_list_type) = va_list_name;
2911 TYPE_STUB_DECL (va_list_type) = va_list_name;
2912 /* Create the __ap field. */
2913 ap_field = build_decl (BUILTINS_LOCATION,
2914 FIELD_DECL,
2915 get_identifier ("__ap"),
2916 ptr_type_node);
2917 DECL_ARTIFICIAL (ap_field) = 1;
2918 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2919 TYPE_FIELDS (va_list_type) = ap_field;
2920 /* Compute its layout. */
2921 layout_type (va_list_type);
2923 return va_list_type;
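/* The tree built above corresponds, in C terms, to roughly

     struct __va_list { void *__ap; };
     typedef struct __va_list va_list;

   which is why the tag and field names are fixed: "__va_list" appears in
   C++ mangled names, and "__ap" may be referenced by code that peeks
   behind <stdarg.h>.  */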
2926 /* Return an expression of type "void *" pointing to the next
2927 available argument in a variable-argument list. VALIST is the
2928 user-level va_list object, of type __builtin_va_list. */
2929 static tree
2930 arm_extract_valist_ptr (tree valist)
2932 if (TREE_TYPE (valist) == error_mark_node)
2933 return error_mark_node;
2935 /* On an AAPCS target, the pointer is stored within "struct
2936 va_list". */
2937 if (TARGET_AAPCS_BASED)
2939 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2940 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2941 valist, ap_field, NULL_TREE);
2944 return valist;
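/* On AAPCS targets, the COMPONENT_REF built above is the tree equivalent
   of writing "valist.__ap" in C, i.e. it selects the single pointer field
   out of the struct sketched earlier.  */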
2947 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2948 static void
2949 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2951 valist = arm_extract_valist_ptr (valist);
2952 std_expand_builtin_va_start (valist, nextarg);
2955 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2956 static tree
2957 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2958 gimple_seq *post_p)
2960 valist = arm_extract_valist_ptr (valist);
2961 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2964 /* Check any incompatible options that the user has specified. */
2965 static void
2966 arm_option_check_internal (struct gcc_options *opts)
2968 int flags = opts->x_target_flags;
2970 /* iWMMXt and NEON are incompatible. */
2971 if (TARGET_IWMMXT
2972 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2973 error ("iWMMXt and NEON are incompatible");
2975 /* Make sure that the processor choice does not conflict with any of the
2976 other command line choices. */
2977 if (TARGET_ARM_P (flags)
2978 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2979 error ("target CPU does not support ARM mode");
2981 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2982 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2983 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2985 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2986 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2988 /* If this target is normally configured to use APCS frames, warn if they
2989 are turned off and debugging is turned on. */
2990 if (TARGET_ARM_P (flags)
2991 && write_symbols != NO_DEBUG
2992 && !TARGET_APCS_FRAME
2993 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2994 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
2995 "debugging");
2997 /* iWMMXt unsupported under Thumb mode. */
2998 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2999 error ("iWMMXt unsupported under Thumb mode");
3001 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
3002 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
3004 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
3006 error ("RTP PIC is incompatible with Thumb");
3007 flag_pic = 0;
3010 if (target_pure_code || target_slow_flash_data)
3012 const char *flag = (target_pure_code ? "-mpure-code" :
3013 "-mslow-flash-data");
3014 bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;
3016 /* We only support -mslow-flash-data on M-profile targets with
3017 MOVT. */
3018 if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
3019 error ("%qs only supports non-pic code on M-profile targets with the "
3020 "MOVT instruction", flag);
3022 /* We only support -mpure-code on M-profile targets. */
3023 if (target_pure_code && common_unsupported_modes)
3024 error ("%qs only supports non-pic code on M-profile targets", flag);
3026 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
3027 -mword-relocations forbids relocation of MOVT/MOVW. */
3028 if (target_word_relocations)
3029 error ("%qs is incompatible with %<-mword-relocations%>", flag);
3033 /* Recompute the global settings depending on target attribute options. */
3035 static void
3036 arm_option_params_internal (void)
3038 /* If we are not using the default (ARM mode) section anchor offset
3039 ranges, then set the correct ranges now. */
3040 if (TARGET_THUMB1)
3042 /* Thumb-1 LDR instructions cannot have negative offsets.
3043 Permissible positive offset ranges are 5-bit (for byte loads),
3044 6-bit (for halfword loads), or 7-bit (for word loads).
3045 Empirical results suggest a 7-bit anchor range gives the best
3046 overall code size. */
3047 targetm.min_anchor_offset = 0;
3048 targetm.max_anchor_offset = 127;
3050 else if (TARGET_THUMB2)
3052 /* The minimum is set such that the total size of the block
3053 for a particular anchor is 248 + 1 + 4095 bytes, which is
3054 divisible by eight, ensuring natural spacing of anchors. */
3055 targetm.min_anchor_offset = -248;
3056 targetm.max_anchor_offset = 4095;
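/* Checking the arithmetic in the comment above: 248 + 1 + 4095 == 4344
   == 8 * 543, so the block covered by one anchor really is a multiple of
   eight bytes.  */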
3058 else
3060 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
3061 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
3064 /* Increase the number of conditional instructions with -Os. */
3065 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
3067 /* For THUMB2, we limit the conditional sequence to one IT block. */
3068 if (TARGET_THUMB2)
3069 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
3071 if (TARGET_THUMB1)
3072 targetm.md_asm_adjust = thumb1_md_asm_adjust;
3073 else
3074 targetm.md_asm_adjust = arm_md_asm_adjust;
3077 /* True if -mflip-thumb should next add an attribute for the default
3078 mode, false if it should next add an attribute for the opposite mode. */
3079 static GTY(()) bool thumb_flipper;
3081 /* Options after initial target override. */
3082 static GTY(()) tree init_optimize;
3084 static void
3085 arm_override_options_after_change_1 (struct gcc_options *opts,
3086 struct gcc_options *opts_set)
3088 /* -falign-functions without argument: supply one. */
3089 if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
3090 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
3091 && opts->x_optimize_size ? "2" : "4";
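/* For instance, compiling with -mthumb -Os and a bare -falign-functions
   ends up with str_align_functions set to "2" (2-byte alignment, matching
   the 16-bit Thumb instruction size), while any other combination defaults
   to "4".  */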
3094 /* Implement targetm.override_options_after_change. */
3096 static void
3097 arm_override_options_after_change (void)
3099 arm_override_options_after_change_1 (&global_options, &global_options_set);
3102 /* Implement TARGET_OPTION_RESTORE. */
3103 static void
3104 arm_option_restore (struct gcc_options */* opts */,
3105 struct gcc_options */* opts_set */,
3106 struct cl_target_option *ptr)
3108 arm_configure_build_target (&arm_active_target, ptr, false);
3109 arm_option_reconfigure_globals ();
3112 /* Reset options between modes that the user has specified. */
3113 static void
3114 arm_option_override_internal (struct gcc_options *opts,
3115 struct gcc_options *opts_set)
3117 arm_override_options_after_change_1 (opts, opts_set);
3119 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3121 /* The default is to enable interworking, so this warning message would
3122 be confusing to users who have just compiled with
3123 eg, -march=armv4. */
3124 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3125 opts->x_target_flags &= ~MASK_INTERWORK;
3128 if (TARGET_THUMB_P (opts->x_target_flags)
3129 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3131 warning (0, "target CPU does not support THUMB instructions");
3132 opts->x_target_flags &= ~MASK_THUMB;
3135 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3137 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3138 opts->x_target_flags &= ~MASK_APCS_FRAME;
3141 /* Callee super interworking implies thumb interworking. Adding
3142 this to the flags here simplifies the logic elsewhere. */
3143 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3144 opts->x_target_flags |= MASK_INTERWORK;
3146 /* We need to remember the initial values so that combinations of options like
3147 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3148 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3150 if (! opts_set->x_arm_restrict_it)
3151 opts->x_arm_restrict_it = arm_arch8;
3153 /* ARM execution state and M profile don't have [restrict] IT. */
3154 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3155 opts->x_arm_restrict_it = 0;
3157 /* Use the IT size from CPU specific tuning unless -mrestrict-it is used. */
3158 if (!opts_set->x_arm_restrict_it
3159 && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
3160 opts->x_arm_restrict_it = 0;
3162 /* Enable -munaligned-access by default for
3163 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3164 i.e. Thumb2 and ARM state only.
3165 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3166 - ARMv8 architecture-based processors.
3168 Disable -munaligned-access by default for
3169 - all pre-ARMv6 architecture-based processors
3170 - ARMv6-M architecture-based processors
3171 - ARMv8-M Baseline processors. */
3173 if (! opts_set->x_unaligned_access)
3175 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3176 && arm_arch6 && (arm_arch_notm || arm_arch7));
3178 else if (opts->x_unaligned_access == 1
3179 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3181 warning (0, "target CPU does not support unaligned accesses");
3182 opts->x_unaligned_access = 0;
3185 /* Don't warn since it's on by default in -O2. */
3186 if (TARGET_THUMB1_P (opts->x_target_flags))
3187 opts->x_flag_schedule_insns = 0;
3188 else
3189 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3191 /* Disable shrink-wrap when optimizing function for size, since it tends to
3192 generate additional returns. */
3193 if (optimize_function_for_size_p (cfun)
3194 && TARGET_THUMB2_P (opts->x_target_flags))
3195 opts->x_flag_shrink_wrap = false;
3196 else
3197 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3199 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3200 - epilogue_insns - does not accurately model the corresponding insns
3201 emitted in the asm file. In particular, see the comment in thumb_exit
3202 'Find out how many of the (return) argument registers we can corrupt'.
3203 As a consequence, the epilogue may clobber registers without fipa-ra
3204 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3205 TODO: Accurately model clobbers for epilogue_insns and reenable
3206 fipa-ra. */
3207 if (TARGET_THUMB1_P (opts->x_target_flags))
3208 opts->x_flag_ipa_ra = 0;
3209 else
3210 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3212 /* Thumb2 inline assembly code should always use unified syntax.
3213 This will apply to ARM and Thumb1 eventually. */
3214 if (TARGET_THUMB2_P (opts->x_target_flags))
3215 opts->x_inline_asm_unified = true;
3217 if (arm_stack_protector_guard == SSP_GLOBAL
3218 && opts->x_arm_stack_protector_guard_offset_str)
3220 error ("incompatible options %<-mstack-protector-guard=global%> and "
3221 "%<-mstack-protector-guard-offset=%s%>",
3222 arm_stack_protector_guard_offset_str);
3225 if (opts->x_arm_stack_protector_guard_offset_str)
3227 char *end;
3228 const char *str = arm_stack_protector_guard_offset_str;
3229 errno = 0;
3230 long offs = strtol (arm_stack_protector_guard_offset_str, &end, 0);
3231 if (!*str || *end || errno)
3232 error ("%qs is not a valid offset in %qs", str,
3233 "-mstack-protector-guard-offset=");
3234 arm_stack_protector_guard_offset = offs;
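/* Because strtol is called with base 0, the offset accepts decimal, octal
   and hexadecimal spellings alike: for example
   -mstack-protector-guard-offset=0x20 and -mstack-protector-guard-offset=32
   both set arm_stack_protector_guard_offset to 32, while an empty string or
   trailing junk is rejected by the *str/*end/errno checks above.  */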
3237 if (arm_current_function_pac_enabled_p ())
3239 if (!arm_arch8m_main)
3240 error ("This architecture does not support branch protection "
3241 "instructions");
3242 if (TARGET_TPCS_FRAME)
3243 sorry ("Return address signing is not supported with %<-mtpcs-frame%>.");
3246 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3247 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3248 #endif
3251 static sbitmap isa_all_fpubits_internal;
3252 static sbitmap isa_all_fpbits;
3253 static sbitmap isa_quirkbits;
3255 static void
3256 arm_handle_no_branch_protection (void)
3258 aarch_ra_sign_scope = AARCH_FUNCTION_NONE;
3259 aarch_enable_bti = 0;
3262 static void
3263 arm_handle_standard_branch_protection (void)
3265 aarch_ra_sign_scope = AARCH_FUNCTION_NON_LEAF;
3266 aarch_enable_bti = 1;
3269 static void
3270 arm_handle_pac_ret_protection (void)
3272 aarch_ra_sign_scope = AARCH_FUNCTION_NON_LEAF;
3275 static void
3276 arm_handle_pac_ret_leaf (void)
3278 aarch_ra_sign_scope = AARCH_FUNCTION_ALL;
3281 static void
3282 arm_handle_bti_protection (void)
3284 aarch_enable_bti = 1;
3287 static const struct aarch_branch_protect_type arm_pac_ret_subtypes[] = {
3288 { "leaf", false, arm_handle_pac_ret_leaf, NULL, 0 },
3289 { NULL, false, NULL, NULL, 0 }
3292 static const struct aarch_branch_protect_type arm_branch_protect_types[] = {
3293 { "none", true, arm_handle_no_branch_protection, NULL, 0 },
3294 { "standard", true, arm_handle_standard_branch_protection, NULL, 0 },
3295 { "pac-ret", false, arm_handle_pac_ret_protection, arm_pac_ret_subtypes,
3296 ARRAY_SIZE (arm_pac_ret_subtypes) },
3297 { "bti", false, arm_handle_bti_protection, NULL, 0 },
3298 { NULL, false, NULL, NULL, 0 }
3301 /* Configure a build target TARGET from the user-specified options OPTS and
3302 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3303 architecture have been specified, but the two are not identical. */
3304 void
3305 arm_configure_build_target (struct arm_build_target *target,
3306 struct cl_target_option *opts,
3307 bool warn_compatible)
3309 const cpu_option *arm_selected_tune = NULL;
3310 const arch_option *arm_selected_arch = NULL;
3311 const cpu_option *arm_selected_cpu = NULL;
3312 const arm_fpu_desc *arm_selected_fpu = NULL;
3313 const char *tune_opts = NULL;
3314 const char *arch_opts = NULL;
3315 const char *cpu_opts = NULL;
3317 bitmap_clear (target->isa);
3318 target->core_name = NULL;
3319 target->arch_name = NULL;
3321 if (opts->x_arm_arch_string)
3323 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3324 "-march",
3325 opts->x_arm_arch_string);
3326 arch_opts = strchr (opts->x_arm_arch_string, '+');
3329 if (opts->x_arm_cpu_string)
3331 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3332 opts->x_arm_cpu_string);
3333 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3334 arm_selected_tune = arm_selected_cpu;
3335 /* If taking the tuning from -mcpu, we don't need to rescan the
3336 options for tuning. */
3339 if (opts->x_arm_tune_string)
3341 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3342 opts->x_arm_tune_string);
3343 tune_opts = strchr (opts->x_arm_tune_string, '+');
3346 if (opts->x_arm_branch_protection_string)
3348 aarch_validate_mbranch_protection (arm_branch_protect_types,
3349 opts->x_arm_branch_protection_string,
3350 "-mbranch-protection=");
3353 if (arm_selected_arch)
3355 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3356 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3357 arch_opts);
3359 if (arm_selected_cpu)
3361 auto_sbitmap cpu_isa (isa_num_bits);
3362 auto_sbitmap isa_delta (isa_num_bits);
3364 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3365 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3366 cpu_opts);
3367 bitmap_xor (isa_delta, cpu_isa, target->isa);
3368 /* Ignore any bits that are quirk bits. */
3369 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3370 /* If the user (or the default configuration) has specified a
3371 specific FPU, then ignore any bits that depend on the FPU
3372 configuration. Do similarly if using the soft-float
3373 ABI. */
3374 if (opts->x_arm_fpu_index != TARGET_FPU_auto
3375 || arm_float_abi == ARM_FLOAT_ABI_SOFT)
3376 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);
3378 if (!bitmap_empty_p (isa_delta))
3380 if (warn_compatible)
3381 warning (0, "switch %<-mcpu=%s%> conflicts "
3382 "with switch %<-march=%s%>",
3383 opts->x_arm_cpu_string,
3384 opts->x_arm_arch_string);
3386 /* -march wins for code generation.
3387 -mcpu wins for default tuning. */
3388 if (!arm_selected_tune)
3389 arm_selected_tune = arm_selected_cpu;
3391 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3392 target->arch_name = arm_selected_arch->common.name;
3394 else
3396 /* Architecture and CPU are essentially the same.
3397 Prefer the CPU setting. */
3398 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3399 target->core_name = arm_selected_cpu->common.name;
3400 /* Copy the CPU's capabilities, so that we inherit the
3401 appropriate extensions and quirks. */
3402 bitmap_copy (target->isa, cpu_isa);
3405 else
3407 /* Pick a CPU based on the architecture. */
3408 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3409 target->arch_name = arm_selected_arch->common.name;
3410 /* Note: target->core_name is left unset in this path. */
3413 else if (arm_selected_cpu)
3415 target->core_name = arm_selected_cpu->common.name;
3416 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3417 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3418 cpu_opts);
3419 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3421 /* If the user did not specify a processor or architecture, choose
3422 one for them. */
3423 else
3425 const cpu_option *sel;
3426 auto_sbitmap sought_isa (isa_num_bits);
3427 bitmap_clear (sought_isa);
3428 auto_sbitmap default_isa (isa_num_bits);
3430 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3431 TARGET_CPU_DEFAULT);
3432 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3433 gcc_assert (arm_selected_cpu->common.name);
3435 /* RWE: All of the selection logic below (to the end of this
3436 'if' clause) looks somewhat suspect. It appears to be mostly
3437 there to support forcing thumb support when the default CPU
3438 does not have thumb (somewhat dubious in terms of what the
3439 user might be expecting). I think it should be removed once
3440 support for the pre-thumb era cores is removed. */
3441 sel = arm_selected_cpu;
3442 arm_initialize_isa (default_isa, sel->common.isa_bits);
3443 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3444 cpu_opts);
3446 /* Now check to see if the user has specified any command line
3447 switches that require certain abilities from the cpu. */
3449 if (TARGET_INTERWORK || TARGET_THUMB)
3450 bitmap_set_bit (sought_isa, isa_bit_thumb);
3452 /* If there are such requirements and the default CPU does not
3453 satisfy them, we need to run over the complete list of
3454 cores looking for one that is satisfactory. */
3455 if (!bitmap_empty_p (sought_isa)
3456 && !bitmap_subset_p (sought_isa, default_isa))
3458 auto_sbitmap candidate_isa (isa_num_bits);
3459 /* We're only interested in a CPU with at least the
3460 capabilities of the default CPU and the required
3461 additional features. */
3462 bitmap_ior (default_isa, default_isa, sought_isa);
3464 /* Try to locate a CPU type that supports all of the abilities
3465 of the default CPU, plus the extra abilities requested by
3466 the user. */
3467 for (sel = all_cores; sel->common.name != NULL; sel++)
3469 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3470 /* An exact match? */
3471 if (bitmap_equal_p (default_isa, candidate_isa))
3472 break;
3475 if (sel->common.name == NULL)
3477 unsigned current_bit_count = isa_num_bits;
3478 const cpu_option *best_fit = NULL;
3480 /* Ideally we would like to issue an error message here
3481 saying that it was not possible to find a CPU compatible
3482 with the default CPU, but which also supports the command
3483 line options specified by the programmer, and so they
3484 ought to use the -mcpu=<name> command line option to
3485 override the default CPU type.
3487 If we cannot find a CPU that has exactly the
3488 characteristics of the default CPU and the given
3489 command line options we scan the array again looking
3490 for a best match. The best match must have at least
3491 the capabilities of the perfect match. */
3492 for (sel = all_cores; sel->common.name != NULL; sel++)
3494 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3496 if (bitmap_subset_p (default_isa, candidate_isa))
3498 unsigned count;
3500 bitmap_and_compl (candidate_isa, candidate_isa,
3501 default_isa);
3502 count = bitmap_popcount (candidate_isa);
3504 if (count < current_bit_count)
3506 best_fit = sel;
3507 current_bit_count = count;
3511 gcc_assert (best_fit);
3512 sel = best_fit;
3515 arm_selected_cpu = sel;
3518 /* Now we know the CPU, we can finally initialize the target
3519 structure. */
3520 target->core_name = arm_selected_cpu->common.name;
3521 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3522 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3523 cpu_opts);
3524 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3527 gcc_assert (arm_selected_cpu);
3528 gcc_assert (arm_selected_arch);
3530 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3532 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3533 auto_sbitmap fpu_bits (isa_num_bits);
3535 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3536 /* This should clear out ALL bits relating to the FPU/simd
3537 extensions, to avoid potentially invalid combinations later on
3538 that we can't match. At present we only clear out those bits
3539 that can be set by -mfpu. This should be fixed in GCC-12. */
3540 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
3541 bitmap_ior (target->isa, target->isa, fpu_bits);
3544 /* If we have the soft-float ABI, clear any feature bits relating to use of
3545 floating-point operations. They'll just confuse things later on. */
3546 if (arm_float_abi == ARM_FLOAT_ABI_SOFT)
3547 bitmap_and_compl (target->isa, target->isa, isa_all_fpbits);
3549 /* There may be implied bits which we still need to enable. These are
3550 non-named features which are needed to complete other sets of features,
3551 but cannot be enabled from arm-cpus.in due to being shared between
3552 multiple fgroups. Each entry in all_implied_fbits is of the form
3553 ante -> cons, meaning that if the feature "ante" is enabled, we should
3554 implicitly enable "cons". */
3555 const struct fbit_implication *impl = all_implied_fbits;
3556 while (impl->ante)
3558 if (bitmap_bit_p (target->isa, impl->ante))
3559 bitmap_set_bit (target->isa, impl->cons);
3560 impl++;
3563 if (!arm_selected_tune)
3564 arm_selected_tune = arm_selected_cpu;
3565 else /* Validate the features passed to -mtune. */
3566 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3568 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3570 /* Finish initializing the target structure. */
3571 if (!target->arch_name)
3572 target->arch_name = arm_selected_arch->common.name;
3573 target->arch_pp_name = arm_selected_arch->arch;
3574 target->base_arch = arm_selected_arch->base_arch;
3575 target->profile = arm_selected_arch->profile;
3577 target->tune_flags = tune_data->tune_flags;
3578 target->tune = tune_data->tune;
3579 target->tune_core = tune_data->scheduler;
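/* For instance (a rough summary, not an exhaustive description of the logic
   above): with only -mcpu=<name> given, the architecture is derived from the
   CPU entry and target->core_name is set; with only -march=<name>, a default
   tuning CPU is picked from the architecture's tune_id and target->core_name
   is left unset; if both are given and their (non-quirk, non-FPU) ISA bits
   differ, a warning is issued and -march wins for code generation while
   -mcpu provides the default tuning.  */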
3582 /* Fix up any incompatible options that the user has specified. */
3583 static void
3584 arm_option_override (void)
3586 static const enum isa_feature fpu_bitlist_internal[]
3587 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3588 /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main. */
3589 static const enum isa_feature fp_bitlist[]
3590 = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
3591 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3592 cl_target_option opts;
3594 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3595 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3597 isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
3598 isa_all_fpbits = sbitmap_alloc (isa_num_bits);
3599 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
3600 arm_initialize_isa (isa_all_fpbits, fp_bitlist);
3602 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3604 if (!OPTION_SET_P (arm_fpu_index))
3606 bool ok;
3607 int fpu_index;
3609 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3610 CL_TARGET);
3611 gcc_assert (ok);
3612 arm_fpu_index = (enum fpu_type) fpu_index;
3615 cl_target_option_save (&opts, &global_options, &global_options_set);
3616 arm_configure_build_target (&arm_active_target, &opts, true);
3618 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3619 SUBTARGET_OVERRIDE_OPTIONS;
3620 #endif
3622 /* Initialize boolean versions of the architectural flags, for use
3623 in the arm.md file and for enabling feature flags. */
3624 arm_option_reconfigure_globals ();
3626 arm_tune = arm_active_target.tune_core;
3627 tune_flags = arm_active_target.tune_flags;
3628 current_tune = arm_active_target.tune;
3630 /* TBD: DWARF info for the APCS frame is not handled yet. */
3631 if (TARGET_APCS_FRAME)
3632 flag_shrink_wrap = false;
3634 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3636 warning (0, "%<-mapcs-stack-check%> incompatible with "
3637 "%<-mno-apcs-frame%>");
3638 target_flags |= MASK_APCS_FRAME;
3641 if (TARGET_POKE_FUNCTION_NAME)
3642 target_flags |= MASK_APCS_FRAME;
3644 if (TARGET_APCS_REENT && flag_pic)
3645 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3647 if (TARGET_APCS_REENT)
3648 warning (0, "APCS reentrant code not supported. Ignored");
3650 /* Set up some tuning parameters. */
3651 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3652 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3653 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3654 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3655 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3656 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3658 /* For arm2/3 there is no need to do any scheduling if we are doing
3659 software floating-point. */
3660 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3661 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3663 /* Override the default structure alignment for AAPCS ABI. */
3664 if (!OPTION_SET_P (arm_structure_size_boundary))
3666 if (TARGET_AAPCS_BASED)
3667 arm_structure_size_boundary = 8;
3669 else
3671 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3673 if (arm_structure_size_boundary != 8
3674 && arm_structure_size_boundary != 32
3675 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3677 if (ARM_DOUBLEWORD_ALIGN)
3678 warning (0,
3679 "structure size boundary can only be set to 8, 32 or 64");
3680 else
3681 warning (0, "structure size boundary can only be set to 8 or 32");
3682 arm_structure_size_boundary
3683 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3687 if (TARGET_VXWORKS_RTP)
3689 if (!OPTION_SET_P (arm_pic_data_is_text_relative))
3690 arm_pic_data_is_text_relative = 0;
3692 else if (flag_pic
3693 && !arm_pic_data_is_text_relative
3694 && !(OPTION_SET_P (target_flags) & MASK_SINGLE_PIC_BASE))
3695 /* When text & data segments don't have a fixed displacement, the
3696 intended use is with a single, read-only, PIC base register.
3697 Unless the user explicitly requested not to do that, set
3698 it. */
3699 target_flags |= MASK_SINGLE_PIC_BASE;
3701 /* If stack checking is disabled, we can use r10 as the PIC register,
3702 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3703 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3705 if (TARGET_VXWORKS_RTP)
3706 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3707 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3710 if (flag_pic && TARGET_VXWORKS_RTP)
3711 arm_pic_register = 9;
3713 /* If in FDPIC mode then force arm_pic_register to be r9. */
3714 if (TARGET_FDPIC)
3716 arm_pic_register = FDPIC_REGNUM;
3717 if (TARGET_THUMB1)
3718 sorry ("FDPIC mode is not supported in Thumb-1 mode");
3721 if (arm_pic_register_string != NULL)
3723 int pic_register = decode_reg_name (arm_pic_register_string);
3725 if (!flag_pic)
3726 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3728 /* Prevent the user from choosing an obviously stupid PIC register. */
3729 else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
3730 || pic_register == HARD_FRAME_POINTER_REGNUM
3731 || pic_register == STACK_POINTER_REGNUM
3732 || pic_register >= PC_REGNUM
3733 || (TARGET_VXWORKS_RTP
3734 && (unsigned int) pic_register != arm_pic_register))
3735 error ("unable to use %qs for PIC register", arm_pic_register_string);
3736 else
3737 arm_pic_register = pic_register;
3740 if (flag_pic)
3741 target_word_relocations = 1;
3743 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3744 if (fix_cm3_ldrd == 2)
3746 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3747 fix_cm3_ldrd = 1;
3748 else
3749 fix_cm3_ldrd = 0;
3752 /* Enable fix_vlldm by default if required. */
3753 if (fix_vlldm == 2)
3755 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
3756 fix_vlldm = 1;
3757 else
3758 fix_vlldm = 0;
3761 /* Enable fix_aes by default if required. */
3762 if (fix_aes_erratum_1742098 == 2)
3764 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_aes_1742098))
3765 fix_aes_erratum_1742098 = 1;
3766 else
3767 fix_aes_erratum_1742098 = 0;
3770 /* Hot/Cold partitioning is not currently supported, since we can't
3771 handle literal pool placement in that case. */
3772 if (flag_reorder_blocks_and_partition)
3774 inform (input_location,
3775 "%<-freorder-blocks-and-partition%> not supported "
3776 "on this architecture");
3777 flag_reorder_blocks_and_partition = 0;
3778 flag_reorder_blocks = 1;
3781 if (flag_pic)
3782 /* Hoisting PIC address calculations more aggressively provides a small,
3783 but measurable, size reduction for PIC code. Therefore, we decrease
3784 the bar for unrestricted expression hoisting to the cost of PIC address
3785 calculation, which is 2 instructions. */
3786 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3787 param_gcse_unrestricted_cost, 2);
3789 /* ARM EABI defaults to strict volatile bitfields. */
3790 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3791 && abi_version_at_least(2))
3792 flag_strict_volatile_bitfields = 1;
3794 /* Enable software prefetching at -O3 for CPUs that have prefetch and for
3795 which we have deemed it beneficial (signified by setting
3796 prefetch.num_slots to 1 or more). */
3797 if (flag_prefetch_loop_arrays < 0
3798 && HAVE_prefetch
3799 && optimize >= 3
3800 && current_tune->prefetch.num_slots > 0)
3801 flag_prefetch_loop_arrays = 1;
3803 /* Set up parameters to be used in prefetching algorithm. Do not
3804 override the defaults unless we are tuning for a core we have
3805 researched values for. */
3806 if (current_tune->prefetch.num_slots > 0)
3807 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3808 param_simultaneous_prefetches,
3809 current_tune->prefetch.num_slots);
3810 if (current_tune->prefetch.l1_cache_line_size >= 0)
3811 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3812 param_l1_cache_line_size,
3813 current_tune->prefetch.l1_cache_line_size);
3814 if (current_tune->prefetch.l1_cache_line_size >= 0)
3816 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3817 param_destruct_interfere_size,
3818 current_tune->prefetch.l1_cache_line_size);
3819 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3820 param_construct_interfere_size,
3821 current_tune->prefetch.l1_cache_line_size);
3823 else
3825 /* For a generic ARM target, JF Bastien proposed using 64 for both. */
3826 /* ??? Cortex-A9 has a 32-byte cache line, so why not 32 for the
3827 constructive interference size? */
3828 /* More recent Cortex chips have a 64-byte cache line, but are marked
3829 ARM_PREFETCH_NOT_BENEFICIAL, so they get these defaults. */
3830 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3831 param_destruct_interfere_size, 64);
3832 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3833 param_construct_interfere_size, 64);
3836 if (current_tune->prefetch.l1_cache_size >= 0)
3837 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3838 param_l1_cache_size,
3839 current_tune->prefetch.l1_cache_size);
3841 /* Look through ready list and all of queue for instructions
3842 relevant for L2 auto-prefetcher. */
3843 int sched_autopref_queue_depth;
3845 switch (current_tune->sched_autopref)
3847 case tune_params::SCHED_AUTOPREF_OFF:
3848 sched_autopref_queue_depth = -1;
3849 break;
3851 case tune_params::SCHED_AUTOPREF_RANK:
3852 sched_autopref_queue_depth = 0;
3853 break;
3855 case tune_params::SCHED_AUTOPREF_FULL:
3856 sched_autopref_queue_depth = max_insn_queue_index + 1;
3857 break;
3859 default:
3860 gcc_unreachable ();
3863 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3864 param_sched_autopref_queue_depth,
3865 sched_autopref_queue_depth);
3867 /* Currently, for slow flash data, we just disable literal pools. We also
3868 disable them for pure-code. */
3869 if (target_slow_flash_data || target_pure_code)
3870 arm_disable_literal_pool = true;
3872 /* Disable scheduling fusion by default if it's not armv7 processor
3873 or doesn't prefer ldrd/strd. */
3874 if (flag_schedule_fusion == 2
3875 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3876 flag_schedule_fusion = 0;
3878 /* Need to remember the initial options before they are overridden. */
3879 init_optimize = build_optimization_node (&global_options,
3880 &global_options_set);
3882 arm_options_perform_arch_sanity_checks ();
3883 arm_option_override_internal (&global_options, &global_options_set);
3884 arm_option_check_internal (&global_options);
3885 arm_option_params_internal ();
3887 /* Create the default target_options structure. */
3888 target_option_default_node = target_option_current_node
3889 = build_target_option_node (&global_options, &global_options_set);
3891 /* Register global variables with the garbage collector. */
3892 arm_add_gc_roots ();
3894 /* Record the initial ARM/Thumb mode for testing. */
3895 thumb_flipper = TARGET_THUMB;
3899 /* Reconfigure global status flags from the active_target.isa. */
3900 void
3901 arm_option_reconfigure_globals (void)
3903 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3904 arm_base_arch = arm_active_target.base_arch;
3906 /* Initialize boolean versions of the architectural flags, for use
3907 in the arm.md file. */
3908 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3909 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3910 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3911 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3912 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3913 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3914 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3915 arm_arch6m = arm_arch6 && !arm_arch_notm;
3916 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3917 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3918 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3919 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3920 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3921 arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3922 arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3923 arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
3924 isa_bit_armv8_1m_main);
3925 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3926 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3927 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3928 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3929 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3930 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3931 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3932 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3933 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3934 arm_arch8m_main = arm_arch7 && arm_arch_cmse;
3935 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3936 arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
3937 arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);
3939 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3940 if (arm_fp16_inst)
3942 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3943 error ("selected fp16 options are incompatible");
3944 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3947 arm_arch_cde = 0;
3948 arm_arch_cde_coproc = 0;
3949 int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
3950 isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
3951 isa_bit_cdecp6, isa_bit_cdecp7};
3952 for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
3954 int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
3955 if (cde_bit)
3957 arm_arch_cde |= cde_bit;
3958 arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
3962 /* And finally, set up some quirks. */
3963 arm_arch_no_volatile_ce
3964 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3965 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3966 isa_bit_quirk_armv6kz);
3968 /* Use the cp15 method if it is available. */
3969 if (target_thread_pointer == TP_AUTO)
3971 if (arm_arch6k && !TARGET_THUMB1)
3972 target_thread_pointer = TP_TPIDRURO;
3973 else
3974 target_thread_pointer = TP_SOFT;
3977 if (!TARGET_HARD_TP && arm_stack_protector_guard == SSP_TLSREG)
3978 error ("%<-mstack-protector-guard=tls%> needs a hardware TLS register");
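/* Roughly, the TP_AUTO resolution above means that, for example, an Armv7-A
   target ends up using the cp15 TPIDRURO register for the thread pointer,
   while a Thumb-1-only target such as Armv6-M falls back to the software
   thread pointer (TP_SOFT), which in turn makes
   -mstack-protector-guard=tls unusable there.  */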
3981 /* Perform some validation between the desired architecture and the rest of the
3982 options. */
3983 void
3984 arm_options_perform_arch_sanity_checks (void)
3986 /* V5T code we generate is completely interworking capable, so we turn off
3987 TARGET_INTERWORK here to avoid many tests later on. */
3989 /* XXX However, we must pass the right pre-processor defines to CPP
3990 or GLD can get confused. This is a hack. */
3991 if (TARGET_INTERWORK)
3992 arm_cpp_interwork = 1;
3994 if (arm_arch5t)
3995 target_flags &= ~MASK_INTERWORK;
3997 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3998 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
4000 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
4001 error ("iwmmxt abi requires an iwmmxt capable cpu");
4003 /* BPABI targets use linker tricks to allow interworking on cores
4004 without thumb support. */
4005 if (TARGET_INTERWORK
4006 && !TARGET_BPABI
4007 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
4009 warning (0, "target CPU does not support interworking" );
4010 target_flags &= ~MASK_INTERWORK;
4013 /* If soft-float is specified then don't use FPU. */
4014 if (TARGET_SOFT_FLOAT)
4015 arm_fpu_attr = FPU_NONE;
4016 else
4017 arm_fpu_attr = FPU_VFP;
4019 if (TARGET_AAPCS_BASED)
4021 if (TARGET_CALLER_INTERWORKING)
4022 error ("AAPCS does not support %<-mcaller-super-interworking%>");
4023 else
4024 if (TARGET_CALLEE_INTERWORKING)
4025 error ("AAPCS does not support %<-mcallee-super-interworking%>");
4028 /* __fp16 support currently assumes the core has ldrh. */
4029 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
4030 sorry ("%<__fp16%> and no ldrh");
4032 if (use_cmse && !arm_arch_cmse)
4033 error ("target CPU does not support ARMv8-M Security Extensions");
4035 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
4036 and ARMv8-M Baseline and Mainline do not allow such configuration. */
4037 if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
4038 error ("ARMv8-M Security Extensions incompatible with selected FPU");
4041 if (TARGET_AAPCS_BASED)
4043 if (arm_abi == ARM_ABI_IWMMXT)
4044 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
4045 else if (TARGET_HARD_FLOAT_ABI)
4047 arm_pcs_default = ARM_PCS_AAPCS_VFP;
4048 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
4049 && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
4050 error ("%<-mfloat-abi=hard%>: selected architecture lacks an FPU");
4052 else
4053 arm_pcs_default = ARM_PCS_AAPCS;
4055 else
4057 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
4058 sorry ("%<-mfloat-abi=hard%> and VFP");
4060 if (arm_abi == ARM_ABI_APCS)
4061 arm_pcs_default = ARM_PCS_APCS;
4062 else
4063 arm_pcs_default = ARM_PCS_ATPCS;
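/* By way of example (a rough summary of the PCS selection above): an AAPCS
   target with -mfloat-abi=hard selects ARM_PCS_AAPCS_VFP, provided the
   selected architecture actually has VFP or MVE; with the IWMMXT ABI it is
   ARM_PCS_AAPCS_IWMMXT; otherwise plain ARM_PCS_AAPCS.  Non-AAPCS targets
   fall back to ARM_PCS_APCS or ARM_PCS_ATPCS depending on -mabi.  */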
4067 /* Test whether a local function descriptor is canonical, i.e.,
4068 whether we can use GOTOFFFUNCDESC to compute the address of the
4069 function. */
4070 static bool
4071 arm_fdpic_local_funcdesc_p (rtx fnx)
4073 tree fn;
4074 enum symbol_visibility vis;
4075 bool ret;
4077 if (!TARGET_FDPIC)
4078 return true;
4080 if (! SYMBOL_REF_LOCAL_P (fnx))
4081 return false;
4083 fn = SYMBOL_REF_DECL (fnx);
4085 if (! fn)
4086 return false;
4088 vis = DECL_VISIBILITY (fn);
4090 if (vis == VISIBILITY_PROTECTED)
4091 /* Private function descriptors for protected functions are not
4092 canonical. Temporarily change the visibility to global so that
4093 we can ensure uniqueness of funcdesc pointers. */
4094 DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;
4096 ret = default_binds_local_p_1 (fn, flag_pic);
4098 DECL_VISIBILITY (fn) = vis;
4100 return ret;
4103 static void
4104 arm_add_gc_roots (void)
4106 gcc_obstack_init (&minipool_obstack);
4107 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
4110 /* A table of known ARM exception types.
4111 For use with the interrupt function attribute. */
4113 typedef struct
4115 const char *const arg;
4116 const unsigned long return_value;
4118 isr_attribute_arg;
4120 static const isr_attribute_arg isr_attribute_args [] =
4122 { "IRQ", ARM_FT_ISR },
4123 { "irq", ARM_FT_ISR },
4124 { "FIQ", ARM_FT_FIQ },
4125 { "fiq", ARM_FT_FIQ },
4126 { "ABORT", ARM_FT_ISR },
4127 { "abort", ARM_FT_ISR },
4128 { "UNDEF", ARM_FT_EXCEPTION },
4129 { "undef", ARM_FT_EXCEPTION },
4130 { "SWI", ARM_FT_EXCEPTION },
4131 { "swi", ARM_FT_EXCEPTION },
4132 { NULL, ARM_FT_NORMAL }
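/* So, for example, a handler declared with
   __attribute__ ((interrupt ("FIQ"))) is typed ARM_FT_FIQ, a bare
   __attribute__ ((interrupt)) defaults to ARM_FT_ISR, and an unrecognized
   string yields ARM_FT_UNKNOWN (see arm_isr_value below).  */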
4135 /* Returns the (interrupt) function type of the current
4136 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
4138 static unsigned long
4139 arm_isr_value (tree argument)
4141 const isr_attribute_arg * ptr;
4142 const char * arg;
4144 if (!arm_arch_notm)
4145 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
4147 /* No argument - default to IRQ. */
4148 if (argument == NULL_TREE)
4149 return ARM_FT_ISR;
4151 /* Get the value of the argument. */
4152 if (TREE_VALUE (argument) == NULL_TREE
4153 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
4154 return ARM_FT_UNKNOWN;
4156 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
4158 /* Check it against the list of known arguments. */
4159 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
4160 if (streq (arg, ptr->arg))
4161 return ptr->return_value;
4163 /* An unrecognized interrupt type. */
4164 return ARM_FT_UNKNOWN;
4167 /* Computes the type of the current function. */
4169 static unsigned long
4170 arm_compute_func_type (void)
4172 unsigned long type = ARM_FT_UNKNOWN;
4173 tree a;
4174 tree attr;
4176 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
4178 /* Decide if the current function is volatile. Such functions
4179 never return, and many memory cycles can be saved by not storing
4180 register values that will never be needed again. This optimization
4181 was added to speed up context switching in a kernel application. */
4182 if (optimize > 0
4183 && (TREE_NOTHROW (current_function_decl)
4184 || !(flag_unwind_tables
4185 || (flag_exceptions
4186 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
4187 && TREE_THIS_VOLATILE (current_function_decl))
4188 type |= ARM_FT_VOLATILE;
4190 if (cfun->static_chain_decl != NULL)
4191 type |= ARM_FT_NESTED;
4193 attr = DECL_ATTRIBUTES (current_function_decl);
4195 a = lookup_attribute ("naked", attr);
4196 if (a != NULL_TREE)
4197 type |= ARM_FT_NAKED;
4199 a = lookup_attribute ("isr", attr);
4200 if (a == NULL_TREE)
4201 a = lookup_attribute ("interrupt", attr);
4203 if (a == NULL_TREE)
4204 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
4205 else
4206 type |= arm_isr_value (TREE_VALUE (a));
4208 if (lookup_attribute ("cmse_nonsecure_entry", attr))
4209 type |= ARM_FT_CMSE_ENTRY;
4211 return type;
4214 /* Returns the type of the current function. */
4216 unsigned long
4217 arm_current_func_type (void)
4219 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
4220 cfun->machine->func_type = arm_compute_func_type ();
4222 return cfun->machine->func_type;
4225 bool
4226 arm_allocate_stack_slots_for_args (void)
4228 /* Naked functions should not allocate stack slots for arguments. */
4229 return !IS_NAKED (arm_current_func_type ());
4232 static bool
4233 arm_warn_func_return (tree decl)
4235 /* Naked functions are implemented entirely in assembly, including the
4236 return sequence, so suppress warnings about this. */
4237 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
4241 /* Output assembler code for a block containing the constant parts
4242 of a trampoline, leaving space for the variable parts.
4244 On the ARM, (if r8 is the static chain regnum, and remembering that
4245 referencing pc adds an offset of 8) the trampoline looks like:
4246 ldr r8, [pc, #0]
4247 ldr pc, [pc]
4248 .word static chain value
4249 .word function's address
4250 XXX FIXME: When the trampoline returns, r8 will be clobbered.
4252 In FDPIC mode, the trampoline looks like:
4253 .word trampoline address
4254 .word trampoline GOT address
4255 ldr r12, [pc, #8] ; #4 for Arm mode
4256 ldr r9, [pc, #8] ; #4 for Arm mode
4257 ldr pc, [pc, #8] ; #4 for Arm mode
4258 .word static chain value
4259 .word GOT address
4260 .word function's address
4263 static void
4264 arm_asm_trampoline_template (FILE *f)
4266 fprintf (f, "\t.syntax unified\n");
4268 if (TARGET_FDPIC)
4270 /* The first two words are a function descriptor pointing to the
4271 trampoline code just below. */
4272 if (TARGET_ARM)
4273 fprintf (f, "\t.arm\n");
4274 else if (TARGET_THUMB2)
4275 fprintf (f, "\t.thumb\n");
4276 else
4277 /* Only ARM and Thumb-2 are supported. */
4278 gcc_unreachable ();
4280 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4281 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4282 /* Trampoline code which sets the static chain register but also
4283 PIC register before jumping into real code. */
4284 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4285 STATIC_CHAIN_REGNUM, PC_REGNUM,
4286 TARGET_THUMB2 ? 8 : 4);
4287 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4288 PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
4289 TARGET_THUMB2 ? 8 : 4);
4290 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4291 PC_REGNUM, PC_REGNUM,
4292 TARGET_THUMB2 ? 8 : 4);
4293 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4295 else if (TARGET_ARM)
4297 fprintf (f, "\t.arm\n");
4298 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
4299 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
4301 else if (TARGET_THUMB2)
4303 fprintf (f, "\t.thumb\n");
4304 /* The Thumb-2 trampoline is similar to the ARM implementation.
4305 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
4306 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
4307 STATIC_CHAIN_REGNUM, PC_REGNUM);
4308 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
4310 else
4312 ASM_OUTPUT_ALIGN (f, 2);
4313 fprintf (f, "\t.code\t16\n");
4314 fprintf (f, ".Ltrampoline_start:\n");
4315 asm_fprintf (f, "\tpush\t{r0, r1}\n");
4316 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4317 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
4318 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4319 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
4320 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
4322 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4323 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4326 /* Emit RTL insns to initialize the variable parts of a trampoline. */
4328 static void
4329 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4331 rtx fnaddr, mem, a_tramp;
4333 emit_block_move (m_tramp, assemble_trampoline_template (),
4334 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
4336 if (TARGET_FDPIC)
4338 rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
4339 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
4340 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
4341 /* The function start address is at offset 8, but in Thumb mode
4342 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4343 below. */
4344 rtx trampoline_code_start
4345 = plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);
4347 /* Write initial funcdesc which points to the trampoline. */
4348 mem = adjust_address (m_tramp, SImode, 0);
4349 emit_move_insn (mem, trampoline_code_start);
4350 mem = adjust_address (m_tramp, SImode, 4);
4351 emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
4352 /* Set up the static chain. */
4353 mem = adjust_address (m_tramp, SImode, 20);
4354 emit_move_insn (mem, chain_value);
4355 /* GOT + real function entry point. */
4356 mem = adjust_address (m_tramp, SImode, 24);
4357 emit_move_insn (mem, gotaddr);
4358 mem = adjust_address (m_tramp, SImode, 28);
4359 emit_move_insn (mem, fnaddr);
4361 else
4363 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
4364 emit_move_insn (mem, chain_value);
4366 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
4367 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4368 emit_move_insn (mem, fnaddr);
4371 a_tramp = XEXP (m_tramp, 0);
4372 maybe_emit_call_builtin___clear_cache (a_tramp,
4373 plus_constant (ptr_mode,
4374 a_tramp,
4375 TRAMPOLINE_SIZE));
4378 /* Thumb trampolines should be entered in thumb mode, so set
4379 the bottom bit of the address. */
4381 static rtx
4382 arm_trampoline_adjust_address (rtx addr)
4384 /* For FDPIC don't fix trampoline address since it's a function
4385 descriptor and not a function address. */
4386 if (TARGET_THUMB && !TARGET_FDPIC)
4387 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4388 NULL, 0, OPTAB_LIB_WIDEN);
4389 return addr;
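/* For instance, a Thumb trampoline placed at 0x20001000 would be entered
   through the adjusted address 0x20001001; the set low bit selects Thumb
   state on the indirect transfer, matching the ORing-in of const1_rtx
   above (skipped for FDPIC, where the value is a function descriptor
   rather than a code address).  */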
4392 /* Return 1 if REG needs to be saved. For interrupt handlers, this
4393 includes call-clobbered registers too. If this is a leaf function
4394 we can just examine the registers used by the RTL, but otherwise we
4395 have to assume that whatever function is called might clobber
4396 anything, and so we have to save all the call-clobbered registers
4397 as well. */
4398 static inline bool reg_needs_saving_p (unsigned reg)
4400 unsigned long func_type = arm_current_func_type ();
4402 if (IS_INTERRUPT (func_type))
4403 if (df_regs_ever_live_p (reg)
4404 /* Save call-clobbered core registers. */
4405 || (! crtl->is_leaf && call_used_or_fixed_reg_p (reg) && reg < FIRST_VFP_REGNUM))
4406 return true;
4407 else
4408 return false;
4409 else
4410 if (!df_regs_ever_live_p (reg)
4411 || call_used_or_fixed_reg_p (reg))
4412 return false;
4413 else
4414 return true;
4417 /* Return 1 if it is possible to return using a single instruction.
4418 If SIBLING is non-null, this is a test for a return before a sibling
4419 call. SIBLING is the call insn, so we can examine its register usage. */
4422 use_return_insn (int iscond, rtx sibling)
4424 int regno;
4425 unsigned int func_type;
4426 unsigned long saved_int_regs;
4427 unsigned HOST_WIDE_INT stack_adjust;
4428 arm_stack_offsets *offsets;
4430 /* Never use a return instruction before reload has run. */
4431 if (!reload_completed)
4432 return 0;
4434 /* Never use a return instruction when return address signing
4435 mechanism is enabled as it requires more than one
4436 instruction. */
4437 if (arm_current_function_pac_enabled_p ())
4438 return 0;
4440 func_type = arm_current_func_type ();
4442 /* Naked, volatile and stack alignment functions need special
4443 consideration. */
4444 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4445 return 0;
4447 /* So do interrupt functions that use the frame pointer and Thumb
4448 interrupt functions. */
4449 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4450 return 0;
4452 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4453 && !optimize_function_for_size_p (cfun))
4454 return 0;
4456 offsets = arm_get_frame_offsets ();
4457 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4459 /* As do variadic functions. */
4460 if (crtl->args.pretend_args_size
4461 || cfun->machine->uses_anonymous_args
4462 /* Or if the function calls __builtin_eh_return () */
4463 || crtl->calls_eh_return
4464 /* Or if the function calls alloca */
4465 || cfun->calls_alloca
4466 /* Or if there is a stack adjustment. However, if the stack pointer
4467 is saved on the stack, we can use a pre-incrementing stack load. */
4468 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4469 && stack_adjust == 4))
4470 /* Or if the static chain register was saved above the frame, under the
4471 assumption that the stack pointer isn't saved on the stack. */
4472 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4473 && arm_compute_static_chain_stack_bytes() != 0))
4474 return 0;
4476 saved_int_regs = offsets->saved_regs_mask;
4478 /* Unfortunately, the insn
4480 ldmib sp, {..., sp, ...}
4482 triggers a bug on most SA-110 based devices, such that the stack
4483 pointer won't be correctly restored if the instruction takes a
4484 page fault. We work around this problem by popping r3 along with
4485 the other registers, since that is never slower than executing
4486 another instruction.
4488 We test for !arm_arch5t here, because code for any architecture
4489 less than this could potentially be run on one of the buggy
4490 chips. */
4491 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4493 /* Validate that r3 is a call-clobbered register (always true in
4494 the default abi) ... */
4495 if (!call_used_or_fixed_reg_p (3))
4496 return 0;
4498 /* ... that it isn't being used for a return value ... */
4499 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4500 return 0;
4502 /* ... or for a tail-call argument ... */
4503 if (sibling)
4505 gcc_assert (CALL_P (sibling));
4507 if (find_regno_fusage (sibling, USE, 3))
4508 return 0;
4511 /* ... and that there are no call-saved registers in r0-r2
4512 (always true in the default ABI). */
4513 if (saved_int_regs & 0x7)
4514 return 0;
4517 /* Can't be done if interworking with Thumb, and any registers have been
4518 stacked. */
4519 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4520 return 0;
4522 /* On StrongARM, conditional returns are expensive if they aren't
4523 taken and multiple registers have been stacked. */
4524 if (iscond && arm_tune_strongarm)
4526 /* Conditional return when just the LR is stored is a simple
4527 conditional-load instruction, that's not expensive. */
4528 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4529 return 0;
4531 if (flag_pic
4532 && arm_pic_register != INVALID_REGNUM
4533 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4534 return 0;
4537 /* ARMv8-M non-secure entry functions need to use bxns to return and thus need
4538 several instructions if anything needs to be popped. Armv8.1-M Mainline
4539 also needs several instructions to save and restore the FP context. */
4540 if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
4541 return 0;
4543 /* If there are saved registers but the LR isn't saved, then we need
4544 two instructions for the return. */
4545 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4546 return 0;
4548 /* Can't be done if any of the VFP regs are pushed,
4549 since this also requires an insn. */
4550 if (TARGET_VFP_BASE)
4551 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4552 if (reg_needs_saving_p (regno))
4553 return 0;
4555 if (TARGET_REALLY_IWMMXT)
4556 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4557 if (reg_needs_saving_p (regno))
4558 return 0;
4560 return 1;
4563 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4564 shrink-wrapping if possible. This is the case if we need to emit a
4565 prologue, which we can test by looking at the offsets. */
4566 bool
4567 use_simple_return_p (void)
4569 arm_stack_offsets *offsets;
4571 /* Note this function can be called before or after reload. */
4572 if (!reload_completed)
4573 arm_compute_frame_layout ();
4575 offsets = arm_get_frame_offsets ();
4576 return offsets->outgoing_args != 0;
4579 /* Return TRUE if int I is a valid immediate ARM constant. */
4582 const_ok_for_arm (HOST_WIDE_INT i)
4584 int lowbit;
4586 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4587 be all zero, or all one. */
4588 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4589 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4590 != ((~(unsigned HOST_WIDE_INT) 0)
4591 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4592 return FALSE;
4594 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4596 /* Fast return for 0 and small values. We must do this for zero, since
4597 the code below can't handle that one case. */
4598 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4599 return TRUE;
4601 /* Get the number of trailing zeros. */
4602 lowbit = ffs((int) i) - 1;
4604 /* Only even shifts are allowed in ARM mode so round down to the
4605 nearest even number. */
4606 if (TARGET_ARM)
4607 lowbit &= ~1;
4609 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4610 return TRUE;
4612 if (TARGET_ARM)
4614 /* Allow rotated constants in ARM mode. */
4615 if (lowbit <= 4
4616 && ((i & ~0xc000003f) == 0
4617 || (i & ~0xf000000f) == 0
4618 || (i & ~0xfc000003) == 0))
4619 return TRUE;
4621 else if (TARGET_THUMB2)
4623 HOST_WIDE_INT v;
4625 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4626 v = i & 0xff;
4627 v |= v << 16;
4628 if (i == v || i == (v | (v << 8)))
4629 return TRUE;
4631 /* Allow repeated pattern 0xXY00XY00. */
4632 v = i & 0xff00;
4633 v |= v << 16;
4634 if (i == v)
4635 return TRUE;
4637 else if (TARGET_HAVE_MOVT)
4639 /* Thumb-1 Targets with MOVT. */
4640 if (i > 0xffff)
4641 return FALSE;
4642 else
4643 return TRUE;
4646 return FALSE;
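/* A few illustrative values: 0xFF, 0xFF000000 and 0x0003FC00 are all valid
   ARM immediates (an 8-bit value rotated right by an even amount), whereas
   0x00FFFF00 is not.  Thumb-2 additionally accepts replicated patterns such
   as 0x00AB00AB, 0xAB00AB00 and 0xABABABAB.  */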
4649 /* Return true if I is a valid constant for the operation CODE. */
4651 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4653 if (const_ok_for_arm (i))
4654 return 1;
4656 switch (code)
4658 case SET:
4659 /* See if we can use movw. */
4660 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4661 return 1;
4662 else
4663 /* Otherwise, try mvn. */
4664 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4666 case PLUS:
4667 /* See if we can use addw or subw. */
4668 if (TARGET_THUMB2
4669 && ((i & 0xfffff000) == 0
4670 || ((-i) & 0xfffff000) == 0))
4671 return 1;
4672 /* Fall through. */
4673 case COMPARE:
4674 case EQ:
4675 case NE:
4676 case GT:
4677 case LE:
4678 case LT:
4679 case GE:
4680 case GEU:
4681 case LTU:
4682 case GTU:
4683 case LEU:
4684 case UNORDERED:
4685 case ORDERED:
4686 case UNEQ:
4687 case UNGE:
4688 case UNLT:
4689 case UNGT:
4690 case UNLE:
4691 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4693 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4694 case XOR:
4695 return 0;
4697 case IOR:
4698 if (TARGET_THUMB2)
4699 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4700 return 0;
4702 case AND:
4703 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4705 default:
4706 gcc_unreachable ();
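/* Some example outcomes of the switch above: for PLUS, -1 is accepted
   because its negation, 1, is a valid immediate (and Thumb-2 can also use
   subw); for AND, 0xFFFFFF00 is accepted because its complement 0xFF is
   valid; for XOR, only directly-encodable constants are allowed.  */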
4710 /* Return true if I is a valid di mode constant for the operation CODE. */
4712 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4714 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4715 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4716 rtx hi = GEN_INT (hi_val);
4717 rtx lo = GEN_INT (lo_val);
4719 if (TARGET_THUMB1)
4720 return 0;
4722 switch (code)
4724 case AND:
4725 case IOR:
4726 case XOR:
4727 return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
4728 || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
4729 case PLUS:
4730 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4732 default:
4733 return 0;
4737 /* Emit a sequence of insns to handle a large constant.
4738 CODE is the code of the operation required, it can be any of SET, PLUS,
4739 IOR, AND, XOR, MINUS;
4740 MODE is the mode in which the operation is being performed;
4741 VAL is the integer to operate on;
4742 SOURCE is the other operand (a register, or a null-pointer for SET);
4743 SUBTARGETS means it is safe to create scratch registers if that will
4744 either produce a simpler sequence, or we will want to cse the values.
4745 Return value is the number of insns emitted. */
4747 /* ??? Tweak this for thumb2. */
4749 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4750 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4752 rtx cond;
4754 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4755 cond = COND_EXEC_TEST (PATTERN (insn));
4756 else
4757 cond = NULL_RTX;
4759 if (subtargets || code == SET
4760 || (REG_P (target) && REG_P (source)
4761 && REGNO (target) != REGNO (source)))
4763 /* After arm_reorg has been called, we can't fix up expensive
4764 constants by pushing them into memory so we must synthesize
4765 them in-line, regardless of the cost. This is only likely to
4766 be more costly on chips that have load delay slots and we are
4767 compiling without running the scheduler (so no splitting
4768 occurred before the final instruction emission).
4770 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4772 if (!cfun->machine->after_arm_reorg
4773 && !cond
4774 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4775 1, 0)
4776 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4777 + (code != SET))))
4779 if (code == SET)
4781 /* Currently SET is the only monadic value for CODE; all
4782 the rest are dyadic. */
4783 if (TARGET_USE_MOVT)
4784 arm_emit_movpair (target, GEN_INT (val));
4785 else
4786 emit_set_insn (target, GEN_INT (val));
4788 return 1;
4790 else
4792 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4794 if (TARGET_USE_MOVT)
4795 arm_emit_movpair (temp, GEN_INT (val));
4796 else
4797 emit_set_insn (temp, GEN_INT (val));
4799 /* For MINUS, the constant is the value being subtracted from,
4800 since we never have subtraction of a constant. */
4801 if (code == MINUS)
4802 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4803 else
4804 emit_set_insn (target,
4805 gen_rtx_fmt_ee (code, mode, source, temp));
4806 return 2;
4811 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4815 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4816 ARM/Thumb-2 immediates and add up to VAL.
4817 The function's return value gives the number of insns required. */
4818 static int
4819 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4820 struct four_ints *return_sequence)
4822 int best_consecutive_zeros = 0;
4823 int i;
4824 int best_start = 0;
4825 int insns1, insns2;
4826 struct four_ints tmp_sequence;
4828 /* If we aren't targeting ARM, the best place to start is always at
4829 the bottom, otherwise look more closely. */
4830 if (TARGET_ARM)
4832 for (i = 0; i < 32; i += 2)
4834 int consecutive_zeros = 0;
4836 if (!(val & (3 << i)))
4838 while ((i < 32) && !(val & (3 << i)))
4840 consecutive_zeros += 2;
4841 i += 2;
4843 if (consecutive_zeros > best_consecutive_zeros)
4845 best_consecutive_zeros = consecutive_zeros;
4846 best_start = i - consecutive_zeros;
4848 i -= 2;
4853 /* So long as it won't require any more insns to do so, it's
4854 desirable to emit a small constant (in bits 0...9) in the last
4855 insn. This way there is more chance that it can be combined with
4856 a later addressing insn to form a pre-indexed load or store
4857 operation. Consider:
4859 *((volatile int *)0xe0000100) = 1;
4860 *((volatile int *)0xe0000110) = 2;
4862 We want this to wind up as:
4864 mov rA, #0xe0000000
4865 mov rB, #1
4866 str rB, [rA, #0x100]
4867 mov rB, #2
4868 str rB, [rA, #0x110]
4870 rather than having to synthesize both large constants from scratch.
4872 Therefore, we calculate how many insns would be required to emit
4873 the constant starting from `best_start', and also starting from
4874 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4875 yield a shorter sequence, we may as well use zero. */
4876 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4877 if (best_start != 0
4878 && ((HOST_WIDE_INT_1U << best_start) < val))
4880 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4881 if (insns2 <= insns1)
4883 *return_sequence = tmp_sequence;
4884 insns1 = insns2;
4888 return insns1;
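/* As a rough illustration of the splitting this drives: a SET of
   0x000F0F00, which is not itself a valid immediate, might be synthesized
   as a mov of #0x000F0000 followed by an add (or orr) of #0x00000F00,
   i.e. two 8-bit rotated immediates whose union covers the constant; the
   best_start heuristic above decides where the chopping into 8-bit chunks
   begins.  */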
4891 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4892 static int
4893 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4894 struct four_ints *return_sequence, int i)
4896 int remainder = val & 0xffffffff;
4897 int insns = 0;
4899 /* Try and find a way of doing the job in either two or three
4900 instructions.
4902 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4903 location. We start at position I. This may be the MSB, or
4904 optimal_immediate_sequence may have positioned it at the largest block
4905 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4906 wrapping around to the top of the word when we drop off the bottom.
4907 In the worst case this code should produce no more than four insns.
4909 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4910 constants, shifted to any arbitrary location. We should always start
4911 at the MSB. */
4914 int end;
4915 unsigned int b1, b2, b3, b4;
4916 unsigned HOST_WIDE_INT result;
4917 int loc;
4919 gcc_assert (insns < 4);
4921 if (i <= 0)
4922 i += 32;
4924 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4925 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4927 loc = i;
4928 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4929 /* We can use addw/subw for the last 12 bits. */
4930 result = remainder;
4931 else
4933 /* Use an 8-bit shifted/rotated immediate. */
4934 end = i - 8;
4935 if (end < 0)
4936 end += 32;
4937 result = remainder & ((0x0ff << end)
4938 | ((i < end) ? (0xff >> (32 - end))
4939 : 0));
4940 i -= 8;
4943 else
4945 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4946 arbitrary shifts. */
4947 i -= TARGET_ARM ? 2 : 1;
4948 continue;
4951 /* Next, see if we can do a better job with a thumb2 replicated
4952 constant.
4954 We do it this way around to catch the cases like 0x01F001E0 where
4955 two 8-bit immediates would work, but a replicated constant would
4956 make it worse.
4958 TODO: 16-bit constants that don't clear all the bits, but still win.
4959 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4960 if (TARGET_THUMB2)
4962 b1 = (remainder & 0xff000000) >> 24;
4963 b2 = (remainder & 0x00ff0000) >> 16;
4964 b3 = (remainder & 0x0000ff00) >> 8;
4965 b4 = remainder & 0xff;
4967 if (loc > 24)
4969 /* The 8-bit immediate already found clears b1 (and maybe b2),
4970 but must leave b3 and b4 alone. */
4972 /* First try to find a 32-bit replicated constant that clears
4973 almost everything. We can assume that we can't do it in one,
4974 or else we wouldn't be here. */
4975 unsigned int tmp = b1 & b2 & b3 & b4;
4976 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4977 + (tmp << 24);
4978 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4979 + (tmp == b3) + (tmp == b4);
4980 if (tmp
4981 && (matching_bytes >= 3
4982 || (matching_bytes == 2
4983 && const_ok_for_op (remainder & ~tmp2, code))))
4985 /* At least 3 of the bytes match, and the fourth has at
4986 least as many bits set, or two of the bytes match
4987 and it will only require one more insn to finish. */
4988 result = tmp2;
4989 i = tmp != b1 ? 32
4990 : tmp != b2 ? 24
4991 : tmp != b3 ? 16
4992 : 8;
4995 /* Second, try to find a 16-bit replicated constant that can
4996 leave three of the bytes clear. If b2 or b4 is already
4997 zero, then we can. If the 8-bit from above would not
4998 clear b2 anyway, then we still win. */
4999 else if (b1 == b3 && (!b2 || !b4
5000 || (remainder & 0x00ff0000 & ~result)))
5002 result = remainder & 0xff00ff00;
5003 i = 24;
5006 else if (loc > 16)
5008 /* The 8-bit immediate already found clears b2 (and maybe b3)
5009 and we don't get here unless b1 is already clear, but it will
5010 leave b4 unchanged. */
5012 /* If we can clear b2 and b4 at once, then we win, since the
5013 8-bit immediate couldn't possibly reach that far. */
5014 if (b2 == b4)
5016 result = remainder & 0x00ff00ff;
5017 i = 16;
5022 return_sequence->i[insns++] = result;
5023 remainder &= ~result;
5025 if (code == SET || code == MINUS)
5026 code = PLUS;
5028 while (remainder);
5030 return insns;
5033 /* Emit an instruction with the indicated PATTERN. If COND is
5034 non-NULL, conditionalize the execution of the instruction on COND
5035 being true. */
5037 static void
5038 emit_constant_insn (rtx cond, rtx pattern)
5040 if (cond)
5041 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
5042 emit_insn (pattern);
5045 /* As above, but extra parameter GENERATE which, if clear, suppresses
5046 RTL generation. */
5048 static int
5049 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
5050 unsigned HOST_WIDE_INT val, rtx target, rtx source,
5051 int subtargets, int generate)
5053 int can_invert = 0;
5054 int can_negate = 0;
5055 int final_invert = 0;
5056 int i;
5057 int set_sign_bit_copies = 0;
5058 int clear_sign_bit_copies = 0;
5059 int clear_zero_bit_copies = 0;
5060 int set_zero_bit_copies = 0;
5061 int insns = 0, neg_insns, inv_insns;
5062 unsigned HOST_WIDE_INT temp1, temp2;
5063 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
5064 struct four_ints *immediates;
5065 struct four_ints pos_immediates, neg_immediates, inv_immediates;
5067 /* Find out which operations are safe for a given CODE. Also do a quick
5068 check for degenerate cases; these can occur when DImode operations
5069 are split. */
5070 switch (code)
5072 case SET:
5073 can_invert = 1;
5074 break;
5076 case PLUS:
5077 can_negate = 1;
5078 break;
5080 case IOR:
5081 if (remainder == 0xffffffff)
5083 if (generate)
5084 emit_constant_insn (cond,
5085 gen_rtx_SET (target,
5086 GEN_INT (ARM_SIGN_EXTEND (val))));
5087 return 1;
5090 if (remainder == 0)
5092 if (reload_completed && rtx_equal_p (target, source))
5093 return 0;
5095 if (generate)
5096 emit_constant_insn (cond, gen_rtx_SET (target, source));
5097 return 1;
5099 break;
5101 case AND:
5102 if (remainder == 0)
5104 if (generate)
5105 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
5106 return 1;
5108 if (remainder == 0xffffffff)
5110 if (reload_completed && rtx_equal_p (target, source))
5111 return 0;
5112 if (generate)
5113 emit_constant_insn (cond, gen_rtx_SET (target, source));
5114 return 1;
5116 can_invert = 1;
5117 break;
5119 case XOR:
5120 if (remainder == 0)
5122 if (reload_completed && rtx_equal_p (target, source))
5123 return 0;
5124 if (generate)
5125 emit_constant_insn (cond, gen_rtx_SET (target, source));
5126 return 1;
5129 if (remainder == 0xffffffff)
5131 if (generate)
5132 emit_constant_insn (cond,
5133 gen_rtx_SET (target,
5134 gen_rtx_NOT (mode, source)));
5135 return 1;
5137 final_invert = 1;
5138 break;
5140 case MINUS:
5141 /* We treat MINUS as (val - source), since (source - val) is always
5142 passed as (source + (-val)). */
5143 if (remainder == 0)
5145 if (generate)
5146 emit_constant_insn (cond,
5147 gen_rtx_SET (target,
5148 gen_rtx_NEG (mode, source)));
5149 return 1;
5151 if (const_ok_for_arm (val))
5153 if (generate)
5154 emit_constant_insn (cond,
5155 gen_rtx_SET (target,
5156 gen_rtx_MINUS (mode, GEN_INT (val),
5157 source)));
5158 return 1;
5161 break;
5163 default:
5164 gcc_unreachable ();
5167 /* If we can do it in one insn get out quickly. */
5168 if (const_ok_for_op (val, code))
5170 if (generate)
5171 emit_constant_insn (cond,
5172 gen_rtx_SET (target,
5173 (source
5174 ? gen_rtx_fmt_ee (code, mode, source,
5175 GEN_INT (val))
5176 : GEN_INT (val))));
5177 return 1;
5180 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
5181 insn. */
5182 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
5183 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
5185 if (generate)
5187 if (mode == SImode && i == 16)
5188 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
5189 smaller insn. */
5190 emit_constant_insn (cond,
5191 gen_zero_extendhisi2
5192 (target, gen_lowpart (HImode, source)));
5193 else
5194 /* Extz only supports SImode, but we can coerce the operands
5195 into that mode. */
5196 emit_constant_insn (cond,
5197 gen_extzv_t2 (gen_lowpart (SImode, target),
5198 gen_lowpart (SImode, source),
5199 GEN_INT (i), const0_rtx));
5202 return 1;
5205 /* Calculate a few attributes that may be useful for specific
5206 optimizations. */
5207 /* Count number of leading zeros. */
5208 for (i = 31; i >= 0; i--)
5210 if ((remainder & (1 << i)) == 0)
5211 clear_sign_bit_copies++;
5212 else
5213 break;
5216 /* Count number of leading 1's. */
5217 for (i = 31; i >= 0; i--)
5219 if ((remainder & (1 << i)) != 0)
5220 set_sign_bit_copies++;
5221 else
5222 break;
5225 /* Count number of trailing zero's. */
5226 for (i = 0; i <= 31; i++)
5228 if ((remainder & (1 << i)) == 0)
5229 clear_zero_bit_copies++;
5230 else
5231 break;
5234 /* Count number of trailing 1's. */
5235 for (i = 0; i <= 31; i++)
5237 if ((remainder & (1 << i)) != 0)
5238 set_zero_bit_copies++;
5239 else
5240 break;
5243 switch (code)
5245 case SET:
5246 /* See if we can do this by sign_extending a constant that is known
5247 to be negative. This is a good way of doing it, since the shift
5248 may well merge into a subsequent insn. */
5249 if (set_sign_bit_copies > 1)
5251 if (const_ok_for_arm
5252 (temp1 = ARM_SIGN_EXTEND (remainder
5253 << (set_sign_bit_copies - 1))))
5255 if (generate)
5257 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5258 emit_constant_insn (cond,
5259 gen_rtx_SET (new_src, GEN_INT (temp1)));
5260 emit_constant_insn (cond,
5261 gen_ashrsi3 (target, new_src,
5262 GEN_INT (set_sign_bit_copies - 1)));
5264 return 2;
5266 /* For an inverted constant, we will need to set the low bits,
5267 these will be shifted out of harm's way. */
5268 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
5269 if (const_ok_for_arm (~temp1))
5271 if (generate)
5273 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5274 emit_constant_insn (cond,
5275 gen_rtx_SET (new_src, GEN_INT (temp1)));
5276 emit_constant_insn (cond,
5277 gen_ashrsi3 (target, new_src,
5278 GEN_INT (set_sign_bit_copies - 1)));
5280 return 2;
5284 /* See if we can calculate the value as the difference between two
5285 valid immediates. */
5286 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
5288 int topshift = clear_sign_bit_copies & ~1;
5290 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
5291 & (0xff000000 >> topshift));
5293 /* If temp1 is zero, then that means the 9 most significant
5294 bits of remainder were 1 and we've caused it to overflow.
5295 When topshift is 0 we don't need to do anything since we
5296 can borrow from 'bit 32'. */
5297 if (temp1 == 0 && topshift != 0)
5298 temp1 = 0x80000000 >> (topshift - 1);
5300 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
5302 if (const_ok_for_arm (temp2))
5304 if (generate)
5306 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5307 emit_constant_insn (cond,
5308 gen_rtx_SET (new_src, GEN_INT (temp1)));
5309 emit_constant_insn (cond,
5310 gen_addsi3 (target, new_src,
5311 GEN_INT (-temp2)));
5314 return 2;
5318 /* See if we can generate this by setting the bottom (or the top)
5319 16 bits, and then shifting these into the other half of the
5320 word. We only look for the simplest cases, to do more would cost
5321 too much. Be careful, however, not to generate this when the
5322 alternative would take fewer insns. */
5323 if (val & 0xffff0000)
5325 temp1 = remainder & 0xffff0000;
5326 temp2 = remainder & 0x0000ffff;
5328 /* Overlaps outside this range are best done using other methods. */
5329 for (i = 9; i < 24; i++)
5331 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
5332 && !const_ok_for_arm (temp2))
5334 rtx new_src = (subtargets
5335 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5336 : target);
5337 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
5338 source, subtargets, generate);
5339 source = new_src;
5340 if (generate)
5341 emit_constant_insn
5342 (cond,
5343 gen_rtx_SET
5344 (target,
5345 gen_rtx_IOR (mode,
5346 gen_rtx_ASHIFT (mode, source,
5347 GEN_INT (i)),
5348 source)));
5349 return insns + 1;
5353 /* Don't duplicate cases already considered. */
5354 for (i = 17; i < 24; i++)
5356 if (((temp1 | (temp1 >> i)) == remainder)
5357 && !const_ok_for_arm (temp1))
5359 rtx new_src = (subtargets
5360 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5361 : target);
5362 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
5363 source, subtargets, generate);
5364 source = new_src;
5365 if (generate)
5366 emit_constant_insn
5367 (cond,
5368 gen_rtx_SET (target,
5369 gen_rtx_IOR
5370 (mode,
5371 gen_rtx_LSHIFTRT (mode, source,
5372 GEN_INT (i)),
5373 source)));
5374 return insns + 1;
5378 break;
5380 case IOR:
5381 case XOR:
5382 /* If we have IOR or XOR, and the constant can be loaded in a
5383 single instruction, and we can find a temporary to put it in,
5384 then this can be done in two instructions instead of 3-4. */
5385 if (subtargets
5386 /* TARGET can't be NULL if SUBTARGETS is 0 */
5387 || (reload_completed && !reg_mentioned_p (target, source)))
5389 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
5391 if (generate)
5393 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5395 emit_constant_insn (cond,
5396 gen_rtx_SET (sub, GEN_INT (val)));
5397 emit_constant_insn (cond,
5398 gen_rtx_SET (target,
5399 gen_rtx_fmt_ee (code, mode,
5400 source, sub)));
5402 return 2;
5406 if (code == XOR)
5407 break;
5409 /* Convert.
5410 x = y | constant (which is composed of set_sign_bit_copies leading 1s
5411 followed by 0s, e.g. 0xfff00000)
5412 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
5414 This can be done in 2 instructions by using shifts with mov or mvn.
5415 e.g. for
5416 x = x | 0xfff00000;
5417 we generate:
5418 mvn r0, r0, asl #12
5419 mvn r0, r0, lsr #12 */
5420 if (set_sign_bit_copies > 8
5421 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5423 if (generate)
5425 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5426 rtx shift = GEN_INT (set_sign_bit_copies);
5428 emit_constant_insn
5429 (cond,
5430 gen_rtx_SET (sub,
5431 gen_rtx_NOT (mode,
5432 gen_rtx_ASHIFT (mode,
5433 source,
5434 shift))));
5435 emit_constant_insn
5436 (cond,
5437 gen_rtx_SET (target,
5438 gen_rtx_NOT (mode,
5439 gen_rtx_LSHIFTRT (mode, sub,
5440 shift))));
5442 return 2;
5445 /* Convert
5446 x = y | constant (which has set_zero_bit_copies number of trailing ones).
5448 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5450 E.g. for r0 = r0 | 0xfff
5451 mvn r0, r0, lsr #12
5452 mvn r0, r0, asl #12
5455 if (set_zero_bit_copies > 8
5456 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5458 if (generate)
5460 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5461 rtx shift = GEN_INT (set_zero_bit_copies);
5463 emit_constant_insn
5464 (cond,
5465 gen_rtx_SET (sub,
5466 gen_rtx_NOT (mode,
5467 gen_rtx_LSHIFTRT (mode,
5468 source,
5469 shift))));
5470 emit_constant_insn
5471 (cond,
5472 gen_rtx_SET (target,
5473 gen_rtx_NOT (mode,
5474 gen_rtx_ASHIFT (mode, sub,
5475 shift))));
5477 return 2;
5480 /* This will never be reached for Thumb2 because orn is a valid
5481 instruction. This is for Thumb1 and the ARM 32 bit cases.
5483 x = y | constant (such that ~constant is a valid constant)
5484 Transform this to
5485 x = ~(~y & ~constant).
5487 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5489 if (generate)
5491 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5492 emit_constant_insn (cond,
5493 gen_rtx_SET (sub,
5494 gen_rtx_NOT (mode, source)));
5495 source = sub;
5496 if (subtargets)
5497 sub = gen_reg_rtx (mode);
5498 emit_constant_insn (cond,
5499 gen_rtx_SET (sub,
5500 gen_rtx_AND (mode, source,
5501 GEN_INT (temp1))));
5502 emit_constant_insn (cond,
5503 gen_rtx_SET (target,
5504 gen_rtx_NOT (mode, sub)));
5506 return 3;
5508 break;
5510 case AND:
5511 /* See if two shifts will do 2 or more insn's worth of work. */
5512 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5514 HOST_WIDE_INT shift_mask = ((0xffffffff
5515 << (32 - clear_sign_bit_copies))
5516 & 0xffffffff);
5518 if ((remainder | shift_mask) != 0xffffffff)
5520 HOST_WIDE_INT new_val
5521 = ARM_SIGN_EXTEND (remainder | shift_mask);
5523 if (generate)
5525 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5526 insns = arm_gen_constant (AND, SImode, cond, new_val,
5527 new_src, source, subtargets, 1);
5528 source = new_src;
5530 else
5532 rtx targ = subtargets ? NULL_RTX : target;
5533 insns = arm_gen_constant (AND, mode, cond, new_val,
5534 targ, source, subtargets, 0);
5538 if (generate)
5540 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5541 rtx shift = GEN_INT (clear_sign_bit_copies);
5543 emit_insn (gen_ashlsi3 (new_src, source, shift));
5544 emit_insn (gen_lshrsi3 (target, new_src, shift));
5547 return insns + 2;
5550 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5552 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5554 if ((remainder | shift_mask) != 0xffffffff)
5556 HOST_WIDE_INT new_val
5557 = ARM_SIGN_EXTEND (remainder | shift_mask);
5558 if (generate)
5560 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5562 insns = arm_gen_constant (AND, mode, cond, new_val,
5563 new_src, source, subtargets, 1);
5564 source = new_src;
5566 else
5568 rtx targ = subtargets ? NULL_RTX : target;
5570 insns = arm_gen_constant (AND, mode, cond, new_val,
5571 targ, source, subtargets, 0);
5575 if (generate)
5577 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5578 rtx shift = GEN_INT (clear_zero_bit_copies);
5580 emit_insn (gen_lshrsi3 (new_src, source, shift));
5581 emit_insn (gen_ashlsi3 (target, new_src, shift));
5584 return insns + 2;
5587 break;
5589 default:
5590 break;
5593 /* Calculate what the instruction sequences would be if we generated it
5594 normally, negated, or inverted. */
5595 if (code == AND)
5596 /* AND cannot be split into multiple insns, so invert and use BIC. */
5597 insns = 99;
5598 else
5599 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5601 if (can_negate)
5602 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5603 &neg_immediates);
5604 else
5605 neg_insns = 99;
5607 if (can_invert || final_invert)
5608 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5609 &inv_immediates);
5610 else
5611 inv_insns = 99;
5613 immediates = &pos_immediates;
5615 /* Is the negated immediate sequence more efficient? */
5616 if (neg_insns < insns && neg_insns <= inv_insns)
5618 insns = neg_insns;
5619 immediates = &neg_immediates;
5621 else
5622 can_negate = 0;
5624 /* Is the inverted immediate sequence more efficient?
5625 We must allow for an extra NOT instruction for XOR operations, although
5626 there is some chance that the final 'mvn' will get optimized later. */
5627 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5629 insns = inv_insns;
5630 immediates = &inv_immediates;
5632 else
5634 can_invert = 0;
5635 final_invert = 0;
5638 /* Now output the chosen sequence as instructions. */
5639 if (generate)
5641 for (i = 0; i < insns; i++)
5643 rtx new_src, temp1_rtx;
5645 temp1 = immediates->i[i];
5647 if (code == SET || code == MINUS)
5648 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5649 else if ((final_invert || i < (insns - 1)) && subtargets)
5650 new_src = gen_reg_rtx (mode);
5651 else
5652 new_src = target;
5654 if (can_invert)
5655 temp1 = ~temp1;
5656 else if (can_negate)
5657 temp1 = -temp1;
5659 temp1 = trunc_int_for_mode (temp1, mode);
5660 temp1_rtx = GEN_INT (temp1);
5662 if (code == SET)
5664 else if (code == MINUS)
5665 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5666 else
5667 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5669 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5670 source = new_src;
5672 if (code == SET)
5674 can_negate = can_invert;
5675 can_invert = 0;
5676 code = PLUS;
5678 else if (code == MINUS)
5679 code = PLUS;
5683 if (final_invert)
5685 if (generate)
5686 emit_constant_insn (cond, gen_rtx_SET (target,
5687 gen_rtx_NOT (mode, source)));
5688 insns++;
5691 return insns;
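/* Illustrative sketch, not part of the original source: the splitting
   above is driven by the classic ARM immediate encoding of an 8-bit
   value rotated right by an even amount.  A self-contained version of
   that test, with a worked split, might look like this (hypothetical
   helper, for exposition only).  */
#if 0
static bool
arm_rotated_imm8_p (unsigned int val)
{
  for (int rot = 0; rot < 32; rot += 2)
    /* Rotate VAL left by ROT and see whether the result fits in 8 bits.  */
    if (((val << rot) | (val >> ((32 - rot) & 31))) < 256)
      return true;
  return false;
}

/* E.g. 0x12345678 fails this test, as does its inverse, so (absent
   MOVW/MOVT) a SET is split into four valid immediates
   0x12000000 + 0x00340000 + 0x00005600 + 0x00000078, i.e. one MOV
   followed by three ORRs.  */
#endif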
5694 /* Return TRUE if op is a constant where both the low and top words are
5695 suitable for RSB/RSC instructions. This is never true for Thumb, since
5696 we do not have RSC in that case. */
5697 static bool
5698 arm_const_double_prefer_rsbs_rsc (rtx op)
5700 /* Thumb lacks RSC, so we never prefer that sequence. */
5701 if (TARGET_THUMB || !CONST_INT_P (op))
5702 return false;
5703 HOST_WIDE_INT hi, lo;
5704 lo = UINTVAL (op) & 0xffffffffULL;
5705 hi = UINTVAL (op) >> 32;
5706 return const_ok_for_arm (lo) && const_ok_for_arm (hi);
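/* Worked example (illustrative, not from the original source): for the
   64-bit constant 0x0000000100000001 both the low and high words are 1,
   which is a valid ARM immediate, so this returns true and an RSB/RSC
   sequence can subtract the register operand from the constant without
   first loading the constant into registers.  */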
5709 /* Canonicalize a comparison so that we are more likely to recognize it.
5710 This can be done for a few constant compares, where we can make the
5711 immediate value easier to load. */
5713 static void
5714 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5715 bool op0_preserve_value)
5717 machine_mode mode;
5718 unsigned HOST_WIDE_INT i, maxval;
5720 mode = GET_MODE (*op0);
5721 if (mode == VOIDmode)
5722 mode = GET_MODE (*op1);
5724 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5726 /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc). In
5727 ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be
5728 either reversed or (for constant OP1) adjusted to GE/LT.
5729 Similarly for GTU/LEU in Thumb mode. */
5730 if (mode == DImode)
5733 if (*code == GT || *code == LE
5734 || *code == GTU || *code == LEU)
5736 /* Missing comparison. First try to use an available
5737 comparison. */
5738 if (CONST_INT_P (*op1))
5740 i = INTVAL (*op1);
5741 switch (*code)
5743 case GT:
5744 case LE:
5745 if (i != maxval)
5747 /* Try to convert to GE/LT, unless that would be more
5748 expensive. */
5749 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5750 && arm_const_double_prefer_rsbs_rsc (*op1))
5751 return;
5752 *op1 = GEN_INT (i + 1);
5753 *code = *code == GT ? GE : LT;
5755 else
5757 /* GT maxval is always false, LE maxval is always true.
5758 We can't fold that away here as we must make a
5759 comparison, but we can fold them to comparisons
5760 with the same result that can be handled:
5761 op0 GT maxval -> op0 LT minval
5762 op0 LE maxval -> op0 GE minval
5763 where minval = (-maxval - 1). */
5764 *op1 = GEN_INT (-maxval - 1);
5765 *code = *code == GT ? LT : GE;
5767 return;
5769 case GTU:
5770 case LEU:
5771 if (i != ~((unsigned HOST_WIDE_INT) 0))
5773 /* Try to convert to GEU/LTU, unless that would
5774 be more expensive. */
5775 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5776 && arm_const_double_prefer_rsbs_rsc (*op1))
5777 return;
5778 *op1 = GEN_INT (i + 1);
5779 *code = *code == GTU ? GEU : LTU;
5781 else
5783 /* GTU ~0 is always false, LEU ~0 is always true.
5784 We can't fold that away here as we must make a
5785 comparison, but we can fold them to comparisons
5786 with the same result that can be handled:
5787 op0 GTU ~0 -> op0 LTU 0
5788 op0 LEU ~0 -> op0 GEU 0. */
5789 *op1 = const0_rtx;
5790 *code = *code == GTU ? LTU : GEU;
5792 return;
5794 default:
5795 gcc_unreachable ();
5799 if (!op0_preserve_value)
5801 std::swap (*op0, *op1);
5802 *code = (int)swap_condition ((enum rtx_code)*code);
5805 return;
5808 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5809 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5810 to facilitate possible combining with a cmp into 'ands'. */
5811 if (mode == SImode
5812 && GET_CODE (*op0) == ZERO_EXTEND
5813 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5814 && GET_MODE (XEXP (*op0, 0)) == QImode
5815 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5816 && subreg_lowpart_p (XEXP (*op0, 0))
5817 && *op1 == const0_rtx)
5818 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5819 GEN_INT (255));
5821 /* Comparisons smaller than DImode. Only adjust comparisons against
5822 an out-of-range constant. */
5823 if (!CONST_INT_P (*op1)
5824 || const_ok_for_arm (INTVAL (*op1))
5825 || const_ok_for_arm (- INTVAL (*op1)))
5826 return;
5828 i = INTVAL (*op1);
5830 switch (*code)
5832 case EQ:
5833 case NE:
5834 return;
5836 case GT:
5837 case LE:
5838 if (i != maxval
5839 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5841 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5842 *code = *code == GT ? GE : LT;
5843 return;
5845 break;
5847 case GE:
5848 case LT:
5849 if (i != ~maxval
5850 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5852 *op1 = GEN_INT (i - 1);
5853 *code = *code == GE ? GT : LE;
5854 return;
5856 break;
5858 case GTU:
5859 case LEU:
5860 if (i != ~((unsigned HOST_WIDE_INT) 0)
5861 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5863 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5864 *code = *code == GTU ? GEU : LTU;
5865 return;
5867 break;
5869 case GEU:
5870 case LTU:
5871 if (i != 0
5872 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5874 *op1 = GEN_INT (i - 1);
5875 *code = *code == GEU ? GTU : LEU;
5876 return;
5878 break;
5880 default:
5881 gcc_unreachable ();
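/* Worked example (illustrative, not from the original source): 0x00ffffff
   is not a valid ARM immediate but 0x01000000 is, so a 32-bit comparison
   (x > 0x00ffffff) is canonicalized above to (x >= 0x01000000), which a
   single CMP can implement.  */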
5886 /* Define how to find the value returned by a function. */
5888 static rtx
5889 arm_function_value(const_tree type, const_tree func,
5890 bool outgoing ATTRIBUTE_UNUSED)
5892 machine_mode mode;
5893 int unsignedp ATTRIBUTE_UNUSED;
5894 rtx r ATTRIBUTE_UNUSED;
5896 mode = TYPE_MODE (type);
5898 if (TARGET_AAPCS_BASED)
5899 return aapcs_allocate_return_reg (mode, type, func);
5901 /* Promote integer types. */
5902 if (INTEGRAL_TYPE_P (type))
5903 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5905 /* Promotes small structs returned in a register to full-word size
5906 for big-endian AAPCS. */
5907 if (arm_return_in_msb (type))
5909 HOST_WIDE_INT size = int_size_in_bytes (type);
5910 if (size % UNITS_PER_WORD != 0)
5912 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5913 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5917 return arm_libcall_value_1 (mode);
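/* Worked example (illustrative, not from the original source): on a
   big-endian AAPCS target a 3-byte struct that is returned in a register
   has its size rounded up to 4 bytes here, so the return value is given
   SImode and occupies the most significant end of r0, matching the
   struct's in-memory layout.  */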
5920 /* libcall hashtable helpers. */
5922 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5924 static inline hashval_t hash (const rtx_def *);
5925 static inline bool equal (const rtx_def *, const rtx_def *);
5926 static inline void remove (rtx_def *);
5929 inline bool
5930 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5932 return rtx_equal_p (p1, p2);
5935 inline hashval_t
5936 libcall_hasher::hash (const rtx_def *p1)
5938 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5941 typedef hash_table<libcall_hasher> libcall_table_type;
5943 static void
5944 add_libcall (libcall_table_type *htab, rtx libcall)
5946 *htab->find_slot (libcall, INSERT) = libcall;
5949 static bool
5950 arm_libcall_uses_aapcs_base (const_rtx libcall)
5952 static bool init_done = false;
5953 static libcall_table_type *libcall_htab = NULL;
5955 if (!init_done)
5957 init_done = true;
5959 libcall_htab = new libcall_table_type (31);
5960 add_libcall (libcall_htab,
5961 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5962 add_libcall (libcall_htab,
5963 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5964 add_libcall (libcall_htab,
5965 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5966 add_libcall (libcall_htab,
5967 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5969 add_libcall (libcall_htab,
5970 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5971 add_libcall (libcall_htab,
5972 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5973 add_libcall (libcall_htab,
5974 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5975 add_libcall (libcall_htab,
5976 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5978 add_libcall (libcall_htab,
5979 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5980 add_libcall (libcall_htab,
5981 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5982 add_libcall (libcall_htab,
5983 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5984 add_libcall (libcall_htab,
5985 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5986 add_libcall (libcall_htab,
5987 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5988 add_libcall (libcall_htab,
5989 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5990 add_libcall (libcall_htab,
5991 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5992 add_libcall (libcall_htab,
5993 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5994 add_libcall (libcall_htab,
5995 convert_optab_libfunc (sfix_optab, SImode, SFmode));
5996 add_libcall (libcall_htab,
5997 convert_optab_libfunc (ufix_optab, SImode, SFmode));
5999 /* Values from double-precision helper functions are returned in core
6000 registers if the selected core only supports single-precision
6001 arithmetic, even if we are using the hard-float ABI. The same is
6002 true for single-precision helpers except in case of MVE, because in
6003 MVE we will be using the hard-float ABI on a CPU which doesn't support
6004 single-precision operations in hardware. In MVE the following check
6005 enables use of emulation for the single-precision arithmetic
6006 operations. */
6007 if (TARGET_HAVE_MVE)
6009 add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
6010 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
6011 add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
6012 add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
6013 add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
6014 add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
6015 add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
6016 add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
6017 add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
6018 add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
6019 add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
6021 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
6022 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
6023 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
6024 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
6025 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
6026 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
6027 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
6028 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
6029 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
6030 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
6031 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
6032 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
6033 SFmode));
6034 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
6035 DFmode));
6036 add_libcall (libcall_htab,
6037 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
6040 return libcall && libcall_htab->find (libcall) != NULL;
6043 static rtx
6044 arm_libcall_value_1 (machine_mode mode)
6046 if (TARGET_AAPCS_BASED)
6047 return aapcs_libcall_value (mode);
6048 else if (TARGET_IWMMXT_ABI
6049 && arm_vector_mode_supported_p (mode))
6050 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
6051 else
6052 return gen_rtx_REG (mode, ARG_REGISTER (1));
6055 /* Define how to find the value returned by a library function
6056 assuming the value has mode MODE. */
6058 static rtx
6059 arm_libcall_value (machine_mode mode, const_rtx libcall)
6061 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
6062 && GET_MODE_CLASS (mode) == MODE_FLOAT)
6064 /* The following libcalls return their result in integer registers,
6065 even though they return a floating point value. */
6066 if (arm_libcall_uses_aapcs_base (libcall))
6067 return gen_rtx_REG (mode, ARG_REGISTER(1));
6071 return arm_libcall_value_1 (mode);
6074 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
6076 static bool
6077 arm_function_value_regno_p (const unsigned int regno)
6079 if (regno == ARG_REGISTER (1)
6080 || (TARGET_32BIT
6081 && TARGET_AAPCS_BASED
6082 && TARGET_HARD_FLOAT
6083 && regno == FIRST_VFP_REGNUM)
6084 || (TARGET_IWMMXT_ABI
6085 && regno == FIRST_IWMMXT_REGNUM))
6086 return true;
6088 return false;
6091 /* Determine the amount of memory needed to store the possible return
6092 registers of an untyped call. */
6094 arm_apply_result_size (void)
6096 int size = 16;
6098 if (TARGET_32BIT)
6100 if (TARGET_HARD_FLOAT_ABI)
6101 size += 32;
6102 if (TARGET_IWMMXT_ABI)
6103 size += 8;
6106 return size;
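/* Worked example (illustrative, not from the original source): on a
   32-bit target using a hard-float ABI the untyped-call result area is
   16 bytes for r0-r3 plus 32 bytes for the VFP result registers, i.e. 48
   bytes; an iWMMXt ABI adds a further 8 bytes.  */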
6109 /* Decide whether TYPE should be returned in memory (true)
6110 or in a register (false). FNTYPE is the type of the function making
6111 the call. */
6112 static bool
6113 arm_return_in_memory (const_tree type, const_tree fntype)
6115 HOST_WIDE_INT size;
6117 size = int_size_in_bytes (type); /* Negative if not fixed size. */
6119 if (TARGET_AAPCS_BASED)
6121 /* Simple, non-aggregate types (i.e. not including vectors and
6122 complex) are always returned in a register (or registers).
6123 We don't care about which register here, so we can short-cut
6124 some of the detail. */
6125 if (!AGGREGATE_TYPE_P (type)
6126 && TREE_CODE (type) != VECTOR_TYPE
6127 && TREE_CODE (type) != COMPLEX_TYPE)
6128 return false;
6130 /* Any return value that is no larger than one word can be
6131 returned in r0. */
6132 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
6133 return false;
6135 /* Check any available co-processors to see if they accept the
6136 type as a register candidate (VFP, for example, can return
6137 some aggregates in consecutive registers). These aren't
6138 available if the call is variadic. */
6139 if (aapcs_select_return_coproc (type, fntype) >= 0)
6140 return false;
6142 /* Vector values should be returned using ARM registers, not
6143 memory (unless they're over 16 bytes, which will break since
6144 we only have four call-clobbered registers to play with). */
6145 if (TREE_CODE (type) == VECTOR_TYPE)
6146 return (size < 0 || size > (4 * UNITS_PER_WORD));
6148 /* The rest go in memory. */
6149 return true;
6152 if (TREE_CODE (type) == VECTOR_TYPE)
6153 return (size < 0 || size > (4 * UNITS_PER_WORD));
6155 if (!AGGREGATE_TYPE_P (type) &&
6156 (TREE_CODE (type) != VECTOR_TYPE))
6157 /* All simple types are returned in registers. */
6158 return false;
6160 if (arm_abi != ARM_ABI_APCS)
6162 /* ATPCS and later return aggregate types in memory only if they are
6163 larger than a word (or are variable size). */
6164 return (size < 0 || size > UNITS_PER_WORD);
6167 /* For the arm-wince targets we choose to be compatible with Microsoft's
6168 ARM and Thumb compilers, which always return aggregates in memory. */
6169 #ifndef ARM_WINCE
6170 /* All structures/unions bigger than one word are returned in memory.
6171 Also catch the case where int_size_in_bytes returns -1. In this case
6172 the aggregate is either huge or of variable size, and in either case
6173 we will want to return it via memory and not in a register. */
6174 if (size < 0 || size > UNITS_PER_WORD)
6175 return true;
6177 if (TREE_CODE (type) == RECORD_TYPE)
6179 tree field;
6181 /* For a struct the APCS says that we only return in a register
6182 if the type is 'integer like' and every addressable element
6183 has an offset of zero. For practical purposes this means
6184 that the structure can have at most one non bit-field element
6185 and that this element must be the first one in the structure. */
6187 /* Find the first field, ignoring non FIELD_DECL things which will
6188 have been created by C++. */
6189 /* NOTE: This code is deprecated and has not been updated to handle
6190 DECL_FIELD_ABI_IGNORED. */
6191 for (field = TYPE_FIELDS (type);
6192 field && TREE_CODE (field) != FIELD_DECL;
6193 field = DECL_CHAIN (field))
6194 continue;
6196 if (field == NULL)
6197 return false; /* An empty structure. Allowed by an extension to ANSI C. */
6199 /* Check that the first field is valid for returning in a register. */
6201 /* ... Floats are not allowed */
6202 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6203 return true;
6205 /* ... Aggregates that are not themselves valid for returning in
6206 a register are not allowed. */
6207 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6208 return true;
6210 /* Now check the remaining fields, if any. Only bitfields are allowed,
6211 since they are not addressable. */
6212 for (field = DECL_CHAIN (field);
6213 field;
6214 field = DECL_CHAIN (field))
6216 if (TREE_CODE (field) != FIELD_DECL)
6217 continue;
6219 if (!DECL_BIT_FIELD_TYPE (field))
6220 return true;
6223 return false;
6226 if (TREE_CODE (type) == UNION_TYPE)
6228 tree field;
6230 /* Unions can be returned in registers if every element is
6231 integral, or can be returned in an integer register. */
6232 for (field = TYPE_FIELDS (type);
6233 field;
6234 field = DECL_CHAIN (field))
6236 if (TREE_CODE (field) != FIELD_DECL)
6237 continue;
6239 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6240 return true;
6242 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6243 return true;
6246 return false;
6248 #endif /* not ARM_WINCE */
6250 /* Return all other types in memory. */
6251 return true;
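/* Illustrative examples (not from the original source) of the legacy
   APCS rules above: struct { int i; } is "integer like" and comes back
   in r0, while struct { float f; } (first field is a float) and
   struct { int a, b; } (larger than a word) are returned in memory.
   Under AAPCS both of the one-word structs pass the size check earlier
   in this function and are returned in registers.  */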
6254 const struct pcs_attribute_arg
6256 const char *arg;
6257 enum arm_pcs value;
6258 } pcs_attribute_args[] =
6260 {"aapcs", ARM_PCS_AAPCS},
6261 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
6262 #if 0
6263 /* We could recognize these, but changes would be needed elsewhere
6264 * to implement them. */
6265 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
6266 {"atpcs", ARM_PCS_ATPCS},
6267 {"apcs", ARM_PCS_APCS},
6268 #endif
6269 {NULL, ARM_PCS_UNKNOWN}
6272 static enum arm_pcs
6273 arm_pcs_from_attribute (tree attr)
6275 const struct pcs_attribute_arg *ptr;
6276 const char *arg;
6278 /* Get the value of the argument. */
6279 if (TREE_VALUE (attr) == NULL_TREE
6280 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
6281 return ARM_PCS_UNKNOWN;
6283 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
6285 /* Check it against the list of known arguments. */
6286 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
6287 if (streq (arg, ptr->arg))
6288 return ptr->value;
6290 /* An unrecognized PCS name. */
6291 return ARM_PCS_UNKNOWN;
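/* Usage example (illustrative, not from the original source): the "pcs"
   type attribute supplies one of the strings recognized above, e.g.

     double f (double x) __attribute__ ((pcs ("aapcs")));

   forces F to use the base variant, passing and returning the double in
   core registers even when the target default is "aapcs-vfp".  */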
6294 /* Get the PCS variant to use for this call. TYPE is the function's type
6295 specification, DECL is the specific declaration. DECL may be null if
6296 the call could be indirect or if this is a library call. */
6297 static enum arm_pcs
6298 arm_get_pcs_model (const_tree type, const_tree decl ATTRIBUTE_UNUSED)
6300 bool user_convention = false;
6301 enum arm_pcs user_pcs = arm_pcs_default;
6302 tree attr;
6304 gcc_assert (type);
6306 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
6307 if (attr)
6309 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
6310 user_convention = true;
6313 if (TARGET_AAPCS_BASED)
6315 /* Detect varargs functions. These always use the base rules
6316 (no argument is ever a candidate for a co-processor
6317 register). */
6318 bool base_rules = stdarg_p (type);
6320 if (user_convention)
6322 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
6323 sorry ("non-AAPCS derived PCS variant");
6324 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
6325 error ("variadic functions must use the base AAPCS variant");
6328 if (base_rules)
6329 return ARM_PCS_AAPCS;
6330 else if (user_convention)
6331 return user_pcs;
6332 #if 0
6333 /* Unfortunately, this is not safe and can lead to wrong code
6334 being generated (PR96882). Not all calls into the back-end
6335 pass the DECL, so it is unsafe to make any PCS-changing
6336 decisions based on it. In particular the RETURN_IN_MEMORY
6337 hook is only ever passed a TYPE. This needs revisiting to
6338 see if there are any partial improvements that can be
6339 re-enabled. */
6340 else if (decl && flag_unit_at_a_time)
6342 /* Local functions never leak outside this compilation unit,
6343 so we are free to use whatever conventions are
6344 appropriate. */
6345 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
6346 cgraph_node *local_info_node
6347 = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
6348 if (local_info_node && local_info_node->local)
6349 return ARM_PCS_AAPCS_LOCAL;
6351 #endif
6353 else if (user_convention && user_pcs != arm_pcs_default)
6354 sorry ("PCS variant");
6356 /* For everything else we use the target's default. */
6357 return arm_pcs_default;
6361 static void
6362 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6363 const_tree fntype ATTRIBUTE_UNUSED,
6364 rtx libcall ATTRIBUTE_UNUSED,
6365 const_tree fndecl ATTRIBUTE_UNUSED)
6367 /* Record the unallocated VFP registers. */
6368 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
6369 pcum->aapcs_vfp_reg_alloc = 0;
6372 /* Bitmasks that indicate whether earlier versions of GCC would have
6373 taken a different path through the ABI logic. This should result in
6374 a -Wpsabi warning if the earlier path led to a different ABI decision.
6376 WARN_PSABI_EMPTY_CXX17_BASE
6377 Indicates that the type includes an artificial empty C++17 base field
6378 that, prior to GCC 10.1, would prevent the type from being treated as
6379 a HFA or HVA. See PR94711 for details.
6381 WARN_PSABI_NO_UNIQUE_ADDRESS
6382 Indicates that the type includes an empty [[no_unique_address]] field
6383 that, prior to GCC 10.1, would prevent the type from being treated as
6384 a HFA or HVA. */
6385 const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
6386 const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
6387 const unsigned int WARN_PSABI_ZERO_WIDTH_BITFIELD = 1U << 2;
6389 /* Walk down the type tree of TYPE counting consecutive base elements.
6390 If *MODEP is VOIDmode, then set it to the first valid floating point
6391 type. If a non-floating point type is found, or if a floating point
6392 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6393 otherwise return the count in the sub-tree.
6395 The WARN_PSABI_FLAGS argument allows the caller to check whether this
6396 function has changed its behavior relative to earlier versions of GCC.
6397 Normally the argument should be nonnull and point to a zero-initialized
6398 variable. The function then records whether the ABI decision might
6399 be affected by a known fix to the ABI logic, setting the associated
6400 WARN_PSABI_* bits if so.
6402 When the argument is instead a null pointer, the function tries to
6403 simulate the behavior of GCC before all such ABI fixes were made.
6404 This is useful to check whether the function returns something
6405 different after the ABI fixes. */
6406 static int
6407 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
6408 unsigned int *warn_psabi_flags)
6410 machine_mode mode;
6411 HOST_WIDE_INT size;
6413 switch (TREE_CODE (type))
6415 case REAL_TYPE:
6416 mode = TYPE_MODE (type);
6417 if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
6418 return -1;
6420 if (*modep == VOIDmode)
6421 *modep = mode;
6423 if (*modep == mode)
6424 return 1;
6426 break;
6428 case COMPLEX_TYPE:
6429 mode = TYPE_MODE (TREE_TYPE (type));
6430 if (mode != DFmode && mode != SFmode)
6431 return -1;
6433 if (*modep == VOIDmode)
6434 *modep = mode;
6436 if (*modep == mode)
6437 return 2;
6439 break;
6441 case VECTOR_TYPE:
6442 /* Use V2SImode and V4SImode as representatives of all 64-bit
6443 and 128-bit vector types, whether or not those modes are
6444 supported with the present options. */
6445 size = int_size_in_bytes (type);
6446 switch (size)
6448 case 8:
6449 mode = V2SImode;
6450 break;
6451 case 16:
6452 mode = V4SImode;
6453 break;
6454 default:
6455 return -1;
6458 if (*modep == VOIDmode)
6459 *modep = mode;
6461 /* Vector modes are considered to be opaque: two vectors are
6462 equivalent for the purposes of being homogeneous aggregates
6463 if they are the same size. */
6464 if (*modep == mode)
6465 return 1;
6467 break;
6469 case ARRAY_TYPE:
6471 int count;
6472 tree index = TYPE_DOMAIN (type);
6474 /* Can't handle incomplete types nor sizes that are not
6475 fixed. */
6476 if (!COMPLETE_TYPE_P (type)
6477 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6478 return -1;
6480 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
6481 warn_psabi_flags);
6482 if (count == -1
6483 || !index
6484 || !TYPE_MAX_VALUE (index)
6485 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6486 || !TYPE_MIN_VALUE (index)
6487 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6488 || count < 0)
6489 return -1;
6491 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6492 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6494 /* There must be no padding. */
6495 if (wi::to_wide (TYPE_SIZE (type))
6496 != count * GET_MODE_BITSIZE (*modep))
6497 return -1;
6499 return count;
6502 case RECORD_TYPE:
6504 int count = 0;
6505 int sub_count;
6506 tree field;
6508 /* Can't handle incomplete types nor sizes that are not
6509 fixed. */
6510 if (!COMPLETE_TYPE_P (type)
6511 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6512 return -1;
6514 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6516 if (TREE_CODE (field) != FIELD_DECL)
6517 continue;
6519 if (DECL_FIELD_ABI_IGNORED (field))
6521 /* See whether this is something that earlier versions of
6522 GCC failed to ignore. */
6523 unsigned int flag;
6524 if (lookup_attribute ("no_unique_address",
6525 DECL_ATTRIBUTES (field)))
6526 flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
6527 else if (cxx17_empty_base_field_p (field))
6528 flag = WARN_PSABI_EMPTY_CXX17_BASE;
6529 else
6530 /* No compatibility problem. */
6531 continue;
6533 /* Simulate the old behavior when WARN_PSABI_FLAGS is null. */
6534 if (warn_psabi_flags)
6536 *warn_psabi_flags |= flag;
6537 continue;
6540 /* A zero-width bitfield may affect layout in some
6541 circumstances, but adds no members. The determination
6542 of whether or not a type is an HFA is performed after
6543 layout is complete, so if the type still looks like an
6544 HFA afterwards, it is still classed as one. This is
6545 potentially an ABI break for the hard-float ABI. */
6546 else if (DECL_BIT_FIELD (field)
6547 && integer_zerop (DECL_SIZE (field)))
6549 /* Prior to GCC-12 these fields were stripped early,
6550 hiding them from the back-end entirely and
6551 resulting in the correct behaviour for argument
6552 passing. Simulate that old behaviour without
6553 generating a warning. */
6554 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
6555 continue;
6556 if (warn_psabi_flags)
6558 *warn_psabi_flags |= WARN_PSABI_ZERO_WIDTH_BITFIELD;
6559 continue;
6563 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6564 warn_psabi_flags);
6565 if (sub_count < 0)
6566 return -1;
6567 count += sub_count;
6570 /* There must be no padding. */
6571 if (wi::to_wide (TYPE_SIZE (type))
6572 != count * GET_MODE_BITSIZE (*modep))
6573 return -1;
6575 return count;
6578 case UNION_TYPE:
6579 case QUAL_UNION_TYPE:
6581 /* These aren't very interesting except in a degenerate case. */
6582 int count = 0;
6583 int sub_count;
6584 tree field;
6586 /* Can't handle incomplete types nor sizes that are not
6587 fixed. */
6588 if (!COMPLETE_TYPE_P (type)
6589 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6590 return -1;
6592 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6594 if (TREE_CODE (field) != FIELD_DECL)
6595 continue;
6597 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6598 warn_psabi_flags);
6599 if (sub_count < 0)
6600 return -1;
6601 count = count > sub_count ? count : sub_count;
6604 /* There must be no padding. */
6605 if (wi::to_wide (TYPE_SIZE (type))
6606 != count * GET_MODE_BITSIZE (*modep))
6607 return -1;
6609 return count;
6612 default:
6613 break;
6616 return -1;
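/* Worked example (illustrative, not from the original source): for
   struct { float x, y, z; } the walk above finds three SFmode REAL_TYPE
   fields and no padding, so it returns 3 with *MODEP == SFmode and the
   struct is a homogeneous aggregate eligible for s0-s2.  A struct that
   mixes float and double members returns -1 and falls back to the core
   register/stack rules.  */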
6619 /* Return true if PCS_VARIANT should use VFP registers. */
6620 static bool
6621 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6623 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6625 static bool seen_thumb1_vfp = false;
6627 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6629 sorry ("Thumb-1 %<hard-float%> VFP ABI");
6630 /* sorry() is not immediately fatal, so only display this once. */
6631 seen_thumb1_vfp = true;
6634 return true;
6637 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6638 return false;
6640 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
6641 (TARGET_VFP_DOUBLE || !is_double));
6644 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6645 suitable for passing or returning in VFP registers for the PCS
6646 variant selected. If it is, then *BASE_MODE is updated to contain
6647 a machine mode describing each element of the argument's type and
6648 *COUNT to hold the number of such elements. */
6649 static bool
6650 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6651 machine_mode mode, const_tree type,
6652 machine_mode *base_mode, int *count)
6654 machine_mode new_mode = VOIDmode;
6656 /* If we have the type information, prefer that to working things
6657 out from the mode. */
6658 if (type)
6660 unsigned int warn_psabi_flags = 0;
6661 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
6662 &warn_psabi_flags);
6663 if (ag_count > 0 && ag_count <= 4)
6665 static unsigned last_reported_type_uid;
6666 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
6667 int alt;
6668 if (warn_psabi
6669 && warn_psabi_flags
6670 && uid != last_reported_type_uid
6671 && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
6672 != ag_count))
6674 const char *url10
6675 = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
6676 const char *url12
6677 = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
6678 gcc_assert (alt == -1);
6679 last_reported_type_uid = uid;
6680 /* Use TYPE_MAIN_VARIANT to strip any redundant const
6681 qualification. */
6682 if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
6683 inform (input_location, "parameter passing for argument of "
6684 "type %qT with %<[[no_unique_address]]%> members "
6685 "changed %{in GCC 10.1%}",
6686 TYPE_MAIN_VARIANT (type), url10);
6687 else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
6688 inform (input_location, "parameter passing for argument of "
6689 "type %qT when C++17 is enabled changed to match "
6690 "C++14 %{in GCC 10.1%}",
6691 TYPE_MAIN_VARIANT (type), url10);
6692 else if (warn_psabi_flags & WARN_PSABI_ZERO_WIDTH_BITFIELD)
6693 inform (input_location, "parameter passing for argument of "
6694 "type %qT changed %{in GCC 12.1%}",
6695 TYPE_MAIN_VARIANT (type), url12);
6697 *count = ag_count;
6699 else
6700 return false;
6702 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6703 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6704 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6706 *count = 1;
6707 new_mode = mode;
6709 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6711 *count = 2;
6712 new_mode = (mode == DCmode ? DFmode : SFmode);
6714 else
6715 return false;
6718 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6719 return false;
6721 *base_mode = new_mode;
6723 if (TARGET_GENERAL_REGS_ONLY)
6724 error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
6725 type);
6727 return true;
6730 static bool
6731 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6732 machine_mode mode, const_tree type)
6734 int count ATTRIBUTE_UNUSED;
6735 machine_mode ag_mode ATTRIBUTE_UNUSED;
6737 if (!use_vfp_abi (pcs_variant, false))
6738 return false;
6739 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6740 &ag_mode, &count);
6743 static bool
6744 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6745 const_tree type)
6747 if (!use_vfp_abi (pcum->pcs_variant, false))
6748 return false;
6750 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6751 &pcum->aapcs_vfp_rmode,
6752 &pcum->aapcs_vfp_rcount);
6755 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6756 for the behaviour of this function. */
6758 static bool
6759 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6760 const_tree type ATTRIBUTE_UNUSED)
6762 int rmode_size
6763 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6764 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6765 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6766 int regno;
6768 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6769 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6771 pcum->aapcs_vfp_reg_alloc = mask << regno;
6772 if (mode == BLKmode
6773 || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
6774 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6776 int i;
6777 int rcount = pcum->aapcs_vfp_rcount;
6778 int rshift = shift;
6779 machine_mode rmode = pcum->aapcs_vfp_rmode;
6780 rtx par;
6781 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6783 /* Avoid using unsupported vector modes. */
6784 if (rmode == V2SImode)
6785 rmode = DImode;
6786 else if (rmode == V4SImode)
6788 rmode = DImode;
6789 rcount *= 2;
6790 rshift /= 2;
6793 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6794 for (i = 0; i < rcount; i++)
6796 rtx tmp = gen_rtx_REG (rmode,
6797 FIRST_VFP_REGNUM + regno + i * rshift);
6798 tmp = gen_rtx_EXPR_LIST
6799 (VOIDmode, tmp,
6800 GEN_INT (i * GET_MODE_SIZE (rmode)));
6801 XVECEXP (par, 0, i) = tmp;
6804 pcum->aapcs_reg = par;
6806 else
6807 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6808 return true;
6810 return false;
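/* Worked example (illustrative, not from the original source): for a
   homogeneous aggregate of two doubles, aapcs_vfp_rmode is DFmode and
   aapcs_vfp_rcount is 2, so SHIFT is 2 and MASK is 0xf; the loop above
   then looks for four consecutive free single-precision registers
   starting on a double-register boundary, i.e. the argument lands in
   d0-d1 (s0-s3) when those registers are free.  */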
6813 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6814 comment there for the behaviour of this function. */
6816 static rtx
6817 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6818 machine_mode mode,
6819 const_tree type ATTRIBUTE_UNUSED)
6821 if (!use_vfp_abi (pcs_variant, false))
6822 return NULL;
6824 if (mode == BLKmode
6825 || (GET_MODE_CLASS (mode) == MODE_INT
6826 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6827 && !(TARGET_NEON || TARGET_HAVE_MVE)))
6829 int count;
6830 machine_mode ag_mode;
6831 int i;
6832 rtx par;
6833 int shift;
6835 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6836 &ag_mode, &count);
6838 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6840 if (ag_mode == V2SImode)
6841 ag_mode = DImode;
6842 else if (ag_mode == V4SImode)
6844 ag_mode = DImode;
6845 count *= 2;
6848 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6849 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6850 for (i = 0; i < count; i++)
6852 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6853 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6854 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6855 XVECEXP (par, 0, i) = tmp;
6858 return par;
6861 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6864 static void
6865 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6866 machine_mode mode ATTRIBUTE_UNUSED,
6867 const_tree type ATTRIBUTE_UNUSED)
6869 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6870 pcum->aapcs_vfp_reg_alloc = 0;
6871 return;
6874 #define AAPCS_CP(X) \
6876 aapcs_ ## X ## _cum_init, \
6877 aapcs_ ## X ## _is_call_candidate, \
6878 aapcs_ ## X ## _allocate, \
6879 aapcs_ ## X ## _is_return_candidate, \
6880 aapcs_ ## X ## _allocate_return_reg, \
6881 aapcs_ ## X ## _advance \
6884 /* Table of co-processors that can be used to pass arguments in
6885 registers. Ideally no argument should be a candidate for more than
6886 one co-processor table entry, but the table is processed in order
6887 and stops after the first match. If that entry then fails to put
6888 the argument into a co-processor register, the argument will go on
6889 the stack. */
6890 static struct
6892 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6893 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6895 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6896 BLKmode) is a candidate for this co-processor's registers; this
6897 function should ignore any position-dependent state in
6898 CUMULATIVE_ARGS and only use call-type dependent information. */
6899 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6901 /* Return true if the argument does get a co-processor register; it
6902 should set aapcs_reg to an RTX of the register allocated as is
6903 required for a return from FUNCTION_ARG. */
6904 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6906 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6907 be returned in this co-processor's registers. */
6908 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6910 /* Allocate and return an RTX element to hold the return type of a call. This
6911 routine must not fail and will only be called if is_return_candidate
6912 returned true with the same parameters. */
6913 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6915 /* Finish processing this argument and prepare to start processing
6916 the next one. */
6917 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6918 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6920 AAPCS_CP(vfp)
6923 #undef AAPCS_CP
6925 static int
6926 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6927 const_tree type)
6929 int i;
6931 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6932 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6933 return i;
6935 return -1;
6938 static int
6939 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6941 /* We aren't passed a decl, so we can't check that a call is local.
6942 However, it isn't clear that that would be a win anyway, since it
6943 might limit some tail-calling opportunities. */
6944 enum arm_pcs pcs_variant;
6946 if (fntype)
6948 const_tree fndecl = NULL_TREE;
6950 if (TREE_CODE (fntype) == FUNCTION_DECL)
6952 fndecl = fntype;
6953 fntype = TREE_TYPE (fntype);
6956 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6958 else
6959 pcs_variant = arm_pcs_default;
6961 if (pcs_variant != ARM_PCS_AAPCS)
6963 int i;
6965 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6966 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6967 TYPE_MODE (type),
6968 type))
6969 return i;
6971 return -1;
6974 static rtx
6975 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6976 const_tree fntype)
6978 /* We aren't passed a decl, so we can't check that a call is local.
6979 However, it isn't clear that that would be a win anyway, since it
6980 might limit some tail-calling opportunities. */
6981 enum arm_pcs pcs_variant;
6982 int unsignedp ATTRIBUTE_UNUSED;
6984 if (fntype)
6986 const_tree fndecl = NULL_TREE;
6988 if (TREE_CODE (fntype) == FUNCTION_DECL)
6990 fndecl = fntype;
6991 fntype = TREE_TYPE (fntype);
6994 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6996 else
6997 pcs_variant = arm_pcs_default;
6999 /* Promote integer types. */
7000 if (type && INTEGRAL_TYPE_P (type))
7001 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
7003 if (pcs_variant != ARM_PCS_AAPCS)
7005 int i;
7007 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
7008 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
7009 type))
7010 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
7011 mode, type);
7014 /* Promotes small structs returned in a register to full-word size
7015 for big-endian AAPCS. */
7016 if (type && arm_return_in_msb (type))
7018 HOST_WIDE_INT size = int_size_in_bytes (type);
7019 if (size % UNITS_PER_WORD != 0)
7021 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
7022 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
7026 return gen_rtx_REG (mode, R0_REGNUM);
7029 static rtx
7030 aapcs_libcall_value (machine_mode mode)
7032 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
7033 && GET_MODE_SIZE (mode) <= 4)
7034 mode = SImode;
7036 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
7039 /* Lay out a function argument using the AAPCS rules. The rule
7040 numbers referred to here are those in the AAPCS. */
7041 static void
7042 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
7043 const_tree type, bool named)
7045 int nregs, nregs2;
7046 int ncrn;
7048 /* We only need to do this once per argument. */
7049 if (pcum->aapcs_arg_processed)
7050 return;
7052 pcum->aapcs_arg_processed = true;
7054 /* Special case: if named is false then we are handling an incoming
7055 anonymous argument which is on the stack. */
7056 if (!named)
7057 return;
7059 /* Is this a potential co-processor register candidate? */
7060 if (pcum->pcs_variant != ARM_PCS_AAPCS)
7062 int slot = aapcs_select_call_coproc (pcum, mode, type);
7063 pcum->aapcs_cprc_slot = slot;
7065 /* We don't have to apply any of the rules from part B of the
7066 preparation phase, these are handled elsewhere in the
7067 compiler. */
7069 if (slot >= 0)
7071 /* A Co-processor register candidate goes either in its own
7072 class of registers or on the stack. */
7073 if (!pcum->aapcs_cprc_failed[slot])
7075 /* C1.cp - Try to allocate the argument to co-processor
7076 registers. */
7077 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
7078 return;
7080 /* C2.cp - Put the argument on the stack and note that we
7081 can't assign any more candidates in this slot. We also
7082 need to note that we have allocated stack space, so that
7083 we won't later try to split a non-cprc candidate between
7084 core registers and the stack. */
7085 pcum->aapcs_cprc_failed[slot] = true;
7086 pcum->can_split = false;
7089 /* We didn't get a register, so this argument goes on the
7090 stack. */
7091 gcc_assert (pcum->can_split == false);
7092 return;
7096 /* C3 - For double-word aligned arguments, round the NCRN up to the
7097 next even number. */
7098 ncrn = pcum->aapcs_ncrn;
7099 if (ncrn & 1)
7101 int res = arm_needs_doubleword_align (mode, type);
7102 /* Only warn during RTL expansion of call stmts, otherwise we would
7103 warn e.g. during gimplification even on functions that will be
7104 always inlined, and we'd warn multiple times. Don't warn when
7105 called in expand_function_start either, as we warn instead in
7106 arm_function_arg_boundary in that case. */
7107 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
7108 inform (input_location, "parameter passing for argument of type "
7109 "%qT changed in GCC 7.1", type);
7110 else if (res > 0)
7111 ncrn++;
7114 nregs = ARM_NUM_REGS2(mode, type);
7116 /* Sigh, this test should really assert that nregs > 0, but a GCC
7117 extension allows empty structs and then gives them empty size; it
7118 then allows such a structure to be passed by value. For some of
7119 the code below we have to pretend that such an argument has
7120 non-zero size so that we 'locate' it correctly either in
7121 registers or on the stack. */
7122 gcc_assert (nregs >= 0);
7124 nregs2 = nregs ? nregs : 1;
7126 /* C4 - Argument fits entirely in core registers. */
7127 if (ncrn + nregs2 <= NUM_ARG_REGS)
7129 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7130 pcum->aapcs_next_ncrn = ncrn + nregs;
7131 return;
7134 /* C5 - Some core registers left and there are no arguments already
7135 on the stack: split this argument between the remaining core
7136 registers and the stack. */
7137 if (ncrn < NUM_ARG_REGS && pcum->can_split)
7139 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7140 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7141 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
7142 return;
7145 /* C6 - NCRN is set to 4. */
7146 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7148 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
7149 return;
7152 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7153 for a call to a function whose data type is FNTYPE.
7154 For a library call, FNTYPE is NULL. */
7155 void
7156 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
7157 rtx libname,
7158 tree fndecl ATTRIBUTE_UNUSED)
7160 /* Long call handling. */
7161 if (fntype)
7162 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
7163 else
7164 pcum->pcs_variant = arm_pcs_default;
7166 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7168 if (arm_libcall_uses_aapcs_base (libname))
7169 pcum->pcs_variant = ARM_PCS_AAPCS;
7171 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
7172 pcum->aapcs_reg = NULL_RTX;
7173 pcum->aapcs_partial = 0;
7174 pcum->aapcs_arg_processed = false;
7175 pcum->aapcs_cprc_slot = -1;
7176 pcum->can_split = true;
7178 if (pcum->pcs_variant != ARM_PCS_AAPCS)
7180 int i;
7182 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
7184 pcum->aapcs_cprc_failed[i] = false;
7185 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
7188 return;
7191 /* Legacy ABIs */
7193 /* On the ARM, the offset starts at 0. */
7194 pcum->nregs = 0;
7195 pcum->iwmmxt_nregs = 0;
7196 pcum->can_split = true;
7198 /* Varargs vectors are treated the same as long long.
7199 named_count avoids having to change the way arm handles 'named' */
7200 pcum->named_count = 0;
7201 pcum->nargs = 0;
7203 if (TARGET_REALLY_IWMMXT && fntype)
7205 tree fn_arg;
7207 for (fn_arg = TYPE_ARG_TYPES (fntype);
7208 fn_arg;
7209 fn_arg = TREE_CHAIN (fn_arg))
7210 pcum->named_count += 1;
7212 if (! pcum->named_count)
7213 pcum->named_count = INT_MAX;
7217 /* Return 2 if double word alignment is required for argument passing,
7218 but wasn't required before the fix for PR88469.
7219 Return 1 if double word alignment is required for argument passing.
7220 Return -1 if double word alignment used to be required for argument
7221 passing before PR77728 ABI fix, but is not required anymore.
7222 Return 0 if double word alignment is not required and wasn't required
7223 before either. */
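/* For example, on an AAPCS target a hypothetical

       struct s { long long x; };

   has 8-byte alignment and so needs doubleword alignment when passed
   (return 1), whereas a plain int argument does not (return 0).  */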
7224 static int
7225 arm_needs_doubleword_align (machine_mode mode, const_tree type)
7227 if (!type)
7228 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
7230 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
7231 if (!AGGREGATE_TYPE_P (type))
7232 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
7234 /* Array types: Use member alignment of element type. */
7235 if (TREE_CODE (type) == ARRAY_TYPE)
7236 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
7238 int ret = 0;
7239 int ret2 = 0;
7240 /* Record/aggregate types: Use greatest member alignment of any member.
7242 Note that we explicitly consider zero-sized fields here, even though
7243 they don't map to AAPCS machine types. For example, in:
7245 struct __attribute__((aligned(8))) empty {};
7247 struct s {
7248 [[no_unique_address]] empty e;
7249 int x;
7252 "s" contains only one Fundamental Data Type (the int field)
7253 but gains 8-byte alignment and size thanks to "e". */
7254 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7255 if (DECL_ALIGN (field) > PARM_BOUNDARY)
7257 if (TREE_CODE (field) == FIELD_DECL)
7258 return 1;
7259 else
7260 /* Before PR77728 fix, we were incorrectly considering also
7261 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
7262 Make sure we can warn about that with -Wpsabi. */
7263 ret = -1;
7265 else if (TREE_CODE (field) == FIELD_DECL
7266 && DECL_BIT_FIELD_TYPE (field)
7267 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
7268 ret2 = 1;
7270 if (ret2)
7271 return 2;
7273 return ret;
7277 /* Determine where to put an argument to a function.
7278 Value is zero to push the argument on the stack,
7279 or a hard register in which to store the argument.
7281 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7282 the preceding args and about the function being called.
7283 ARG is a description of the argument.
7285 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
7286 other arguments are passed on the stack. If (NAMED == 0) (which happens
7287 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
7288 defined), say it is passed on the stack (function_prologue will
7289 indeed make it be passed on the stack if necessary). */
7291 static rtx
7292 arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
7294 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7295 int nregs;
7297 /* Handle the special case quickly. Pick an arbitrary value for op2 of
7298 a call insn (op3 of a call_value insn). */
7299 if (arg.end_marker_p ())
7300 return const0_rtx;
7302 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7304 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7305 return pcum->aapcs_reg;
7308 /* Varargs vectors are treated the same as long long.
7309 named_count avoids having to change the way arm handles 'named' */
7310 if (TARGET_IWMMXT_ABI
7311 && arm_vector_mode_supported_p (arg.mode)
7312 && pcum->named_count > pcum->nargs + 1)
7314 if (pcum->iwmmxt_nregs <= 9)
7315 return gen_rtx_REG (arg.mode,
7316 pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
7317 else
7319 pcum->can_split = false;
7320 return NULL_RTX;
7324 /* Put doubleword aligned quantities in even register pairs. */
7325 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
7327 int res = arm_needs_doubleword_align (arg.mode, arg.type);
7328 if (res < 0 && warn_psabi)
7329 inform (input_location, "parameter passing for argument of type "
7330 "%qT changed in GCC 7.1", arg.type);
7331 else if (res > 0)
7333 pcum->nregs++;
7334 if (res > 1 && warn_psabi)
7335 inform (input_location, "parameter passing for argument of type "
7336 "%qT changed in GCC 9.1", arg.type);
7340 /* Only allow splitting an arg between regs and memory if all preceding
7341 args were allocated to regs. For args passed by reference we only count
7342 the reference pointer. */
7343 if (pcum->can_split)
7344 nregs = 1;
7345 else
7346 nregs = ARM_NUM_REGS2 (arg.mode, arg.type);
7348 if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
7349 return NULL_RTX;
7351 return gen_rtx_REG (arg.mode, pcum->nregs);
7354 static unsigned int
7355 arm_function_arg_boundary (machine_mode mode, const_tree type)
7357 if (!ARM_DOUBLEWORD_ALIGN)
7358 return PARM_BOUNDARY;
7360 int res = arm_needs_doubleword_align (mode, type);
7361 if (res < 0 && warn_psabi)
7362 inform (input_location, "parameter passing for argument of type %qT "
7363 "changed in GCC 7.1", type);
7364 if (res > 1 && warn_psabi)
7365 inform (input_location, "parameter passing for argument of type "
7366 "%qT changed in GCC 9.1", type);
7368 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
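/* Return the number of bytes of ARG that are passed in registers when the
   argument is split between core registers and the stack (AAPCS rule C5).
   As a sketch, for a hypothetical

       struct di { int x; int y; };
       void f (int a, int b, int c, struct di s);

   only r3 is free when S is laid out, so 4 bytes of S are passed in r3 and
   the remaining 4 bytes go on the stack.  */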
7371 static int
7372 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
7374 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7375 int nregs = pcum->nregs;
7377 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7379 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7380 return pcum->aapcs_partial;
7383 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
7384 return 0;
7386 if (NUM_ARG_REGS > nregs
7387 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
7388 && pcum->can_split)
7389 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
7391 return 0;
7394 /* Update the data in PCUM to advance over argument ARG. */
7396 static void
7397 arm_function_arg_advance (cumulative_args_t pcum_v,
7398 const function_arg_info &arg)
7400 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7402 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7404 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7406 if (pcum->aapcs_cprc_slot >= 0)
7408 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
7409 arg.type);
7410 pcum->aapcs_cprc_slot = -1;
7413 /* Generic stuff. */
7414 pcum->aapcs_arg_processed = false;
7415 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
7416 pcum->aapcs_reg = NULL_RTX;
7417 pcum->aapcs_partial = 0;
7419 else
7421 pcum->nargs += 1;
7422 if (arm_vector_mode_supported_p (arg.mode)
7423 && pcum->named_count > pcum->nargs
7424 && TARGET_IWMMXT_ABI)
7425 pcum->iwmmxt_nregs += 1;
7426 else
7427 pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
7431 /* Variable sized types are passed by reference. This is a GCC
7432 extension to the ARM ABI. */
7434 static bool
7435 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
7437 return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
7440 /* Encode the current state of the #pragma [no_]long_calls. */
7441 typedef enum
7443 OFF, /* No #pragma [no_]long_calls is in effect. */
7444 LONG, /* #pragma long_calls is in effect. */
7445 SHORT /* #pragma no_long_calls is in effect. */
7446 } arm_pragma_enum;
7448 static arm_pragma_enum arm_pragma_long_calls = OFF;
7450 void
7451 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7453 arm_pragma_long_calls = LONG;
7456 void
7457 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7459 arm_pragma_long_calls = SHORT;
7462 void
7463 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7465 arm_pragma_long_calls = OFF;
7468 /* Handle an attribute requiring a FUNCTION_DECL;
7469 arguments as in struct attribute_spec.handler. */
7470 static tree
7471 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
7472 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7474 if (TREE_CODE (*node) != FUNCTION_DECL)
7476 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7477 name);
7478 *no_add_attrs = true;
7481 return NULL_TREE;
7484 /* Handle an "interrupt" or "isr" attribute;
7485 arguments as in struct attribute_spec.handler. */
7486 static tree
7487 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
7488 bool *no_add_attrs)
7490 if (DECL_P (*node))
7492 if (TREE_CODE (*node) != FUNCTION_DECL)
7494 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7495 name);
7496 *no_add_attrs = true;
7498 else if (TARGET_VFP_BASE)
7500 warning (OPT_Wattributes, "FP registers might be clobbered despite %qE attribute: compile with %<-mgeneral-regs-only%>",
7501 name);
7503 /* FIXME: the argument if any is checked for type attributes;
7504 should it be checked for decl ones? */
7506 else
7508 if (FUNC_OR_METHOD_TYPE_P (*node))
7510 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
7512 warning (OPT_Wattributes, "%qE attribute ignored",
7513 name);
7514 *no_add_attrs = true;
7517 else if (TREE_CODE (*node) == POINTER_TYPE
7518 && FUNC_OR_METHOD_TYPE_P (TREE_TYPE (*node))
7519 && arm_isr_value (args) != ARM_FT_UNKNOWN)
7521 *node = build_variant_type_copy (*node);
7522 TREE_TYPE (*node) = build_type_attribute_variant
7523 (TREE_TYPE (*node),
7524 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
7525 *no_add_attrs = true;
7527 else
7529 /* Possibly pass this attribute on from the type to a decl. */
7530 if (flags & ((int) ATTR_FLAG_DECL_NEXT
7531 | (int) ATTR_FLAG_FUNCTION_NEXT
7532 | (int) ATTR_FLAG_ARRAY_NEXT))
7534 *no_add_attrs = true;
7535 return tree_cons (name, args, NULL_TREE);
7537 else
7539 warning (OPT_Wattributes, "%qE attribute ignored",
7540 name);
7545 return NULL_TREE;
7548 /* Handle a "pcs" attribute; arguments as in struct
7549 attribute_spec.handler. */
7550 static tree
7551 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
7552 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7554 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
7556 warning (OPT_Wattributes, "%qE attribute ignored", name);
7557 *no_add_attrs = true;
7559 return NULL_TREE;
7562 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7563 /* Handle the "notshared" attribute. This attribute is another way of
7564 requesting hidden visibility. ARM's compiler supports
7565 "__declspec(notshared)"; we support the same thing via an
7566 attribute. */
7568 static tree
7569 arm_handle_notshared_attribute (tree *node,
7570 tree name ATTRIBUTE_UNUSED,
7571 tree args ATTRIBUTE_UNUSED,
7572 int flags ATTRIBUTE_UNUSED,
7573 bool *no_add_attrs)
7575 tree decl = TYPE_NAME (*node);
7577 if (decl)
7579 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
7580 DECL_VISIBILITY_SPECIFIED (decl) = 1;
7581 *no_add_attrs = false;
7583 return NULL_TREE;
7585 #endif
7587 /* This function returns true if a function with declaration FNDECL and type
7588 FNTYPE uses the stack to pass arguments or to return a value, and false
7589 otherwise. This is used for functions with the attributes
7590 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7591 diagnostic messages if the stack is used. NAME is the name of the attribute
7592 used. */
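/* For example, a hypothetical

       int __attribute__ ((cmse_nonsecure_entry)) f (int, int, int, int, int);

   is rejected, because its fifth argument would be passed on the stack.  */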
7594 static bool
7595 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
7597 function_args_iterator args_iter;
7598 CUMULATIVE_ARGS args_so_far_v;
7599 cumulative_args_t args_so_far;
7600 bool first_param = true;
7601 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
7603 /* Error out if any argument is passed on the stack. */
7604 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
7605 args_so_far = pack_cumulative_args (&args_so_far_v);
7606 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
7608 rtx arg_rtx;
7610 prev_arg_type = arg_type;
7611 if (VOID_TYPE_P (arg_type))
7612 continue;
7614 function_arg_info arg (arg_type, /*named=*/true);
7615 if (!first_param)
7616 /* ??? We should advance after processing the argument and pass
7617 the argument we're advancing past. */
7618 arm_function_arg_advance (args_so_far, arg);
7619 arg_rtx = arm_function_arg (args_so_far, arg);
7620 if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7622 error ("%qE attribute not available to functions with arguments "
7623 "passed on the stack", name);
7624 return true;
7626 first_param = false;
7629 /* Error out for variadic functions since we cannot control how many
7630 arguments will be passed and thus the stack could be used. stdarg_p () is not
7631 used for the checking to avoid browsing arguments twice. */
7632 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7634 error ("%qE attribute not available to functions with variable number "
7635 "of arguments", name);
7636 return true;
7639 /* Error out if return value is passed on the stack. */
7640 ret_type = TREE_TYPE (fntype);
7641 if (arm_return_in_memory (ret_type, fntype))
7643 error ("%qE attribute not available to functions that return value on "
7644 "the stack", name);
7645 return true;
7647 return false;
7650 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7651 function will check whether the attribute is allowed here and will add the
7652 attribute to the function declaration tree or otherwise issue a warning. */
7654 static tree
7655 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7656 tree /* args */,
7657 int /* flags */,
7658 bool *no_add_attrs)
7660 tree fndecl;
7662 if (!use_cmse)
7664 *no_add_attrs = true;
7665 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7666 "option", name);
7667 return NULL_TREE;
7670 /* Ignore attribute for function types. */
7671 if (TREE_CODE (*node) != FUNCTION_DECL)
7673 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7674 name);
7675 *no_add_attrs = true;
7676 return NULL_TREE;
7679 fndecl = *node;
7681 /* Warn for static linkage functions. */
7682 if (!TREE_PUBLIC (fndecl))
7684 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7685 "with static linkage", name);
7686 *no_add_attrs = true;
7687 return NULL_TREE;
7690 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7691 TREE_TYPE (fndecl));
7692 return NULL_TREE;
7696 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7697 function will check whether the attribute is allowed here and will add the
7698 attribute to the function type tree or otherwise issue a diagnostic. The
7699 reason we check this at declaration time is to only allow the use of the
7700 attribute with declarations of function pointers and not function
7701 declarations. This function checks NODE is of the expected type and issues
7702 diagnostics otherwise using NAME. If it is not of the expected type
7703 *NO_ADD_ATTRS will be set to true. */
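/* A typical use, assuming -mcmse, attaches the attribute to a function
   pointer type rather than to a function declaration, for example:

       typedef void __attribute__ ((cmse_nonsecure_call)) nsfunc (void);
       nsfunc *ns_fn;  */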
7705 static tree
7706 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7707 tree /* args */,
7708 int /* flags */,
7709 bool *no_add_attrs)
7711 tree decl = NULL_TREE;
7712 tree fntype, type;
7714 if (!use_cmse)
7716 *no_add_attrs = true;
7717 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7718 "option", name);
7719 return NULL_TREE;
7722 if (DECL_P (*node))
7724 fntype = TREE_TYPE (*node);
7726 if (VAR_P (*node) || TREE_CODE (*node) == TYPE_DECL)
7727 decl = *node;
7729 else
7730 fntype = *node;
7732 while (fntype && TREE_CODE (fntype) == POINTER_TYPE)
7733 fntype = TREE_TYPE (fntype);
7735 if ((DECL_P (*node) && !decl) || TREE_CODE (fntype) != FUNCTION_TYPE)
7737 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7738 "function pointer", name);
7739 *no_add_attrs = true;
7740 return NULL_TREE;
7743 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7745 if (*no_add_attrs)
7746 return NULL_TREE;
7748 /* Prevent trees being shared among function types with and without
7749 cmse_nonsecure_call attribute. */
7750 if (decl)
7752 type = build_distinct_type_copy (TREE_TYPE (decl));
7753 TREE_TYPE (decl) = type;
7755 else
7757 type = build_distinct_type_copy (*node);
7758 *node = type;
7761 fntype = type;
7763 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7765 type = fntype;
7766 fntype = TREE_TYPE (fntype);
7767 fntype = build_distinct_type_copy (fntype);
7768 TREE_TYPE (type) = fntype;
7771 /* Construct a type attribute and add it to the function type. */
7772 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7773 TYPE_ATTRIBUTES (fntype));
7774 TYPE_ATTRIBUTES (fntype) = attrs;
7775 return NULL_TREE;
7778 /* Return 0 if the attributes for two types are incompatible, 1 if they
7779 are compatible, and 2 if they are nearly compatible (which causes a
7780 warning to be generated). */
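/* For instance, a function type carrying the "long_call" attribute and an
   otherwise identical type without it compare as incompatible here (return
   value 0), because their sets of call attributes differ.  */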
7781 static int
7782 arm_comp_type_attributes (const_tree type1, const_tree type2)
7784 int l1, l2, s1, s2;
7786 tree attrs1 = lookup_attribute ("Advanced SIMD type",
7787 TYPE_ATTRIBUTES (type1));
7788 tree attrs2 = lookup_attribute ("Advanced SIMD type",
7789 TYPE_ATTRIBUTES (type2));
7790 if (bool (attrs1) != bool (attrs2))
7791 return 0;
7792 if (attrs1 && !attribute_value_equal (attrs1, attrs2))
7793 return 0;
7795 /* Check for mismatch of non-default calling convention. */
7796 if (TREE_CODE (type1) != FUNCTION_TYPE)
7797 return 1;
7799 /* Check for mismatched call attributes. */
7800 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7801 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7802 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7803 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7805 /* Only bother to check if an attribute is defined. */
7806 if (l1 | l2 | s1 | s2)
7808 /* If one type has an attribute, the other must have the same attribute. */
7809 if ((l1 != l2) || (s1 != s2))
7810 return 0;
7812 /* Disallow mixed attributes. */
7813 if ((l1 & s2) || (l2 & s1))
7814 return 0;
7817 /* Check for mismatched ISR attribute. */
7818 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7819 if (! l1)
7820 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7821 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7822 if (! l2)
7823 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7824 if (l1 != l2)
7825 return 0;
7827 l1 = lookup_attribute ("cmse_nonsecure_call",
7828 TYPE_ATTRIBUTES (type1)) != NULL;
7829 l2 = lookup_attribute ("cmse_nonsecure_call",
7830 TYPE_ATTRIBUTES (type2)) != NULL;
7832 if (l1 != l2)
7833 return 0;
7835 return 1;
7838 /* Assigns default attributes to newly defined type. This is used to
7839 set short_call/long_call attributes for function types of
7840 functions defined inside corresponding #pragma scopes. */
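/* So, as a sketch, functions declared in a region such as

       #pragma long_calls
       void far_away (void);
       #pragma long_calls_off

   have their types given an implicit "long_call" attribute.  */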
7841 static void
7842 arm_set_default_type_attributes (tree type)
7844 /* Add __attribute__ ((long_call)) to all functions when inside
7845 #pragma long_calls, or __attribute__ ((short_call)) when inside
7846 #pragma no_long_calls. */
7847 if (FUNC_OR_METHOD_TYPE_P (type))
7849 tree type_attr_list, attr_name;
7850 type_attr_list = TYPE_ATTRIBUTES (type);
7852 if (arm_pragma_long_calls == LONG)
7853 attr_name = get_identifier ("long_call");
7854 else if (arm_pragma_long_calls == SHORT)
7855 attr_name = get_identifier ("short_call");
7856 else
7857 return;
7859 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7860 TYPE_ATTRIBUTES (type) = type_attr_list;
7864 /* Return true if DECL is known to be linked into section SECTION. */
7866 static bool
7867 arm_function_in_section_p (tree decl, section *section)
7869 /* We can only be certain about the prevailing symbol definition. */
7870 if (!decl_binds_to_current_def_p (decl))
7871 return false;
7873 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7874 if (!DECL_SECTION_NAME (decl))
7876 /* Make sure that we will not create a unique section for DECL. */
7877 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7878 return false;
7881 return function_section (decl) == section;
7884 /* Return nonzero if a 32-bit "long_call" should be generated for
7885 a call from the current function to DECL. We generate a long_call
7886 if the function:
7888 a. has an __attribute__((long_call))
7889 or b. is within the scope of a #pragma long_calls
7890 or c. the -mlong-calls command line switch has been specified
7892 However we do not generate a long call if the function:
7894 d. has an __attribute__ ((short_call))
7895 or e. is inside the scope of a #pragma no_long_calls
7896 or f. is defined in the same section as the current function. */
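/* When this returns true, the call is emitted by loading the callee's
   address into a register and calling through that register, rather than
   using a plain BL, so the callee can live anywhere in the address space.  */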
7898 bool
7899 arm_is_long_call_p (tree decl)
7901 tree attrs;
7903 if (!decl)
7904 return TARGET_LONG_CALLS;
7906 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7907 if (lookup_attribute ("short_call", attrs))
7908 return false;
7910 /* For "f", be conservative, and only cater for cases in which the
7911 whole of the current function is placed in the same section. */
7912 if (!flag_reorder_blocks_and_partition
7913 && TREE_CODE (decl) == FUNCTION_DECL
7914 && arm_function_in_section_p (decl, current_function_section ()))
7915 return false;
7917 if (lookup_attribute ("long_call", attrs))
7918 return true;
7920 return TARGET_LONG_CALLS;
7923 /* Return nonzero if it is ok to make a tail-call to DECL. */
7924 static bool
7925 arm_function_ok_for_sibcall (tree decl, tree exp)
7927 unsigned long func_type;
7929 if (cfun->machine->sibcall_blocked)
7930 return false;
7932 if (TARGET_FDPIC)
7934 /* In FDPIC, never tailcall something for which we have no decl:
7935 the target function could be in a different module, requiring
7936 a different FDPIC register value. */
7937 if (decl == NULL)
7938 return false;
7941 /* Never tailcall something if we are generating code for Thumb-1. */
7942 if (TARGET_THUMB1)
7943 return false;
7945 /* The PIC register is live on entry to VxWorks PLT entries, so we
7946 must make the call before restoring the PIC register. */
7947 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7948 return false;
7950 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7951 may be used both as target of the call and base register for restoring
7952 the VFP registers */
7953 if (TARGET_APCS_FRAME && TARGET_ARM
7954 && TARGET_HARD_FLOAT
7955 && decl && arm_is_long_call_p (decl))
7956 return false;
7958 /* If we are interworking and the function is not declared static
7959 then we can't tail-call it unless we know that it exists in this
7960 compilation unit (since it might be a Thumb routine). */
7961 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7962 && !TREE_ASM_WRITTEN (decl))
7963 return false;
7965 func_type = arm_current_func_type ();
7966 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7967 if (IS_INTERRUPT (func_type))
7968 return false;
7970 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7971 generated for entry functions themselves. */
7972 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7973 return false;
7975 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7976 this would complicate matters for later code generation. */
7977 if (TREE_CODE (exp) == CALL_EXPR)
7979 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7980 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7981 return false;
7984 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7986 /* Check that the return value locations are the same. For
7987 example that we aren't returning a value from the sibling in
7988 a VFP register but then need to transfer it to a core
7989 register. */
7990 rtx a, b;
7991 tree decl_or_type = decl;
7993 /* If it is an indirect function pointer, get the function type. */
7994 if (!decl)
7995 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7997 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7998 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7999 cfun->decl, false);
8000 if (!rtx_equal_p (a, b))
8001 return false;
8004 /* Never tailcall if function may be called with a misaligned SP. */
8005 if (IS_STACKALIGN (func_type))
8006 return false;
8008 /* The AAPCS says that, on bare-metal, calls to unresolved weak
8009 references should become a NOP. Don't convert such calls into
8010 sibling calls. */
8011 if (TARGET_AAPCS_BASED
8012 && arm_abi == ARM_ABI_AAPCS
8013 && decl
8014 && DECL_WEAK (decl))
8015 return false;
8017 /* Indirect tailcalls need a call-clobbered register to hold the function
8018 address. But we only have r0-r3 and ip in that class. If r0-r3 all hold
8019 function arguments, then we can only use IP. But IP may be needed in the
8020 epilogue (for PAC validation), or for passing the static chain. We have
8021 to disable the tail call if nothing is available. */
8022 if (!decl
8023 && ((CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
8024 || arm_current_function_pac_enabled_p()))
8026 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
8027 CUMULATIVE_ARGS cum;
8028 cumulative_args_t cum_v;
8030 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
8031 cum_v = pack_cumulative_args (&cum);
8033 tree arg;
8034 call_expr_arg_iterator iter;
8035 unsigned used_regs = 0;
8037 /* Layout each actual argument in turn. If it is allocated to
8038 core regs, note which regs have been allocated. */
8039 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
8041 tree type = TREE_TYPE (arg);
8042 function_arg_info arg_info (type, /*named=*/true);
8043 rtx reg = arm_function_arg (cum_v, arg_info);
8044 if (reg && REG_P (reg)
8045 && REGNO (reg) <= LAST_ARG_REGNUM)
8047 /* Avoid any chance of UB here. We don't care if TYPE
8048 is very large since it will use up all the argument regs. */
8049 unsigned nregs = MIN (ARM_NUM_REGS2 (GET_MODE (reg), type),
8050 LAST_ARG_REGNUM + 1);
8051 used_regs |= ((1 << nregs) - 1) << REGNO (reg);
8053 arm_function_arg_advance (cum_v, arg_info);
8056 /* We've used all the argument regs, and we know IP is live during the
8057 epilogue for some reason, so we can't tailcall. */
8058 if ((used_regs & ((1 << (LAST_ARG_REGNUM + 1)) - 1))
8059 == ((1 << (LAST_ARG_REGNUM + 1)) - 1))
8060 return false;
8063 /* Everything else is ok. */
8064 return true;
8068 /* Addressing mode support functions. */
8070 /* Return nonzero if X is a legitimate immediate operand when compiling
8071 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
8073 legitimate_pic_operand_p (rtx x)
8075 if (SYMBOL_REF_P (x)
8076 || (GET_CODE (x) == CONST
8077 && GET_CODE (XEXP (x, 0)) == PLUS
8078 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
8079 return 0;
8081 return 1;
8084 /* Record that the current function needs a PIC register. If PIC_REG is null,
8085 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
8086 both cases cfun->machine->pic_reg is initialized if we have not already done
8087 so. COMPUTE_NOW decides whether and where to set the PIC register. If true,
8088 the PIC register is reloaded in the current position of the instruction stream
8089 regardless of whether it was loaded before. Otherwise, it is only loaded
8090 if that has not already been done (crtl->uses_pic_offset_table is null). Note that
8091 nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
8092 is only supported iff COMPUTE_NOW is false. */
8094 static void
8095 require_pic_register (rtx pic_reg, bool compute_now)
8097 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8099 /* A lot of the logic here is made obscure by the fact that this
8100 routine gets called as part of the rtx cost estimation process.
8101 We don't want those calls to affect any assumptions about the real
8102 function; and further, we can't call entry_of_function() until we
8103 start the real expansion process. */
8104 if (!crtl->uses_pic_offset_table || compute_now)
8106 gcc_assert (can_create_pseudo_p ()
8107 || (pic_reg != NULL_RTX
8108 && REG_P (pic_reg)
8109 && GET_MODE (pic_reg) == Pmode));
8110 if (arm_pic_register != INVALID_REGNUM
8111 && !compute_now
8112 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
8114 if (!cfun->machine->pic_reg)
8115 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
8117 /* Play games to avoid marking the function as needing pic
8118 if we are being called as part of the cost-estimation
8119 process. */
8120 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8121 crtl->uses_pic_offset_table = 1;
8123 else
8125 rtx_insn *seq, *insn;
8127 if (pic_reg == NULL_RTX)
8128 pic_reg = gen_reg_rtx (Pmode);
8129 if (!cfun->machine->pic_reg)
8130 cfun->machine->pic_reg = pic_reg;
8132 /* Play games to avoid marking the function as needing pic
8133 if we are being called as part of the cost-estimation
8134 process. */
8135 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8137 crtl->uses_pic_offset_table = 1;
8138 start_sequence ();
8140 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
8141 && arm_pic_register > LAST_LO_REGNUM
8142 && !compute_now)
8143 emit_move_insn (cfun->machine->pic_reg,
8144 gen_rtx_REG (Pmode, arm_pic_register));
8145 else
8146 arm_load_pic_register (0UL, pic_reg);
8148 seq = get_insns ();
8149 end_sequence ();
8151 for (insn = seq; insn; insn = NEXT_INSN (insn))
8152 if (INSN_P (insn))
8153 INSN_LOCATION (insn) = prologue_location;
8155 /* We can be called during expansion of PHI nodes, where
8156 we can't yet emit instructions directly in the final
8157 insn stream. Queue the insns on the entry edge, they will
8158 be committed after everything else is expanded. */
8159 if (currently_expanding_to_rtl)
8160 insert_insn_on_edge (seq,
8161 single_succ_edge
8162 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
8163 else
8164 emit_insn (seq);
8170 /* Generate insns to calculate the address of ORIG in pic mode. */
8171 static rtx_insn *
8172 calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
8174 rtx pat;
8175 rtx mem;
8177 pat = gen_calculate_pic_address (reg, pic_reg, orig);
8179 /* Make the MEM as close to a constant as possible. */
8180 mem = SET_SRC (pat);
8181 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
8182 MEM_READONLY_P (mem) = 1;
8183 MEM_NOTRAP_P (mem) = 1;
8185 return emit_insn (pat);
8188 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
8189 created to hold the result of the load. If not NULL, PIC_REG indicates
8190 which register to use as the PIC register, otherwise it is decided by the
8191 register allocator. COMPUTE_NOW forces the PIC register to be loaded at the
8192 current location in the instruction stream, regardless of whether it was loaded
8193 previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
8194 true and null PIC_REG is only supported iff COMPUTE_NOW is false.
8196 Returns the register REG into which the PIC load is performed. */
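/* In rough terms, with -fPIC a reference to a preemptible global symbol is
   rewritten into a load from the symbol's GOT slot addressed relative to
   the PIC register, while local symbols and labels can instead use the
   cheaper PC-relative form generated by arm_pic_static_addr.  */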
8199 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
8200 bool compute_now)
8202 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8204 if (SYMBOL_REF_P (orig)
8205 || LABEL_REF_P (orig))
8207 if (reg == 0)
8209 gcc_assert (can_create_pseudo_p ());
8210 reg = gen_reg_rtx (Pmode);
8213 /* VxWorks does not impose a fixed gap between segments; the run-time
8214 gap can be different from the object-file gap. We therefore can't
8215 use GOTOFF unless we are absolutely sure that the symbol is in the
8216 same segment as the GOT. Unfortunately, the flexibility of linker
8217 scripts means that we can't be sure of that in general, so assume
8218 that GOTOFF is never valid on VxWorks. */
8219 /* References to weak symbols cannot be resolved locally: they
8220 may be overridden by a non-weak definition at link time. */
8221 rtx_insn *insn;
8222 if ((LABEL_REF_P (orig)
8223 || (SYMBOL_REF_P (orig)
8224 && SYMBOL_REF_LOCAL_P (orig)
8225 && (SYMBOL_REF_DECL (orig)
8226 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
8227 && (!SYMBOL_REF_FUNCTION_P (orig)
8228 || arm_fdpic_local_funcdesc_p (orig))))
8229 && NEED_GOT_RELOC
8230 && arm_pic_data_is_text_relative)
8231 insn = arm_pic_static_addr (orig, reg);
8232 else
8234 /* If this function doesn't have a pic register, create one now. */
8235 require_pic_register (pic_reg, compute_now);
8237 if (pic_reg == NULL_RTX)
8238 pic_reg = cfun->machine->pic_reg;
8240 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8243 /* Put a REG_EQUAL note on this insn, so that it can be optimized
8244 by loop. */
8245 set_unique_reg_note (insn, REG_EQUAL, orig);
8247 return reg;
8249 else if (GET_CODE (orig) == CONST)
8251 rtx base, offset;
8253 if (GET_CODE (XEXP (orig, 0)) == PLUS
8254 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
8255 return orig;
8257 /* Handle the case where we have: const (UNSPEC_TLS). */
8258 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
8259 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
8260 return orig;
8262 /* Handle the case where we have:
8263 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
8264 CONST_INT. */
8265 if (GET_CODE (XEXP (orig, 0)) == PLUS
8266 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
8267 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
8269 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
8270 return orig;
8273 if (reg == 0)
8275 gcc_assert (can_create_pseudo_p ());
8276 reg = gen_reg_rtx (Pmode);
8279 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
8281 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
8282 pic_reg, compute_now);
8283 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
8284 base == reg ? 0 : reg, pic_reg,
8285 compute_now);
8287 if (CONST_INT_P (offset))
8289 /* The base register doesn't really matter, we only want to
8290 test the index for the appropriate mode. */
8291 if (!arm_legitimate_index_p (mode, offset, SET, 0))
8293 gcc_assert (can_create_pseudo_p ());
8294 offset = force_reg (Pmode, offset);
8297 if (CONST_INT_P (offset))
8298 return plus_constant (Pmode, base, INTVAL (offset));
8301 if (GET_MODE_SIZE (mode) > 4
8302 && (GET_MODE_CLASS (mode) == MODE_INT
8303 || TARGET_SOFT_FLOAT))
8305 emit_insn (gen_addsi3 (reg, base, offset));
8306 return reg;
8309 return gen_rtx_PLUS (Pmode, base, offset);
8312 return orig;
8316 /* Generate insns that produce the address of the stack canary. */
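/* This supports -mstack-protector-guard=tls, where the canary lives at the
   fixed offset from the thread pointer given by
   -mstack-protector-guard-offset=, so the canary is fetched from
   [TP + offset] rather than from the global __stack_chk_guard.  */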
8318 arm_stack_protect_tls_canary_mem (bool reload)
8320 rtx tp = gen_reg_rtx (SImode);
8321 if (reload)
8322 emit_insn (gen_reload_tp_hard (tp));
8323 else
8324 emit_insn (gen_load_tp_hard (tp));
8326 rtx reg = gen_reg_rtx (SImode);
8327 rtx offset = GEN_INT (arm_stack_protector_guard_offset);
8328 emit_set_insn (reg, gen_rtx_PLUS (SImode, tp, offset));
8329 return gen_rtx_MEM (SImode, reg);
8333 /* Whether a register is callee saved or not. This is necessary because high
8334 registers are marked as caller saved when optimizing for size on Thumb-1
8335 targets, despite being callee saved, in order to avoid using them. */
8336 #define callee_saved_reg_p(reg) \
8337 (!call_used_or_fixed_reg_p (reg) \
8338 || (TARGET_THUMB1 && optimize_size \
8339 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
8341 /* Return a mask for the call-clobbered low registers that are unused
8342 at the end of the prologue. */
8343 static unsigned long
8344 thumb1_prologue_unused_call_clobbered_lo_regs (void)
8346 unsigned long mask = 0;
8347 bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
8349 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8350 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
8351 mask |= 1 << (reg - FIRST_LO_REGNUM);
8352 return mask;
8355 /* Similarly for the start of the epilogue. */
8356 static unsigned long
8357 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
8359 unsigned long mask = 0;
8360 bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
8362 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8363 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
8364 mask |= 1 << (reg - FIRST_LO_REGNUM);
8365 return mask;
8368 /* Find a spare register to use during the prolog of a function. */
8370 static int
8371 thumb_find_work_register (unsigned long pushed_regs_mask)
8373 int reg;
8375 unsigned long unused_regs
8376 = thumb1_prologue_unused_call_clobbered_lo_regs ();
8378 /* Check the argument registers first as these are call-used. The
8379 register allocation order means that sometimes r3 might be used
8380 but earlier argument registers might not, so check them all. */
8381 for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
8382 if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
8383 return reg;
8385 /* Otherwise look for a call-saved register that is going to be pushed. */
8386 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
8387 if (pushed_regs_mask & (1 << reg))
8388 return reg;
8390 if (TARGET_THUMB2)
8392 /* Thumb-2 can use high regs. */
8393 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
8394 if (pushed_regs_mask & (1 << reg))
8395 return reg;
8397 /* Something went wrong - thumb_compute_save_reg_mask()
8398 should have arranged for a suitable register to be pushed. */
8399 gcc_unreachable ();
8402 static GTY(()) int pic_labelno;
8404 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
8405 low register. */
8407 void
8408 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
8410 rtx l1, labelno, pic_tmp, pic_rtx;
8412 if (crtl->uses_pic_offset_table == 0
8413 || TARGET_SINGLE_PIC_BASE
8414 || TARGET_FDPIC)
8415 return;
8417 gcc_assert (flag_pic);
8419 if (pic_reg == NULL_RTX)
8420 pic_reg = cfun->machine->pic_reg;
8421 if (TARGET_VXWORKS_RTP)
8423 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
8424 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8425 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
8427 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
8429 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8430 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
8432 else
8434 /* We use an UNSPEC rather than a LABEL_REF because this label
8435 never appears in the code stream. */
8437 labelno = GEN_INT (pic_labelno++);
8438 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8439 l1 = gen_rtx_CONST (VOIDmode, l1);
8441 /* On the ARM the PC register contains 'dot + 8' at the time of the
8442 addition, on the Thumb it is 'dot + 4'. */
8443 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8444 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
8445 UNSPEC_GOTSYM_OFF);
8446 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8448 if (TARGET_32BIT)
8450 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8452 else /* TARGET_THUMB1 */
8454 if (arm_pic_register != INVALID_REGNUM
8455 && REGNO (pic_reg) > LAST_LO_REGNUM)
8457 /* We will have pushed the pic register, so we should always be
8458 able to find a work register. */
8459 pic_tmp = gen_rtx_REG (SImode,
8460 thumb_find_work_register (saved_regs));
8461 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
8462 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
8463 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
8465 else if (arm_pic_register != INVALID_REGNUM
8466 && arm_pic_register > LAST_LO_REGNUM
8467 && REGNO (pic_reg) <= LAST_LO_REGNUM)
8469 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8470 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
8471 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
8473 else
8474 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8478 /* Need to emit this whether or not we obey regdecls,
8479 since setjmp/longjmp can cause life info to screw up. */
8480 emit_use (pic_reg);
8483 /* Try to determine whether an object, referenced via ORIG, will be
8484 placed in the text or data segment. This is used in FDPIC mode, to
8485 decide which relocations to use when accessing ORIG. *IS_READONLY
8486 is set to true if ORIG is a read-only location, false otherwise.
8487 Return true if we could determine the location of ORIG, false
8488 otherwise. *IS_READONLY is valid only when we return true. */
8489 static bool
8490 arm_is_segment_info_known (rtx orig, bool *is_readonly)
8492 *is_readonly = false;
8494 if (LABEL_REF_P (orig))
8496 *is_readonly = true;
8497 return true;
8500 if (SYMBOL_REF_P (orig))
8502 if (CONSTANT_POOL_ADDRESS_P (orig))
8504 *is_readonly = true;
8505 return true;
8507 if (SYMBOL_REF_LOCAL_P (orig)
8508 && !SYMBOL_REF_EXTERNAL_P (orig)
8509 && SYMBOL_REF_DECL (orig)
8510 && (!DECL_P (SYMBOL_REF_DECL (orig))
8511 || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
8513 tree decl = SYMBOL_REF_DECL (orig);
8514 tree init = VAR_P (decl)
8515 ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
8516 ? decl : 0;
8517 int reloc = 0;
8518 bool named_section, readonly;
8520 if (init && init != error_mark_node)
8521 reloc = compute_reloc_for_constant (init);
8523 named_section = VAR_P (decl)
8524 && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
8525 readonly = decl_readonly_section (decl, reloc);
8527 /* We don't know where the link script will put a named
8528 section, so return false in such a case. */
8529 if (named_section)
8530 return false;
8532 *is_readonly = readonly;
8533 return true;
8536 /* We don't know. */
8537 return false;
8540 gcc_unreachable ();
8543 /* Generate code to load the address of a static var when flag_pic is set. */
8544 static rtx_insn *
8545 arm_pic_static_addr (rtx orig, rtx reg)
8547 rtx l1, labelno, offset_rtx;
8548 rtx_insn *insn;
8550 gcc_assert (flag_pic);
8552 bool is_readonly = false;
8553 bool info_known = false;
8555 if (TARGET_FDPIC
8556 && SYMBOL_REF_P (orig)
8557 && !SYMBOL_REF_FUNCTION_P (orig))
8558 info_known = arm_is_segment_info_known (orig, &is_readonly);
8560 if (TARGET_FDPIC
8561 && SYMBOL_REF_P (orig)
8562 && !SYMBOL_REF_FUNCTION_P (orig)
8563 && !info_known)
8565 /* We don't know where orig is stored, so we have to be
8566 pessimistic and use a GOT relocation. */
8567 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8569 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8571 else if (TARGET_FDPIC
8572 && SYMBOL_REF_P (orig)
8573 && (SYMBOL_REF_FUNCTION_P (orig)
8574 || !is_readonly))
8576 /* We use the GOTOFF relocation. */
8577 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8579 rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
8580 emit_insn (gen_movsi (reg, l1));
8581 insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
8583 else
8585 /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8586 PC-relative access. */
8587 /* We use an UNSPEC rather than a LABEL_REF because this label
8588 never appears in the code stream. */
8589 labelno = GEN_INT (pic_labelno++);
8590 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8591 l1 = gen_rtx_CONST (VOIDmode, l1);
8593 /* On the ARM the PC register contains 'dot + 8' at the time of the
8594 addition, on the Thumb it is 'dot + 4'. */
8595 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8596 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
8597 UNSPEC_SYMBOL_OFFSET);
8598 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
8600 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
8601 labelno));
8604 return insn;
8607 /* Return nonzero if X is valid as an ARM state addressing register. */
8608 static int
8609 arm_address_register_rtx_p (rtx x, int strict_p)
8611 int regno;
8613 if (!REG_P (x))
8614 return 0;
8616 regno = REGNO (x);
8618 if (strict_p)
8619 return ARM_REGNO_OK_FOR_BASE_P (regno);
8621 return (regno <= LAST_ARM_REGNUM
8622 || regno >= FIRST_PSEUDO_REGISTER
8623 || regno == FRAME_POINTER_REGNUM
8624 || regno == ARG_POINTER_REGNUM);
8627 /* Return TRUE if this rtx is the difference of a symbol and a label,
8628 and will reduce to a PC-relative relocation in the object file.
8629 Expressions like this can be left alone when generating PIC, rather
8630 than forced through the GOT. */
8631 static int
8632 pcrel_constant_p (rtx x)
8634 if (GET_CODE (x) == MINUS)
8635 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
8637 return FALSE;
8640 /* Return true if X will surely end up in an index register after the next
8641 splitting pass. */
8642 static bool
8643 will_be_in_index_register (const_rtx x)
8645 /* arm.md: calculate_pic_address will split this into a register. */
8646 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
8649 /* Return nonzero if X is a valid ARM state address operand. */
8651 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
8652 int strict_p)
8654 bool use_ldrd;
8655 enum rtx_code code = GET_CODE (x);
8657 if (arm_address_register_rtx_p (x, strict_p))
8658 return 1;
8660 use_ldrd = (TARGET_LDRD
8661 && (mode == DImode || mode == DFmode));
8663 if (code == POST_INC || code == PRE_DEC
8664 || ((code == PRE_INC || code == POST_DEC)
8665 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8666 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8668 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8669 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8670 && GET_CODE (XEXP (x, 1)) == PLUS
8671 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8673 rtx addend = XEXP (XEXP (x, 1), 1);
8675 /* Don't allow ldrd post increment by register because it's hard
8676 to fixup invalid register choices. */
8677 if (use_ldrd
8678 && GET_CODE (x) == POST_MODIFY
8679 && REG_P (addend))
8680 return 0;
8682 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
8683 && arm_legitimate_index_p (mode, addend, outer, strict_p));
8686 /* After reload constants split into minipools will have addresses
8687 from a LABEL_REF. */
8688 else if (reload_completed
8689 && (code == LABEL_REF
8690 || (code == CONST
8691 && GET_CODE (XEXP (x, 0)) == PLUS
8692 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8693 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8694 return 1;
8696 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8697 return 0;
8699 else if (code == PLUS)
8701 rtx xop0 = XEXP (x, 0);
8702 rtx xop1 = XEXP (x, 1);
8704 return ((arm_address_register_rtx_p (xop0, strict_p)
8705 && ((CONST_INT_P (xop1)
8706 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
8707 || (!strict_p && will_be_in_index_register (xop1))))
8708 || (arm_address_register_rtx_p (xop1, strict_p)
8709 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
8712 #if 0
8713 /* Reload currently can't handle MINUS, so disable this for now */
8714 else if (GET_CODE (x) == MINUS)
8716 rtx xop0 = XEXP (x, 0);
8717 rtx xop1 = XEXP (x, 1);
8719 return (arm_address_register_rtx_p (xop0, strict_p)
8720 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
8722 #endif
8724 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8725 && code == SYMBOL_REF
8726 && CONSTANT_POOL_ADDRESS_P (x)
8727 && ! (flag_pic
8728 && symbol_mentioned_p (get_pool_constant (x))
8729 && ! pcrel_constant_p (get_pool_constant (x))))
8730 return 1;
8732 return 0;
8735 /* Return true if we can avoid creating a constant pool entry for x. */
8736 static bool
8737 can_avoid_literal_pool_for_label_p (rtx x)
8739 /* Normally we can assign constant values to target registers without
8740 the help of the constant pool. But there are cases where we have to use the
8741 constant pool, for example:
8742 1) assigning a label to a register.
8743 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
8745 Constant pool access in the format:
8746 (set (reg r0) (mem (symbol_ref (".LC0"))))
8747 will cause the use of the literal pool (later, in function arm_reorg).
8748 So here we mark such a format as invalid; the compiler will then
8749 adjust it into:
8750 (set (reg r0) (symbol_ref (".LC0")))
8751 (set (reg r0) (mem (reg r0))).
8752 No extra register is required, and (mem (reg r0)) won't cause the use
8753 of the literal pool. */
8754 if (arm_disable_literal_pool && SYMBOL_REF_P (x)
8755 && CONSTANT_POOL_ADDRESS_P (x))
8756 return 1;
8757 return 0;
8761 /* Return nonzero if X is a valid Thumb-2 address operand. */
8762 static int
8763 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8765 bool use_ldrd;
8766 enum rtx_code code = GET_CODE (x);
8768 /* If we are dealing with an MVE predicate mode, then treat it as HImode, as
8769 we can store and load it like any other 16-bit value. */
8770 if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE (mode))
8771 mode = HImode;
8773 if (TARGET_HAVE_MVE && VALID_MVE_MODE (mode))
8774 return mve_vector_mem_operand (mode, x, strict_p);
8776 if (arm_address_register_rtx_p (x, strict_p))
8777 return 1;
8779 use_ldrd = (TARGET_LDRD
8780 && (mode == DImode || mode == DFmode));
8782 if (code == POST_INC || code == PRE_DEC
8783 || ((code == PRE_INC || code == POST_DEC)
8784 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8785 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8787 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8788 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8789 && GET_CODE (XEXP (x, 1)) == PLUS
8790 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8792 /* Thumb-2 only has autoincrement by constant. */
8793 rtx addend = XEXP (XEXP (x, 1), 1);
8794 HOST_WIDE_INT offset;
8796 if (!CONST_INT_P (addend))
8797 return 0;
8799 offset = INTVAL(addend);
8800 if (GET_MODE_SIZE (mode) <= 4)
8801 return (offset > -256 && offset < 256);
8803 return (use_ldrd && offset > -1024 && offset < 1024
8804 && (offset & 3) == 0);
8807 /* After reload constants split into minipools will have addresses
8808 from a LABEL_REF. */
8809 else if (reload_completed
8810 && (code == LABEL_REF
8811 || (code == CONST
8812 && GET_CODE (XEXP (x, 0)) == PLUS
8813 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8814 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8815 return 1;
8817 else if (mode == TImode
8818 || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8819 || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
8820 return 0;
8822 else if (code == PLUS)
8824 rtx xop0 = XEXP (x, 0);
8825 rtx xop1 = XEXP (x, 1);
8827 return ((arm_address_register_rtx_p (xop0, strict_p)
8828 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8829 || (!strict_p && will_be_in_index_register (xop1))))
8830 || (arm_address_register_rtx_p (xop1, strict_p)
8831 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8834 else if (can_avoid_literal_pool_for_label_p (x))
8835 return 0;
8837 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8838 && code == SYMBOL_REF
8839 && CONSTANT_POOL_ADDRESS_P (x)
8840 && ! (flag_pic
8841 && symbol_mentioned_p (get_pool_constant (x))
8842 && ! pcrel_constant_p (get_pool_constant (x))))
8843 return 1;
8845 return 0;
8848 /* Return nonzero if INDEX is valid for an address index operand in
8849 ARM state. */
8850 static int
8851 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8852 int strict_p)
8854 HOST_WIDE_INT range;
8855 enum rtx_code code = GET_CODE (index);
8857 /* Standard coprocessor addressing modes. */
8858 if (TARGET_HARD_FLOAT
8859 && (mode == SFmode || mode == DFmode))
8860 return (code == CONST_INT && INTVAL (index) < 1024
8861 && INTVAL (index) > -1024
8862 && (INTVAL (index) & 3) == 0);
8864 if (arm_address_register_rtx_p (index, strict_p)
8865 && (GET_MODE_SIZE (mode) <= 4))
8866 return 1;
8868 /* This handles DFmode only if !TARGET_HARD_FLOAT. */
8869 if (mode == DImode || mode == DFmode)
8871 if (code == CONST_INT)
8873 HOST_WIDE_INT val = INTVAL (index);
8875 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8876 If vldr is selected it uses arm_coproc_mem_operand. */
8877 if (TARGET_LDRD)
8878 return val > -256 && val < 256;
8879 else
8880 return val > -4096 && val < 4092;
8883 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8886 /* For quad modes, we restrict the constant offset to be slightly less
8887 than what the instruction format permits. We do this because for
8888 quad mode moves, we will actually decompose them into two separate
8889 double-mode reads or writes. INDEX must therefore be a valid
8890 (double-mode) offset and so should INDEX+8. */
8891 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8892 return (code == CONST_INT
8893 && INTVAL (index) < 1016
8894 && INTVAL (index) > -1024
8895 && (INTVAL (index) & 3) == 0);
8897 /* We have no such constraint on double mode offsets, so we permit the
8898 full range of the instruction format. Note DImode is included here. */
8899 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8900 return (code == CONST_INT
8901 && INTVAL (index) < 1024
8902 && INTVAL (index) > -1024
8903 && (INTVAL (index) & 3) == 0);
8905 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8906 return (code == CONST_INT
8907 && INTVAL (index) < 1024
8908 && INTVAL (index) > -1024
8909 && (INTVAL (index) & 3) == 0);
8911 if (GET_MODE_SIZE (mode) <= 4
8912 && ! (arm_arch4
8913 && (mode == HImode
8914 || mode == HFmode
8915 || (mode == QImode && outer == SIGN_EXTEND))))
8917 if (code == MULT)
8919 rtx xiop0 = XEXP (index, 0);
8920 rtx xiop1 = XEXP (index, 1);
8922 return ((arm_address_register_rtx_p (xiop0, strict_p)
8923 && power_of_two_operand (xiop1, SImode))
8924 || (arm_address_register_rtx_p (xiop1, strict_p)
8925 && power_of_two_operand (xiop0, SImode)));
8927 else if (code == LSHIFTRT || code == ASHIFTRT
8928 || code == ASHIFT || code == ROTATERT)
8930 rtx op = XEXP (index, 1);
8932 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8933 && CONST_INT_P (op)
8934 && INTVAL (op) > 0
8935 && INTVAL (op) <= 31);
8939 /* For ARM v4 we may be doing a sign-extend operation during the
8940 load. */
8941 if (arm_arch4)
8943 if (mode == HImode
8944 || mode == HFmode
8945 || (outer == SIGN_EXTEND && mode == QImode))
8946 range = 256;
8947 else
8948 range = 4096;
8950 else
8951 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8953 return (code == CONST_INT
8954 && INTVAL (index) < range
8955 && INTVAL (index) > -range);
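/* For example, an SImode word load accepts immediate offsets up to 4095
   ([r1, #4095]), whereas an ARMv4 halfword load uses the narrower ldrh
   encoding and is limited to offsets below 256 ([r1, #255]).  */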
8958 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
8959 index operand, i.e. 1, 2, 4 or 8. */
8960 static bool
8961 thumb2_index_mul_operand (rtx op)
8963 HOST_WIDE_INT val;
8965 if (!CONST_INT_P (op))
8966 return false;
8968 val = INTVAL(op);
8969 return (val == 1 || val == 2 || val == 4 || val == 8);
8972 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8973 static int
8974 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8976 enum rtx_code code = GET_CODE (index);
8978 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8979 /* Standard coprocessor addressing modes. */
8980 if (TARGET_VFP_BASE
8981 && (mode == SFmode || mode == DFmode))
8982 return (code == CONST_INT && INTVAL (index) < 1024
8983 /* Thumb-2 allows only > -256 index range for its core register
8984 load/stores. Since we allow SF/DF in core registers, we have
8985 to use the intersection between -256~4096 (core) and -1024~1024
8986 (coprocessor). */
8987 && INTVAL (index) > -256
8988 && (INTVAL (index) & 3) == 0);
8990 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8992 /* For DImode assume values will usually live in core regs
8993 and only allow LDRD addressing modes. */
8994 if (!TARGET_LDRD || mode != DImode)
8995 return (code == CONST_INT
8996 && INTVAL (index) < 1024
8997 && INTVAL (index) > -1024
8998 && (INTVAL (index) & 3) == 0);
9001 /* For quad modes, we restrict the constant offset to be slightly less
9002 than what the instruction format permits. We do this because for
9003 quad mode moves, we will actually decompose them into two separate
9004 double-mode reads or writes. INDEX must therefore be a valid
9005 (double-mode) offset and so should INDEX+8. */
9006 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
9007 return (code == CONST_INT
9008 && INTVAL (index) < 1016
9009 && INTVAL (index) > -1024
9010 && (INTVAL (index) & 3) == 0);
9012 /* We have no such constraint on double mode offsets, so we permit the
9013 full range of the instruction format. Note DImode is included here. */
9014 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
9015 return (code == CONST_INT
9016 && INTVAL (index) < 1024
9017 && INTVAL (index) > -1024
9018 && (INTVAL (index) & 3) == 0);
9020 if (arm_address_register_rtx_p (index, strict_p)
9021 && (GET_MODE_SIZE (mode) <= 4))
9022 return 1;
9024 /* This handles DImode if !TARGET_NEON, and DFmode if !TARGET_VFP_BASE. */
9025 if (mode == DImode || mode == DFmode)
9027 if (code == CONST_INT)
9029 HOST_WIDE_INT val = INTVAL (index);
9030 /* Thumb-2 ldrd only has reg+const addressing modes.
9031 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
9032 If vldr is selected it uses arm_coproc_mem_operand. */
9033 if (TARGET_LDRD)
9034 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
9035 else
9036 return IN_RANGE (val, -255, 4095 - 4);
9038 else
9039 return 0;
9042 if (code == MULT)
9044 rtx xiop0 = XEXP (index, 0);
9045 rtx xiop1 = XEXP (index, 1);
9047 return ((arm_address_register_rtx_p (xiop0, strict_p)
9048 && thumb2_index_mul_operand (xiop1))
9049 || (arm_address_register_rtx_p (xiop1, strict_p)
9050 && thumb2_index_mul_operand (xiop0)));
9052 else if (code == ASHIFT)
9054 rtx op = XEXP (index, 1);
9056 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
9057 && CONST_INT_P (op)
9058 && INTVAL (op) > 0
9059 && INTVAL (op) <= 3);
9062 return (code == CONST_INT
9063 && INTVAL (index) < 4096
9064 && INTVAL (index) > -256);
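/* The asymmetric range above reflects the Thumb-2 encodings: a positive
   offset can use the 12-bit immediate form (up to 4095), whereas a
   negative offset must use the 8-bit form and so only reaches -255.  */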
9067 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
9068 static int
9069 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
9071 int regno;
9073 if (!REG_P (x))
9074 return 0;
9076 regno = REGNO (x);
9078 if (strict_p)
9079 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
9081 return (regno <= LAST_LO_REGNUM
9082 || regno > LAST_VIRTUAL_REGISTER
9083 || regno == FRAME_POINTER_REGNUM
9084 || (GET_MODE_SIZE (mode) >= 4
9085 && (regno == STACK_POINTER_REGNUM
9086 || regno >= FIRST_PSEUDO_REGISTER
9087 || x == hard_frame_pointer_rtx
9088 || x == arg_pointer_rtx)));
9091 /* Return nonzero if x is a legitimate index register. This is the case
9092 for any base register that can access a QImode object. */
9093 inline static int
9094 thumb1_index_register_rtx_p (rtx x, int strict_p)
9096 return thumb1_base_register_rtx_p (x, QImode, strict_p);
9099 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
9101 The AP may be eliminated to either the SP or the FP, so we use the
9102 least common denominator, e.g. SImode, and offsets from 0 to 64.
9104 ??? Verify whether the above is the right approach.
9106 ??? Also, the FP may be eliminated to the SP, so perhaps that
9107 needs special handling also.
9109 ??? Look at how the mips16 port solves this problem. It probably uses
9110 better ways to solve some of these problems.
9112 Although it is not incorrect, we don't accept QImode and HImode
9113 addresses based on the frame pointer or arg pointer until the
9114 reload pass starts. This is so that eliminating such addresses
9115 into stack-based ones won't produce impossible code. */
9117 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
9119 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
9120 return 0;
9122 /* ??? Not clear if this is right. Experiment. */
9123 if (GET_MODE_SIZE (mode) < 4
9124 && !(reload_in_progress || reload_completed)
9125 && (reg_mentioned_p (frame_pointer_rtx, x)
9126 || reg_mentioned_p (arg_pointer_rtx, x)
9127 || reg_mentioned_p (virtual_incoming_args_rtx, x)
9128 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
9129 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
9130 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
9131 return 0;
9133 /* Accept any base register. SP only in SImode or larger. */
9134 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
9135 return 1;
9137 /* This is PC relative data before arm_reorg runs. */
9138 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
9139 && SYMBOL_REF_P (x)
9140 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
9141 && !arm_disable_literal_pool)
9142 return 1;
9144 /* This is PC relative data after arm_reorg runs. */
9145 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
9146 && reload_completed
9147 && (LABEL_REF_P (x)
9148 || (GET_CODE (x) == CONST
9149 && GET_CODE (XEXP (x, 0)) == PLUS
9150 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
9151 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
9152 return 1;
9154 /* Post-inc indexing only supported for SImode and larger. */
9155 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
9156 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
9157 return 1;
9159 else if (GET_CODE (x) == PLUS)
9161 /* REG+REG address can be any two index registers. */
9162 /* We disallow FRAME+REG addressing since we know that FRAME
9163 will be replaced with STACK, and SP relative addressing only
9164 permits SP+OFFSET. */
9165 if (GET_MODE_SIZE (mode) <= 4
9166 && XEXP (x, 0) != frame_pointer_rtx
9167 && XEXP (x, 1) != frame_pointer_rtx
9168 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9169 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
9170 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
9171 return 1;
9173 /* REG+const has 5-7 bit offset for non-SP registers. */
9174 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9175 || XEXP (x, 0) == arg_pointer_rtx)
9176 && CONST_INT_P (XEXP (x, 1))
9177 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
9178 return 1;
9180 /* REG+const has 10-bit offset for SP, but only SImode and
9181 larger are supported. */
9182 /* ??? Should probably check for DI/DFmode overflow here
9183 just like GO_IF_LEGITIMATE_OFFSET does. */
9184 else if (REG_P (XEXP (x, 0))
9185 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
9186 && GET_MODE_SIZE (mode) >= 4
9187 && CONST_INT_P (XEXP (x, 1))
9188 && INTVAL (XEXP (x, 1)) >= 0
9189 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
9190 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9191 return 1;
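/* Worked example: an SImode load at SP+1020 is accepted (1020 + 4 <= 1024
   and the offset is word-aligned, matching LDR's scaled 8-bit SP-relative
   immediate), whereas SP+1024 is rejected.  */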
9193 else if (REG_P (XEXP (x, 0))
9194 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
9195 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
9196 || VIRTUAL_REGISTER_P (XEXP (x, 0)))
9197 && GET_MODE_SIZE (mode) >= 4
9198 && CONST_INT_P (XEXP (x, 1))
9199 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9200 return 1;
9203 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
9204 && GET_MODE_SIZE (mode) == 4
9205 && SYMBOL_REF_P (x)
9206 && CONSTANT_POOL_ADDRESS_P (x)
9207 && !arm_disable_literal_pool
9208 && ! (flag_pic
9209 && symbol_mentioned_p (get_pool_constant (x))
9210 && ! pcrel_constant_p (get_pool_constant (x))))
9211 return 1;
9213 return 0;
9216 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
9217 instruction of mode MODE. */
9219 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
9221 switch (GET_MODE_SIZE (mode))
9223 case 1:
9224 return val >= 0 && val < 32;
9226 case 2:
9227 return val >= 0 && val < 64 && (val & 1) == 0;
9229 default:
9230 return (val >= 0
9231 && (val + GET_MODE_SIZE (mode)) <= 128
9232 && (val & 3) == 0);
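/* These ranges mirror the scaled 5-bit immediates of the Thumb-1
   load/store encodings: byte accesses allow 0..31, halfword accesses
   0..62 (even), and word accesses 0..124 in multiples of 4; larger modes
   are constrained so that the whole access stays below 128.  */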
9236 bool
9237 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p, code_helper)
9239 if (TARGET_ARM)
9240 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
9241 else if (TARGET_THUMB2)
9242 return thumb2_legitimate_address_p (mode, x, strict_p);
9243 else /* if (TARGET_THUMB1) */
9244 return thumb1_legitimate_address_p (mode, x, strict_p);
9247 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
9249 Given an rtx X being reloaded into a reg required to be
9250 in class CLASS, return the class of reg to actually use.
9251 In general this is just CLASS, but for the Thumb core registers and
9252 immediate constants we prefer a LO_REGS class or a subset. */
9254 static reg_class_t
9255 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
9257 if (TARGET_32BIT)
9258 return rclass;
9259 else
9261 if (rclass == GENERAL_REGS)
9262 return LO_REGS;
9263 else
9264 return rclass;
9268 /* Build the SYMBOL_REF for __tls_get_addr. */
9270 static GTY(()) rtx tls_get_addr_libfunc;
9272 static rtx
9273 get_tls_get_addr (void)
9275 if (!tls_get_addr_libfunc)
9276 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
9277 return tls_get_addr_libfunc;
9281 arm_load_tp (rtx target)
9283 if (!target)
9284 target = gen_reg_rtx (SImode);
9286 if (TARGET_HARD_TP)
9288 /* Can return in any reg. */
9289 emit_insn (gen_load_tp_hard (target));
9291 else
9293 /* Always returned in r0. Immediately copy the result into a pseudo,
9294 otherwise other uses of r0 (e.g. setting up function arguments) may
9295 clobber the value. */
9297 rtx tmp;
9299 if (TARGET_FDPIC)
9301 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
9302 rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);
9304 emit_insn (gen_load_tp_soft_fdpic ());
9306 /* Restore r9. */
9307 emit_insn (gen_restore_pic_register_after_call (fdpic_reg, initial_fdpic_reg));
9309 else
9310 emit_insn (gen_load_tp_soft ());
9312 tmp = gen_rtx_REG (SImode, R0_REGNUM);
9313 emit_move_insn (target, tmp);
9315 return target;
9318 static rtx
9319 load_tls_operand (rtx x, rtx reg)
9321 rtx tmp;
9323 if (reg == NULL_RTX)
9324 reg = gen_reg_rtx (SImode);
9326 tmp = gen_rtx_CONST (SImode, x);
9328 emit_move_insn (reg, tmp);
9330 return reg;
9333 static rtx_insn *
9334 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
9336 rtx label, labelno = NULL_RTX, sum;
9338 gcc_assert (reloc != TLS_DESCSEQ);
9339 start_sequence ();
9341 if (TARGET_FDPIC)
9343 sum = gen_rtx_UNSPEC (Pmode,
9344 gen_rtvec (2, x, GEN_INT (reloc)),
9345 UNSPEC_TLS);
9347 else
9349 labelno = GEN_INT (pic_labelno++);
9350 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9351 label = gen_rtx_CONST (VOIDmode, label);
9353 sum = gen_rtx_UNSPEC (Pmode,
9354 gen_rtvec (4, x, GEN_INT (reloc), label,
9355 GEN_INT (TARGET_ARM ? 8 : 4)),
9356 UNSPEC_TLS);
9358 reg = load_tls_operand (sum, reg);
9360 if (TARGET_FDPIC)
9361 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9362 else if (TARGET_ARM)
9363 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
9364 else
9365 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9367 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
9368 LCT_PURE, /* LCT_CONST? */
9369 Pmode, reg, Pmode);
9371 rtx_insn *insns = get_insns ();
9372 end_sequence ();
9374 return insns;
9377 static rtx
9378 arm_tls_descseq_addr (rtx x, rtx reg)
9380 rtx labelno = GEN_INT (pic_labelno++);
9381 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9382 rtx sum = gen_rtx_UNSPEC (Pmode,
9383 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
9384 gen_rtx_CONST (VOIDmode, label),
9385 GEN_INT (!TARGET_ARM)),
9386 UNSPEC_TLS);
9387 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
9389 emit_insn (gen_tlscall (x, labelno));
9390 if (!reg)
9391 reg = gen_reg_rtx (SImode);
9392 else
9393 gcc_assert (REGNO (reg) != R0_REGNUM);
9395 emit_move_insn (reg, reg0);
9397 return reg;
9402 legitimize_tls_address (rtx x, rtx reg)
9404 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
9405 rtx_insn *insns;
9406 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
9408 switch (model)
9410 case TLS_MODEL_GLOBAL_DYNAMIC:
9411 if (TARGET_GNU2_TLS)
9413 gcc_assert (!TARGET_FDPIC);
9415 reg = arm_tls_descseq_addr (x, reg);
9417 tp = arm_load_tp (NULL_RTX);
9419 dest = gen_rtx_PLUS (Pmode, tp, reg);
9421 else
9423 /* Original scheme */
9424 if (TARGET_FDPIC)
9425 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
9426 else
9427 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
9428 dest = gen_reg_rtx (Pmode);
9429 emit_libcall_block (insns, dest, ret, x);
9431 return dest;
9433 case TLS_MODEL_LOCAL_DYNAMIC:
9434 if (TARGET_GNU2_TLS)
9436 gcc_assert (!TARGET_FDPIC);
9438 reg = arm_tls_descseq_addr (x, reg);
9440 tp = arm_load_tp (NULL_RTX);
9442 dest = gen_rtx_PLUS (Pmode, tp, reg);
9444 else
9446 if (TARGET_FDPIC)
9447 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
9448 else
9449 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
9451 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
9452 share the LDM result with other LD model accesses. */
9453 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
9454 UNSPEC_TLS);
9455 dest = gen_reg_rtx (Pmode);
9456 emit_libcall_block (insns, dest, ret, eqv);
9458 /* Load the addend. */
9459 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
9460 GEN_INT (TLS_LDO32)),
9461 UNSPEC_TLS);
9462 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
9463 dest = gen_rtx_PLUS (Pmode, dest, addend);
9465 return dest;
9467 case TLS_MODEL_INITIAL_EXEC:
9468 if (TARGET_FDPIC)
9470 sum = gen_rtx_UNSPEC (Pmode,
9471 gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
9472 UNSPEC_TLS);
9473 reg = load_tls_operand (sum, reg);
9474 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9475 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
9477 else
9479 labelno = GEN_INT (pic_labelno++);
9480 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9481 label = gen_rtx_CONST (VOIDmode, label);
9482 sum = gen_rtx_UNSPEC (Pmode,
9483 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
9484 GEN_INT (TARGET_ARM ? 8 : 4)),
9485 UNSPEC_TLS);
9486 reg = load_tls_operand (sum, reg);
9488 if (TARGET_ARM)
9489 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
9490 else if (TARGET_THUMB2)
9491 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
9492 else
9494 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9495 emit_move_insn (reg, gen_const_mem (SImode, reg));
9499 tp = arm_load_tp (NULL_RTX);
9501 return gen_rtx_PLUS (Pmode, tp, reg);
9503 case TLS_MODEL_LOCAL_EXEC:
9504 tp = arm_load_tp (NULL_RTX);
9506 reg = gen_rtx_UNSPEC (Pmode,
9507 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
9508 UNSPEC_TLS);
9509 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
9511 return gen_rtx_PLUS (Pmode, tp, reg);
9513 default:
9514 abort ();
9518 /* Try machine-dependent ways of modifying an illegitimate address
9519 to be legitimate. If we find one, return the new, valid address. */
9521 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9523 if (arm_tls_referenced_p (x))
9525 rtx addend = NULL;
9527 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
9529 addend = XEXP (XEXP (x, 0), 1);
9530 x = XEXP (XEXP (x, 0), 0);
9533 if (!SYMBOL_REF_P (x))
9534 return x;
9536 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
9538 x = legitimize_tls_address (x, NULL_RTX);
9540 if (addend)
9542 x = gen_rtx_PLUS (SImode, x, addend);
9543 orig_x = x;
9545 else
9546 return x;
9549 if (TARGET_THUMB1)
9550 return thumb_legitimize_address (x, orig_x, mode);
9552 if (GET_CODE (x) == PLUS)
9554 rtx xop0 = XEXP (x, 0);
9555 rtx xop1 = XEXP (x, 1);
9557 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
9558 xop0 = force_reg (SImode, xop0);
9560 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
9561 && !symbol_mentioned_p (xop1))
9562 xop1 = force_reg (SImode, xop1);
9564 if (ARM_BASE_REGISTER_RTX_P (xop0)
9565 && CONST_INT_P (xop1))
9567 HOST_WIDE_INT n, low_n;
9568 rtx base_reg, val;
9569 n = INTVAL (xop1);
9571 /* VFP addressing modes actually allow greater offsets, but for
9572 now we just stick with the lowest common denominator. */
9573 if (mode == DImode || mode == DFmode)
9575 low_n = n & 0x0f;
9576 n &= ~0x0f;
9577 if (low_n > 4)
9579 n += 16;
9580 low_n -= 16;
9583 else
9585 low_n = ((mode) == TImode ? 0
9586 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
9587 n -= low_n;
9590 base_reg = gen_reg_rtx (SImode);
9591 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
9592 emit_move_insn (base_reg, val);
9593 x = plus_constant (Pmode, base_reg, low_n);
9595 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9596 x = gen_rtx_PLUS (SImode, xop0, xop1);
9599 /* XXX We don't allow MINUS any more -- see comment in
9600 arm_legitimate_address_outer_p (). */
9601 else if (GET_CODE (x) == MINUS)
9603 rtx xop0 = XEXP (x, 0);
9604 rtx xop1 = XEXP (x, 1);
9606 if (CONSTANT_P (xop0))
9607 xop0 = force_reg (SImode, xop0);
9609 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
9610 xop1 = force_reg (SImode, xop1);
9612 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9613 x = gen_rtx_MINUS (SImode, xop0, xop1);
9616 /* Make sure to take full advantage of the pre-indexed addressing mode
9617 with absolute addresses, which often allows the base register to
9618 be factorized for multiple adjacent memory references, and it might
9619 even allow the minipool to be avoided entirely. */
9620 else if (CONST_INT_P (x) && optimize > 0)
9622 unsigned int bits;
9623 HOST_WIDE_INT mask, base, index;
9624 rtx base_reg;
9626 /* LDR and LDRB can use a 12-bit index; ldrsb and the rest can
9627 only use an 8-bit index. So let's use a 12-bit index for
9628 SImode only and hope that arm_gen_constant will enable LDRB
9629 to use more bits. */
9630 bits = (mode == SImode) ? 12 : 8;
9631 mask = (1 << bits) - 1;
9632 base = INTVAL (x) & ~mask;
9633 index = INTVAL (x) & mask;
9634 if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
9636 /* It'll most probably be more efficient to generate the
9637 base with more bits set and use a negative index instead.
9638 Don't do this for Thumb as negative offsets are much more
9639 limited. */
9640 base |= mask;
9641 index -= mask;
9643 base_reg = force_reg (SImode, GEN_INT (base));
9644 x = plus_constant (Pmode, base_reg, index);
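/* Worked example (SImode): for the absolute address 0x12345678 this
   yields base = 0x12345000 and index = 0x678, so the constant is built
   once into BASE_REG and the access becomes [BASE_REG, #0x678]; nearby
   absolute addresses can then share the same base register.  */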
9647 if (flag_pic)
9649 /* We need to find and carefully transform any SYMBOL and LABEL
9650 references; so go back to the original address expression. */
9651 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9652 false /*compute_now*/);
9654 if (new_x != orig_x)
9655 x = new_x;
9658 return x;
9662 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9663 to be legitimate. If we find one, return the new, valid address. */
9665 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9667 if (GET_CODE (x) == PLUS
9668 && CONST_INT_P (XEXP (x, 1))
9669 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
9670 || INTVAL (XEXP (x, 1)) < 0))
9672 rtx xop0 = XEXP (x, 0);
9673 rtx xop1 = XEXP (x, 1);
9674 HOST_WIDE_INT offset = INTVAL (xop1);
9676 /* Try and fold the offset into a biasing of the base register and
9677 then offsetting that. Don't do this when optimizing for space
9678 since it can cause too many CSEs. */
9679 if (optimize_size && offset >= 0
9680 && offset < 256 + 31 * GET_MODE_SIZE (mode))
9682 HOST_WIDE_INT delta;
9684 if (offset >= 256)
9685 delta = offset - (256 - GET_MODE_SIZE (mode));
9686 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
9687 delta = 31 * GET_MODE_SIZE (mode);
9688 else
9689 delta = offset & (~31 * GET_MODE_SIZE (mode));
9691 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
9692 NULL_RTX);
9693 x = plus_constant (Pmode, xop0, delta);
9695 else if (offset < 0 && offset > -256)
9696 /* Small negative offsets are best done with a subtract before the
9697 dereference; forcing these into a register normally takes two
9698 instructions. */
9699 x = force_operand (x, NULL_RTX);
9700 else
9702 /* For the remaining cases, force the constant into a register. */
9703 xop1 = force_reg (SImode, xop1);
9704 x = gen_rtx_PLUS (SImode, xop0, xop1);
9707 else if (GET_CODE (x) == PLUS
9708 && s_register_operand (XEXP (x, 1), SImode)
9709 && !s_register_operand (XEXP (x, 0), SImode))
9711 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
9713 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
9716 if (flag_pic)
9718 /* We need to find and carefully transform any SYMBOL and LABEL
9719 references; so go back to the original address expression. */
9720 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9721 false /*compute_now*/);
9723 if (new_x != orig_x)
9724 x = new_x;
9727 return x;
9730 /* Return TRUE if X contains any TLS symbol references. */
9732 bool
9733 arm_tls_referenced_p (rtx x)
9735 if (! TARGET_HAVE_TLS)
9736 return false;
9738 subrtx_iterator::array_type array;
9739 FOR_EACH_SUBRTX (iter, array, x, ALL)
9741 const_rtx x = *iter;
9742 if (SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0)
9744 /* ARM currently does not provide relocations to encode TLS variables
9745 into AArch32 instructions, only data, so there is currently no way
9746 to implement these if a literal pool is disabled. */
9747 if (arm_disable_literal_pool)
9748 sorry ("accessing thread-local storage is not currently supported "
9749 "with %<-mpure-code%> or %<-mslow-flash-data%>");
9751 return true;
9754 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9755 TLS offsets, not real symbol references. */
9756 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9757 iter.skip_subrtxes ();
9759 return false;
9762 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9764 On the ARM, allow any integer (invalid ones are removed later by insn
9765 patterns), nice doubles and symbol_refs which refer to the function's
9766 constant pool XXX.
9768 When generating pic allow anything. */
9770 static bool
9771 arm_legitimate_constant_p_1 (machine_mode, rtx x)
9773 if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
9774 return false;
9776 return flag_pic || !label_mentioned_p (x);
9779 static bool
9780 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9782 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
9783 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
9784 for ARMv8-M Baseline or later the result is valid. */
9785 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
9786 x = XEXP (x, 0);
9788 return (CONST_INT_P (x)
9789 || CONST_DOUBLE_P (x)
9790 || CONSTANT_ADDRESS_P (x)
9791 || (TARGET_HAVE_MOVT && SYMBOL_REF_P (x))
9792 /* On Thumb-1 without MOVT/MOVW and literal pool disabled,
9793 we build the symbol address with upper/lower
9794 relocations. */
9795 || (TARGET_THUMB1
9796 && !label_mentioned_p (x)
9797 && arm_valid_symbolic_address_p (x)
9798 && arm_disable_literal_pool)
9799 || flag_pic);
9802 static bool
9803 arm_legitimate_constant_p (machine_mode mode, rtx x)
9805 return (!arm_cannot_force_const_mem (mode, x)
9806 && (TARGET_32BIT
9807 ? arm_legitimate_constant_p_1 (mode, x)
9808 : thumb_legitimate_constant_p (mode, x)));
9811 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9813 static bool
9814 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9816 rtx base, offset;
9817 split_const (x, &base, &offset);
9819 if (SYMBOL_REF_P (base))
9821 /* Function symbols cannot have an offset due to the Thumb bit. */
9822 if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
9823 && INTVAL (offset) != 0)
9824 return true;
9826 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9827 && !offset_within_block_p (base, INTVAL (offset)))
9828 return true;
9830 return arm_tls_referenced_p (x);
9833 #define REG_OR_SUBREG_REG(X) \
9834 (REG_P (X) \
9835 || (SUBREG_P (X) && REG_P (SUBREG_REG (X))))
9837 #define REG_OR_SUBREG_RTX(X) \
9838 (REG_P (X) ? (X) : SUBREG_REG (X))
9840 static inline int
9841 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9843 machine_mode mode = GET_MODE (x);
9844 int total, words;
9846 switch (code)
9848 case ASHIFT:
9849 case ASHIFTRT:
9850 case LSHIFTRT:
9851 case ROTATERT:
9852 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9854 case PLUS:
9855 case MINUS:
9856 case COMPARE:
9857 case NEG:
9858 case NOT:
9859 return COSTS_N_INSNS (1);
9861 case MULT:
9862 if (arm_arch6m && arm_m_profile_small_mul)
9863 return COSTS_N_INSNS (32);
9865 if (CONST_INT_P (XEXP (x, 1)))
9867 int cycles = 0;
9868 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
9870 while (i)
9872 i >>= 2;
9873 cycles++;
9875 return COSTS_N_INSNS (2) + cycles;
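/* Worked example: for a multiply by 100 the loop above iterates four
   times (100 -> 25 -> 6 -> 1 -> 0), giving COSTS_N_INSNS (2) + 4.  */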
9877 return COSTS_N_INSNS (1) + 16;
9879 case SET:
9880 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9881 the mode. */
9882 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9883 return (COSTS_N_INSNS (words)
9884 + 4 * ((MEM_P (SET_SRC (x)))
9885 + MEM_P (SET_DEST (x))));
9887 case CONST_INT:
9888 if (outer == SET)
9890 if (UINTVAL (x) < 256
9891 /* 16-bit constant. */
9892 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9893 return 0;
9894 if (thumb_shiftable_const (INTVAL (x)))
9895 return COSTS_N_INSNS (2);
9896 return arm_disable_literal_pool
9897 ? COSTS_N_INSNS (8)
9898 : COSTS_N_INSNS (3);
9900 else if ((outer == PLUS || outer == COMPARE)
9901 && INTVAL (x) < 256 && INTVAL (x) > -256)
9902 return 0;
9903 else if ((outer == IOR || outer == XOR || outer == AND)
9904 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9905 return COSTS_N_INSNS (1);
9906 else if (outer == AND)
9908 int i;
9909 /* This duplicates the tests in the andsi3 expander. */
9910 for (i = 9; i <= 31; i++)
9911 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9912 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9913 return COSTS_N_INSNS (2);
9915 else if (outer == ASHIFT || outer == ASHIFTRT
9916 || outer == LSHIFTRT)
9917 return 0;
9918 return COSTS_N_INSNS (2);
9920 case CONST:
9921 case CONST_DOUBLE:
9922 case LABEL_REF:
9923 case SYMBOL_REF:
9924 return COSTS_N_INSNS (3);
9926 case UDIV:
9927 case UMOD:
9928 case DIV:
9929 case MOD:
9930 return 100;
9932 case TRUNCATE:
9933 return 99;
9935 case AND:
9936 case XOR:
9937 case IOR:
9938 /* XXX guess. */
9939 return 8;
9941 case MEM:
9942 /* XXX another guess. */
9943 /* Memory costs quite a lot for the first word, but subsequent words
9944 load at the equivalent of a single insn each. */
9945 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9946 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
9947 ? 4 : 0));
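/* E.g. with UNITS_PER_WORD == 4, a DImode memory reference costs
   10 + 4 * 1 = 14, plus another 4 if it is a constant-pool load.  */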
9949 case IF_THEN_ELSE:
9950 /* XXX a guess. */
9951 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9952 return 14;
9953 return 2;
9955 case SIGN_EXTEND:
9956 case ZERO_EXTEND:
9957 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9958 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9960 if (mode == SImode)
9961 return total;
9963 if (arm_arch6)
9964 return total + COSTS_N_INSNS (1);
9966 /* Assume a two-shift sequence. Increase the cost slightly so
9967 we prefer actual shifts over an extend operation. */
9968 return total + 1 + COSTS_N_INSNS (2);
9970 default:
9971 return 99;
9975 /* Estimates the size cost of thumb1 instructions.
9976 For now, most of the code is copied from thumb1_rtx_costs. We need more
9977 fine-grained tuning when we have more related test cases. */
9978 static inline int
9979 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9981 machine_mode mode = GET_MODE (x);
9982 int words, cost;
9984 switch (code)
9986 case ASHIFT:
9987 case ASHIFTRT:
9988 case LSHIFTRT:
9989 case ROTATERT:
9990 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9992 case PLUS:
9993 case MINUS:
9994 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
9995 defined by RTL expansion, especially for the expansion of
9996 multiplication. */
9997 if ((GET_CODE (XEXP (x, 0)) == MULT
9998 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9999 || (GET_CODE (XEXP (x, 1)) == MULT
10000 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
10001 return COSTS_N_INSNS (2);
10002 /* Fall through. */
10003 case COMPARE:
10004 case NEG:
10005 case NOT:
10006 return COSTS_N_INSNS (1);
10008 case MULT:
10009 if (CONST_INT_P (XEXP (x, 1)))
10011 /* The Thumb-1 mul instruction can't operate on a constant; we must load it
10012 into a register first. */
10013 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
10014 /* For the targets which have a very small and high-latency multiply
10015 unit, we prefer to synthesize the mult with up to 5 instructions,
10016 giving a good balance between size and performance. */
10017 if (arm_arch6m && arm_m_profile_small_mul)
10018 return COSTS_N_INSNS (5);
10019 else
10020 return COSTS_N_INSNS (1) + const_size;
10022 return COSTS_N_INSNS (1);
10024 case SET:
10025 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
10026 the mode. */
10027 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
10028 cost = COSTS_N_INSNS (words);
10029 if (satisfies_constraint_J (SET_SRC (x))
10030 || satisfies_constraint_K (SET_SRC (x))
10031 /* Too big an immediate for a 2-byte mov, using MOVT. */
10032 || (CONST_INT_P (SET_SRC (x))
10033 && UINTVAL (SET_SRC (x)) >= 256
10034 && TARGET_HAVE_MOVT
10035 && satisfies_constraint_j (SET_SRC (x)))
10036 /* thumb1_movdi_insn. */
10037 || ((words > 1) && MEM_P (SET_SRC (x))))
10038 cost += COSTS_N_INSNS (1);
10039 return cost;
10041 case CONST_INT:
10042 if (outer == SET)
10044 if (UINTVAL (x) < 256)
10045 return COSTS_N_INSNS (1);
10046 /* movw is 4 bytes long. */
10047 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
10048 return COSTS_N_INSNS (2);
10049 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
10050 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
10051 return COSTS_N_INSNS (2);
10052 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
10053 if (thumb_shiftable_const (INTVAL (x)))
10054 return COSTS_N_INSNS (2);
10055 return arm_disable_literal_pool
10056 ? COSTS_N_INSNS (8)
10057 : COSTS_N_INSNS (3);
10059 else if ((outer == PLUS || outer == COMPARE)
10060 && INTVAL (x) < 256 && INTVAL (x) > -256)
10061 return 0;
10062 else if ((outer == IOR || outer == XOR || outer == AND)
10063 && INTVAL (x) < 256 && INTVAL (x) >= -256)
10064 return COSTS_N_INSNS (1);
10065 else if (outer == AND)
10067 int i;
10068 /* This duplicates the tests in the andsi3 expander. */
10069 for (i = 9; i <= 31; i++)
10070 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
10071 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
10072 return COSTS_N_INSNS (2);
10074 else if (outer == ASHIFT || outer == ASHIFTRT
10075 || outer == LSHIFTRT)
10076 return 0;
10077 return COSTS_N_INSNS (2);
10079 case CONST:
10080 case CONST_DOUBLE:
10081 case LABEL_REF:
10082 case SYMBOL_REF:
10083 return COSTS_N_INSNS (3);
10085 case UDIV:
10086 case UMOD:
10087 case DIV:
10088 case MOD:
10089 return 100;
10091 case TRUNCATE:
10092 return 99;
10094 case AND:
10095 case XOR:
10096 case IOR:
10097 return COSTS_N_INSNS (1);
10099 case MEM:
10100 return (COSTS_N_INSNS (1)
10101 + COSTS_N_INSNS (1)
10102 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
10103 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
10104 ? COSTS_N_INSNS (1) : 0));
10106 case IF_THEN_ELSE:
10107 /* XXX a guess. */
10108 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10109 return 14;
10110 return 2;
10112 case ZERO_EXTEND:
10113 /* XXX still guessing. */
10114 switch (GET_MODE (XEXP (x, 0)))
10116 case E_QImode:
10117 return (1 + (mode == DImode ? 4 : 0)
10118 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10120 case E_HImode:
10121 return (4 + (mode == DImode ? 4 : 0)
10122 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10124 case E_SImode:
10125 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10127 default:
10128 return 99;
10131 default:
10132 return 99;
10136 /* Helper function for arm_rtx_costs. If one operand of the OP, a
10137 PLUS, adds the carry flag, then return the other operand. If
10138 neither is a carry, return OP unchanged. */
10139 static rtx
10140 strip_carry_operation (rtx op)
10142 gcc_assert (GET_CODE (op) == PLUS);
10143 if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
10144 return XEXP (op, 1);
10145 else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
10146 return XEXP (op, 0);
10147 return op;
10150 /* Helper function for arm_rtx_costs. If the operand is a valid shift
10151 operand, then return the operand that is being shifted. If the shift
10152 is not by a constant, then set SHIFT_REG to point to the operand.
10153 Return NULL if OP is not a shifter operand. */
10154 static rtx
10155 shifter_op_p (rtx op, rtx *shift_reg)
10157 enum rtx_code code = GET_CODE (op);
10159 if (code == MULT && CONST_INT_P (XEXP (op, 1))
10160 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
10161 return XEXP (op, 0);
10162 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
10163 return XEXP (op, 0);
10164 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
10165 || code == ASHIFTRT)
10167 if (!CONST_INT_P (XEXP (op, 1)))
10168 *shift_reg = XEXP (op, 1);
10169 return XEXP (op, 0);
10172 return NULL;
10175 static bool
10176 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
10178 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
10179 rtx_code code = GET_CODE (x);
10180 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
10182 switch (XINT (x, 1))
10184 case UNSPEC_UNALIGNED_LOAD:
10185 /* We can only do unaligned loads into the integer unit, and we can't
10186 use LDM or LDRD. */
10187 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10188 if (speed_p)
10189 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
10190 + extra_cost->ldst.load_unaligned);
10192 #ifdef NOT_YET
10193 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10194 ADDR_SPACE_GENERIC, speed_p);
10195 #endif
10196 return true;
10198 case UNSPEC_UNALIGNED_STORE:
10199 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10200 if (speed_p)
10201 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
10202 + extra_cost->ldst.store_unaligned);
10204 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
10205 #ifdef NOT_YET
10206 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10207 ADDR_SPACE_GENERIC, speed_p);
10208 #endif
10209 return true;
10211 case UNSPEC_VRINTZ:
10212 case UNSPEC_VRINTP:
10213 case UNSPEC_VRINTM:
10214 case UNSPEC_VRINTR:
10215 case UNSPEC_VRINTX:
10216 case UNSPEC_VRINTA:
10217 if (speed_p)
10218 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
10220 return true;
10221 default:
10222 *cost = COSTS_N_INSNS (2);
10223 break;
10225 return true;
10228 /* Cost of a libcall. We assume one insn per argument, an amount for the
10229 call (one insn for -Os) and then one for processing the result. */
10230 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
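/* For example, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   optimizing for speed and to COSTS_N_INSNS (4) at -Os.  */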
10232 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
10233 do \
10235 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
10236 if (shift_op != NULL \
10237 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
10239 if (shift_reg) \
10241 if (speed_p) \
10242 *cost += extra_cost->alu.arith_shift_reg; \
10243 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
10244 ASHIFT, 1, speed_p); \
10246 else if (speed_p) \
10247 *cost += extra_cost->alu.arith_shift; \
10249 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
10250 ASHIFT, 0, speed_p) \
10251 + rtx_cost (XEXP (x, 1 - IDX), \
10252 GET_MODE (shift_op), \
10253 OP, 1, speed_p)); \
10254 return true; \
10257 while (0)
10259 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
10260 considering the costs of the addressing mode and memory access
10261 separately. */
10262 static bool
10263 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
10264 int *cost, bool speed_p)
10266 machine_mode mode = GET_MODE (x);
10268 *cost = COSTS_N_INSNS (1);
10270 if (flag_pic
10271 && GET_CODE (XEXP (x, 0)) == PLUS
10272 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
10273 /* This will be split into two instructions. Add the cost of the
10274 additional instruction here. The cost of the memory access is computed
10275 below. See arm.md:calculate_pic_address. */
10276 *cost += COSTS_N_INSNS (1);
10278 /* Calculate cost of the addressing mode. */
10279 if (speed_p)
10281 arm_addr_mode_op op_type;
10282 switch (GET_CODE (XEXP (x, 0)))
10284 default:
10285 case REG:
10286 op_type = AMO_DEFAULT;
10287 break;
10288 case MINUS:
10289 /* MINUS does not appear in RTL, but the architecture supports it,
10290 so handle this case defensively. */
10291 /* fall through */
10292 case PLUS:
10293 op_type = AMO_NO_WB;
10294 break;
10295 case PRE_INC:
10296 case PRE_DEC:
10297 case POST_INC:
10298 case POST_DEC:
10299 case PRE_MODIFY:
10300 case POST_MODIFY:
10301 op_type = AMO_WB;
10302 break;
10305 if (VECTOR_MODE_P (mode))
10306 *cost += current_tune->addr_mode_costs->vector[op_type];
10307 else if (FLOAT_MODE_P (mode))
10308 *cost += current_tune->addr_mode_costs->fp[op_type];
10309 else
10310 *cost += current_tune->addr_mode_costs->integer[op_type];
10313 /* Calculate cost of memory access. */
10314 if (speed_p)
10316 if (FLOAT_MODE_P (mode))
10318 if (GET_MODE_SIZE (mode) == 8)
10319 *cost += extra_cost->ldst.loadd;
10320 else
10321 *cost += extra_cost->ldst.loadf;
10323 else if (VECTOR_MODE_P (mode))
10324 *cost += extra_cost->ldst.loadv;
10325 else
10327 /* Integer modes */
10328 if (GET_MODE_SIZE (mode) == 8)
10329 *cost += extra_cost->ldst.ldrd;
10330 else
10331 *cost += extra_cost->ldst.load;
10335 return true;
10338 /* Helper for arm_bfi_p. */
10339 static bool
10340 arm_bfi_1_p (rtx op0, rtx op1, rtx *sub0, rtx *sub1)
10342 unsigned HOST_WIDE_INT const1;
10343 unsigned HOST_WIDE_INT const2 = 0;
10345 if (!CONST_INT_P (XEXP (op0, 1)))
10346 return false;
10348 const1 = UINTVAL (XEXP (op0, 1));
10349 if (!CONST_INT_P (XEXP (op1, 1))
10350 || ~UINTVAL (XEXP (op1, 1)) != const1)
10351 return false;
10353 if (GET_CODE (XEXP (op0, 0)) == ASHIFT
10354 && CONST_INT_P (XEXP (XEXP (op0, 0), 1)))
10356 const2 = UINTVAL (XEXP (XEXP (op0, 0), 1));
10357 *sub0 = XEXP (XEXP (op0, 0), 0);
10359 else
10360 *sub0 = XEXP (op0, 0);
10362 if (const2 >= GET_MODE_BITSIZE (GET_MODE (op0)))
10363 return false;
10365 *sub1 = XEXP (op1, 0);
10366 return exact_log2 (const1 + (HOST_WIDE_INT_1U << const2)) >= 0;
10369 /* Recognize a BFI idiom. Helper for arm_rtx_costs_internal. The
10370 format looks something like:
10372 (IOR (AND (reg1) (~const1))
10373 (AND (ASHIFT (reg2) (const2))
10374 (const1)))
10376 where const1 is a consecutive sequence of 1-bits with the
10377 least-significant non-zero bit starting at bit position const2. If
10378 const2 is zero, then the shift will not appear at all, due to
10379 canonicalization. The two arms of the IOR expression may be
10380 flipped. */
10381 static bool
10382 arm_bfi_p (rtx x, rtx *sub0, rtx *sub1)
10384 if (GET_CODE (x) != IOR)
10385 return false;
10386 if (GET_CODE (XEXP (x, 0)) != AND
10387 || GET_CODE (XEXP (x, 1)) != AND)
10388 return false;
10389 return (arm_bfi_1_p (XEXP (x, 0), XEXP (x, 1), sub0, sub1)
10390 || arm_bfi_1_p (XEXP (x, 1), XEXP (x, 0), sub1, sub0));
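/* As a concrete illustration, a C-level field insert such as

     r1 = (r1 & ~0xf0u) | ((r2 << 4) & 0xf0u);

   reaches this code as
     (ior (and (reg r1) (const_int -241))
          (and (ashift (reg r2) (const_int 4)) (const_int 240)))
   and can be implemented as a single BFI r1, r2, #4, #4.  */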
10393 /* RTX costs. Make an estimate of the cost of executing the operation
10394 X, which is contained within an operation with code OUTER_CODE.
10395 SPEED_P indicates whether the cost desired is the performance cost,
10396 or the size cost. The estimate is stored in COST and the return
10397 value is TRUE if the cost calculation is final, or FALSE if the
10398 caller should recurse through the operands of X to add additional
10399 costs.
10401 We currently make no attempt to model the size savings of Thumb-2
10402 16-bit instructions. At the normal points in compilation where
10403 this code is called we have no measure of whether the condition
10404 flags are live or not, and thus no realistic way to determine what
10405 the size will eventually be. */
10406 static bool
10407 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
10408 const struct cpu_cost_table *extra_cost,
10409 int *cost, bool speed_p)
10411 machine_mode mode = GET_MODE (x);
10413 *cost = COSTS_N_INSNS (1);
10415 if (TARGET_THUMB1)
10417 if (speed_p)
10418 *cost = thumb1_rtx_costs (x, code, outer_code);
10419 else
10420 *cost = thumb1_size_rtx_costs (x, code, outer_code);
10421 return true;
10424 switch (code)
10426 case SET:
10427 *cost = 0;
10428 /* SET RTXs don't have a mode so we get it from the destination. */
10429 mode = GET_MODE (SET_DEST (x));
10431 if (REG_P (SET_SRC (x))
10432 && REG_P (SET_DEST (x)))
10434 /* Assume that most copies can be done with a single insn,
10435 unless we don't have HW FP, in which case everything
10436 larger than word mode will require two insns. */
10437 *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
10438 && GET_MODE_SIZE (mode) > 4)
10439 || mode == DImode)
10440 ? 2 : 1);
10441 /* Conditional register moves can be encoded
10442 in 16 bits in Thumb mode. */
10443 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
10444 *cost >>= 1;
10446 return true;
10449 if (CONST_INT_P (SET_SRC (x)))
10451 /* Handle CONST_INT here, since the value doesn't have a mode
10452 and we would otherwise be unable to work out the true cost. */
10453 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
10454 0, speed_p);
10455 outer_code = SET;
10456 /* Slightly lower the cost of setting a core reg to a constant.
10457 This helps break up chains and allows for better scheduling. */
10458 if (REG_P (SET_DEST (x))
10459 && REGNO (SET_DEST (x)) <= LR_REGNUM)
10460 *cost -= 1;
10461 x = SET_SRC (x);
10462 /* Immediate moves with an immediate in the range [0, 255] can be
10463 encoded in 16 bits in Thumb mode. */
10464 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
10465 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
10466 *cost >>= 1;
10467 goto const_int_cost;
10470 return false;
10472 case MEM:
10473 return arm_mem_costs (x, extra_cost, cost, speed_p);
10475 case PARALLEL:
10477 /* Calculations of LDM costs are complex. We assume an initial cost
10478 (ldm_1st) which will load the number of registers mentioned in
10479 ldm_regs_per_insn_1st registers; then each additional
10480 ldm_regs_per_insn_subsequent registers cost one more insn. The
10481 formula for N regs is thus:
10483 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
10484 + ldm_regs_per_insn_subsequent - 1)
10485 / ldm_regs_per_insn_subsequent).
10487 Additional costs may also be added for addressing. A similar
10488 formula is used for STM. */
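/* For instance, assuming purely for illustration that
   ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2, an
   8-register LDM costs ldm_1st + COSTS_N_INSNS ((6 + 2 - 1) / 2)
   = ldm_1st + COSTS_N_INSNS (3).  */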
10490 bool is_ldm = load_multiple_operation (x, SImode);
10491 bool is_stm = store_multiple_operation (x, SImode);
10493 if (is_ldm || is_stm)
10495 if (speed_p)
10497 HOST_WIDE_INT nregs = XVECLEN (x, 0);
10498 HOST_WIDE_INT regs_per_insn_1st = is_ldm
10499 ? extra_cost->ldst.ldm_regs_per_insn_1st
10500 : extra_cost->ldst.stm_regs_per_insn_1st;
10501 HOST_WIDE_INT regs_per_insn_sub = is_ldm
10502 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
10503 : extra_cost->ldst.stm_regs_per_insn_subsequent;
10505 *cost += regs_per_insn_1st
10506 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
10507 + regs_per_insn_sub - 1)
10508 / regs_per_insn_sub);
10509 return true;
10513 return false;
10515 case DIV:
10516 case UDIV:
10517 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10518 && (mode == SFmode || !TARGET_VFP_SINGLE))
10519 *cost += COSTS_N_INSNS (speed_p
10520 ? extra_cost->fp[mode != SFmode].div : 0);
10521 else if (mode == SImode && TARGET_IDIV)
10522 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
10523 else
10524 *cost = LIBCALL_COST (2);
10526 /* Make the cost of sdiv more expensive so that when both sdiv and udiv are
10527 possible, udiv is preferred. */
10528 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
10529 return false; /* All arguments must be in registers. */
10531 case MOD:
10532 /* MOD by a power of 2 can be expanded as:
10533 rsbs r1, r0, #0
10534 and r0, r0, #(n - 1)
10535 and r1, r1, #(n - 1)
10536 rsbpl r0, r1, #0. */
10537 if (CONST_INT_P (XEXP (x, 1))
10538 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
10539 && mode == SImode)
10541 *cost += COSTS_N_INSNS (3);
10543 if (speed_p)
10544 *cost += 2 * extra_cost->alu.logical
10545 + extra_cost->alu.arith;
10546 return true;
10549 /* Fall-through. */
10550 case UMOD:
10551 /* Make the cost of sdiv more expensive so that when both sdiv and udiv are
10552 possible, udiv is preferred. */
10553 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
10554 return false; /* All arguments must be in registers. */
10556 case ROTATE:
10557 if (mode == SImode && REG_P (XEXP (x, 1)))
10559 *cost += (COSTS_N_INSNS (1)
10560 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10561 if (speed_p)
10562 *cost += extra_cost->alu.shift_reg;
10563 return true;
10565 /* Fall through */
10566 case ROTATERT:
10567 case ASHIFT:
10568 case LSHIFTRT:
10569 case ASHIFTRT:
10570 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
10572 *cost += (COSTS_N_INSNS (2)
10573 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10574 if (speed_p)
10575 *cost += 2 * extra_cost->alu.shift;
10576 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
10577 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
10578 *cost += 1;
10579 return true;
10581 else if (mode == SImode)
10583 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10584 /* Slightly disparage register shifts at -Os, but not by much. */
10585 if (!CONST_INT_P (XEXP (x, 1)))
10586 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10587 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10588 return true;
10590 else if (GET_MODE_CLASS (mode) == MODE_INT
10591 && GET_MODE_SIZE (mode) < 4)
10593 if (code == ASHIFT)
10595 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10596 /* Slightly disparage register shifts at -Os, but not by
10597 much. */
10598 if (!CONST_INT_P (XEXP (x, 1)))
10599 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10600 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10602 else if (code == LSHIFTRT || code == ASHIFTRT)
10604 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
10606 /* Can use SBFX/UBFX. */
10607 if (speed_p)
10608 *cost += extra_cost->alu.bfx;
10609 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10611 else
10613 *cost += COSTS_N_INSNS (1);
10614 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10615 if (speed_p)
10617 if (CONST_INT_P (XEXP (x, 1)))
10618 *cost += 2 * extra_cost->alu.shift;
10619 else
10620 *cost += (extra_cost->alu.shift
10621 + extra_cost->alu.shift_reg);
10623 else
10624 /* Slightly disparage register shifts. */
10625 *cost += !CONST_INT_P (XEXP (x, 1));
10628 else /* Rotates. */
10630 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
10631 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10632 if (speed_p)
10634 if (CONST_INT_P (XEXP (x, 1)))
10635 *cost += (2 * extra_cost->alu.shift
10636 + extra_cost->alu.log_shift);
10637 else
10638 *cost += (extra_cost->alu.shift
10639 + extra_cost->alu.shift_reg
10640 + extra_cost->alu.log_shift_reg);
10643 return true;
10646 *cost = LIBCALL_COST (2);
10647 return false;
10649 case BSWAP:
10650 if (arm_arch6)
10652 if (mode == SImode)
10654 if (speed_p)
10655 *cost += extra_cost->alu.rev;
10657 return false;
10660 else
10662 /* No rev instruction available. Look at arm_legacy_rev
10663 and thumb_legacy_rev for the form of RTL used then. */
10664 if (TARGET_THUMB)
10666 *cost += COSTS_N_INSNS (9);
10668 if (speed_p)
10670 *cost += 6 * extra_cost->alu.shift;
10671 *cost += 3 * extra_cost->alu.logical;
10674 else
10676 *cost += COSTS_N_INSNS (4);
10678 if (speed_p)
10680 *cost += 2 * extra_cost->alu.shift;
10681 *cost += extra_cost->alu.arith_shift;
10682 *cost += 2 * extra_cost->alu.logical;
10685 return true;
10687 return false;
10689 case MINUS:
10690 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10691 && (mode == SFmode || !TARGET_VFP_SINGLE))
10693 if (GET_CODE (XEXP (x, 0)) == MULT
10694 || GET_CODE (XEXP (x, 1)) == MULT)
10696 rtx mul_op0, mul_op1, sub_op;
10698 if (speed_p)
10699 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10701 if (GET_CODE (XEXP (x, 0)) == MULT)
10703 mul_op0 = XEXP (XEXP (x, 0), 0);
10704 mul_op1 = XEXP (XEXP (x, 0), 1);
10705 sub_op = XEXP (x, 1);
10707 else
10709 mul_op0 = XEXP (XEXP (x, 1), 0);
10710 mul_op1 = XEXP (XEXP (x, 1), 1);
10711 sub_op = XEXP (x, 0);
10714 /* The first operand of the multiply may be optionally
10715 negated. */
10716 if (GET_CODE (mul_op0) == NEG)
10717 mul_op0 = XEXP (mul_op0, 0);
10719 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10720 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10721 + rtx_cost (sub_op, mode, code, 0, speed_p));
10723 return true;
10726 if (speed_p)
10727 *cost += extra_cost->fp[mode != SFmode].addsub;
10728 return false;
10731 if (mode == SImode)
10733 rtx shift_by_reg = NULL;
10734 rtx shift_op;
10735 rtx non_shift_op;
10736 rtx op0 = XEXP (x, 0);
10737 rtx op1 = XEXP (x, 1);
10739 /* Factor out any borrow operation. There's more than one way
10740 of expressing this; try to recognize them all. */
10741 if (GET_CODE (op0) == MINUS)
10743 if (arm_borrow_operation (op1, SImode))
10745 op1 = XEXP (op0, 1);
10746 op0 = XEXP (op0, 0);
10748 else if (arm_borrow_operation (XEXP (op0, 1), SImode))
10749 op0 = XEXP (op0, 0);
10751 else if (GET_CODE (op1) == PLUS
10752 && arm_borrow_operation (XEXP (op1, 0), SImode))
10753 op1 = XEXP (op1, 0);
10754 else if (GET_CODE (op0) == NEG
10755 && arm_borrow_operation (op1, SImode))
10757 /* Negate with carry-in. For Thumb2 this is done with
10758 SBC R, X, X lsl #1 (i.e. X - 2X - C) as Thumb lacks the
10759 RSC instruction that exists in Arm mode. */
10760 if (speed_p)
10761 *cost += (TARGET_THUMB2
10762 ? extra_cost->alu.arith_shift
10763 : extra_cost->alu.arith);
10764 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
10765 return true;
10767 /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
10768 Note we do mean ~borrow here. */
10769 else if (TARGET_ARM && arm_carry_operation (op0, SImode))
10771 *cost += rtx_cost (op1, mode, code, 1, speed_p);
10772 return true;
10775 shift_op = shifter_op_p (op0, &shift_by_reg);
10776 if (shift_op == NULL)
10778 shift_op = shifter_op_p (op1, &shift_by_reg);
10779 non_shift_op = op0;
10781 else
10782 non_shift_op = op1;
10784 if (shift_op != NULL)
10786 if (shift_by_reg != NULL)
10788 if (speed_p)
10789 *cost += extra_cost->alu.arith_shift_reg;
10790 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
10792 else if (speed_p)
10793 *cost += extra_cost->alu.arith_shift;
10795 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
10796 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
10797 return true;
10800 if (arm_arch_thumb2
10801 && GET_CODE (XEXP (x, 1)) == MULT)
10803 /* MLS. */
10804 if (speed_p)
10805 *cost += extra_cost->mult[0].add;
10806 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
10807 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
10808 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
10809 return true;
10812 if (CONST_INT_P (op0))
10814 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
10815 INTVAL (op0), NULL_RTX,
10816 NULL_RTX, 1, 0);
10817 *cost = COSTS_N_INSNS (insns);
10818 if (speed_p)
10819 *cost += insns * extra_cost->alu.arith;
10820 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10821 return true;
10823 else if (speed_p)
10824 *cost += extra_cost->alu.arith;
10826 /* Don't recurse as we don't want to cost any borrow that
10827 we've stripped. */
10828 *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
10829 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10830 return true;
10833 if (GET_MODE_CLASS (mode) == MODE_INT
10834 && GET_MODE_SIZE (mode) < 4)
10836 rtx shift_op, shift_reg;
10837 shift_reg = NULL;
10839 /* We check both sides of the MINUS for shifter operands since,
10840 unlike PLUS, it's not commutative. */
10842 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
10843 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
10845 /* Slightly disparage, as we might need to widen the result. */
10846 *cost += 1;
10847 if (speed_p)
10848 *cost += extra_cost->alu.arith;
10850 if (CONST_INT_P (XEXP (x, 0)))
10852 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10853 return true;
10856 return false;
10859 if (mode == DImode)
10861 *cost += COSTS_N_INSNS (1);
10863 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
10865 rtx op1 = XEXP (x, 1);
10867 if (speed_p)
10868 *cost += 2 * extra_cost->alu.arith;
10870 if (GET_CODE (op1) == ZERO_EXTEND)
10871 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10872 0, speed_p);
10873 else
10874 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10875 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10876 0, speed_p);
10877 return true;
10879 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10881 if (speed_p)
10882 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10883 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10884 0, speed_p)
10885 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10886 return true;
10888 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10889 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10891 if (speed_p)
10892 *cost += (extra_cost->alu.arith
10893 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10894 ? extra_cost->alu.arith
10895 : extra_cost->alu.arith_shift));
10896 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10897 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10898 GET_CODE (XEXP (x, 1)), 0, speed_p));
10899 return true;
10902 if (speed_p)
10903 *cost += 2 * extra_cost->alu.arith;
10904 return false;
10907 /* Vector mode? */
10909 *cost = LIBCALL_COST (2);
10910 return false;
10912 case PLUS:
10913 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10914 && (mode == SFmode || !TARGET_VFP_SINGLE))
10916 if (GET_CODE (XEXP (x, 0)) == MULT)
10918 rtx mul_op0, mul_op1, add_op;
10920 if (speed_p)
10921 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10923 mul_op0 = XEXP (XEXP (x, 0), 0);
10924 mul_op1 = XEXP (XEXP (x, 0), 1);
10925 add_op = XEXP (x, 1);
10927 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10928 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10929 + rtx_cost (add_op, mode, code, 0, speed_p));
10931 return true;
10934 if (speed_p)
10935 *cost += extra_cost->fp[mode != SFmode].addsub;
10936 return false;
10938 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10940 *cost = LIBCALL_COST (2);
10941 return false;
10944 /* Narrow modes can be synthesized in SImode, but the range
10945 of useful sub-operations is limited. Check for shift operations
10946 on one of the operands. Only left shifts can be used in the
10947 narrow modes. */
10948 if (GET_MODE_CLASS (mode) == MODE_INT
10949 && GET_MODE_SIZE (mode) < 4)
10951 rtx shift_op, shift_reg;
10952 shift_reg = NULL;
10954 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
10956 if (CONST_INT_P (XEXP (x, 1)))
10958 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10959 INTVAL (XEXP (x, 1)), NULL_RTX,
10960 NULL_RTX, 1, 0);
10961 *cost = COSTS_N_INSNS (insns);
10962 if (speed_p)
10963 *cost += insns * extra_cost->alu.arith;
10964 /* Slightly penalize a narrow operation as the result may
10965 need widening. */
10966 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10967 return true;
10970 /* Slightly penalize a narrow operation as the result may
10971 need widening. */
10972 *cost += 1;
10973 if (speed_p)
10974 *cost += extra_cost->alu.arith;
10976 return false;
10979 if (mode == SImode)
10981 rtx shift_op, shift_reg;
10983 if (TARGET_INT_SIMD
10984 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10985 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10987 /* UXTA[BH] or SXTA[BH]. */
10988 if (speed_p)
10989 *cost += extra_cost->alu.extend_arith;
10990 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10991 0, speed_p)
10992 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10993 return true;
10996 rtx op0 = XEXP (x, 0);
10997 rtx op1 = XEXP (x, 1);
10999 /* Handle a side effect of adding in the carry to an addition. */
11000 if (GET_CODE (op0) == PLUS
11001 && arm_carry_operation (op1, mode))
11003 op1 = XEXP (op0, 1);
11004 op0 = XEXP (op0, 0);
11006 else if (GET_CODE (op1) == PLUS
11007 && arm_carry_operation (op0, mode))
11009 op0 = XEXP (op1, 0);
11010 op1 = XEXP (op1, 1);
11012 else if (GET_CODE (op0) == PLUS)
11014 op0 = strip_carry_operation (op0);
11015 if (swap_commutative_operands_p (op0, op1))
11016 std::swap (op0, op1);
11019 if (arm_carry_operation (op0, mode))
11021 /* Adding the carry to a register is a canonicalization of
11022 adding 0 to the register plus the carry. */
11023 if (speed_p)
11024 *cost += extra_cost->alu.arith;
11025 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
11026 return true;
11029 shift_reg = NULL;
11030 shift_op = shifter_op_p (op0, &shift_reg);
11031 if (shift_op != NULL)
11033 if (shift_reg)
11035 if (speed_p)
11036 *cost += extra_cost->alu.arith_shift_reg;
11037 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11039 else if (speed_p)
11040 *cost += extra_cost->alu.arith_shift;
11042 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
11043 + rtx_cost (op1, mode, PLUS, 1, speed_p));
11044 return true;
11047 if (GET_CODE (op0) == MULT)
11049 rtx mul_op = op0;
11051 if (TARGET_DSP_MULTIPLY
11052 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
11053 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
11054 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
11055 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
11056 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
11057 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
11058 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
11059 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
11060 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
11061 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
11062 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
11063 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
11064 == 16))))))
11066 /* SMLA[BT][BT]. */
11067 if (speed_p)
11068 *cost += extra_cost->mult[0].extend_add;
11069 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
11070 SIGN_EXTEND, 0, speed_p)
11071 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
11072 SIGN_EXTEND, 0, speed_p)
11073 + rtx_cost (op1, mode, PLUS, 1, speed_p));
11074 return true;
11077 if (speed_p)
11078 *cost += extra_cost->mult[0].add;
11079 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
11080 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
11081 + rtx_cost (op1, mode, PLUS, 1, speed_p));
11082 return true;
11085 if (CONST_INT_P (op1))
11087 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
11088 INTVAL (op1), NULL_RTX,
11089 NULL_RTX, 1, 0);
11090 *cost = COSTS_N_INSNS (insns);
11091 if (speed_p)
11092 *cost += insns * extra_cost->alu.arith;
11093 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
11094 return true;
11097 if (speed_p)
11098 *cost += extra_cost->alu.arith;
11100 /* Don't recurse here because we want to test the operands
11101 without any carry operation. */
11102 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
11103 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
11104 return true;
11107 if (mode == DImode)
11109 if (GET_CODE (XEXP (x, 0)) == MULT
11110 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
11111 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
11112 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
11113 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
11115 if (speed_p)
11116 *cost += extra_cost->mult[1].extend_add;
11117 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11118 ZERO_EXTEND, 0, speed_p)
11119 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
11120 ZERO_EXTEND, 0, speed_p)
11121 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11122 return true;
11125 *cost += COSTS_N_INSNS (1);
11127 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11128 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
11130 if (speed_p)
11131 *cost += (extra_cost->alu.arith
11132 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11133 ? extra_cost->alu.arith
11134 : extra_cost->alu.arith_shift));
11136 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
11137 0, speed_p)
11138 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11139 return true;
11142 if (speed_p)
11143 *cost += 2 * extra_cost->alu.arith;
11144 return false;
11147 /* Vector mode? */
11148 *cost = LIBCALL_COST (2);
11149 return false;
11150 case IOR:
11152 rtx sub0, sub1;
11153 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
11155 if (speed_p)
11156 *cost += extra_cost->alu.rev;
11158 return true;
11160 else if (mode == SImode && arm_arch_thumb2
11161 && arm_bfi_p (x, &sub0, &sub1))
11163 *cost += rtx_cost (sub0, mode, ZERO_EXTRACT, 1, speed_p);
11164 *cost += rtx_cost (sub1, mode, ZERO_EXTRACT, 0, speed_p);
11165 if (speed_p)
11166 *cost += extra_cost->alu.bfi;
11168 return true;
11172 /* Fall through. */
11173 case AND: case XOR:
11174 if (mode == SImode)
11176 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
11177 rtx op0 = XEXP (x, 0);
11178 rtx shift_op, shift_reg;
11180 if (subcode == NOT
11181 && (code == AND
11182 || (code == IOR && TARGET_THUMB2)))
11183 op0 = XEXP (op0, 0);
11185 shift_reg = NULL;
11186 shift_op = shifter_op_p (op0, &shift_reg);
11187 if (shift_op != NULL)
11189 if (shift_reg)
11191 if (speed_p)
11192 *cost += extra_cost->alu.log_shift_reg;
11193 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11195 else if (speed_p)
11196 *cost += extra_cost->alu.log_shift;
11198 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
11199 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11200 return true;
11203 if (CONST_INT_P (XEXP (x, 1)))
11205 int insns = arm_gen_constant (code, SImode, NULL_RTX,
11206 INTVAL (XEXP (x, 1)), NULL_RTX,
11207 NULL_RTX, 1, 0);
11209 *cost = COSTS_N_INSNS (insns);
11210 if (speed_p)
11211 *cost += insns * extra_cost->alu.logical;
11212 *cost += rtx_cost (op0, mode, code, 0, speed_p);
11213 return true;
11216 if (speed_p)
11217 *cost += extra_cost->alu.logical;
11218 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
11219 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11220 return true;
11223 if (mode == DImode)
11225 rtx op0 = XEXP (x, 0);
11226 enum rtx_code subcode = GET_CODE (op0);
11228 *cost += COSTS_N_INSNS (1);
11230 if (subcode == NOT
11231 && (code == AND
11232 || (code == IOR && TARGET_THUMB2)))
11233 op0 = XEXP (op0, 0);
11235 if (GET_CODE (op0) == ZERO_EXTEND)
11237 if (speed_p)
11238 *cost += 2 * extra_cost->alu.logical;
11240 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
11241 0, speed_p)
11242 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11243 return true;
11245 else if (GET_CODE (op0) == SIGN_EXTEND)
11247 if (speed_p)
11248 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
11250 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
11251 0, speed_p)
11252 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11253 return true;
11256 if (speed_p)
11257 *cost += 2 * extra_cost->alu.logical;
11259 return true;
11261 /* Vector mode? */
11263 *cost = LIBCALL_COST (2);
11264 return false;
11266 case MULT:
11267 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11268 && (mode == SFmode || !TARGET_VFP_SINGLE))
11270 rtx op0 = XEXP (x, 0);
11272 if (GET_CODE (op0) == NEG && !flag_rounding_math)
11273 op0 = XEXP (op0, 0);
11275 if (speed_p)
11276 *cost += extra_cost->fp[mode != SFmode].mult;
11278 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
11279 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
11280 return true;
11282 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11284 *cost = LIBCALL_COST (2);
11285 return false;
11288 if (mode == SImode)
11290 if (TARGET_DSP_MULTIPLY
11291 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11292 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11293 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11294 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11295 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
11296 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11297 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11298 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
11299 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11300 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11301 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11302 && (INTVAL (XEXP (XEXP (x, 1), 1))
11303 == 16))))))
11305 /* SMUL[TB][TB]. */
11306 if (speed_p)
11307 *cost += extra_cost->mult[0].extend;
11308 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
11309 SIGN_EXTEND, 0, speed_p);
11310 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
11311 SIGN_EXTEND, 1, speed_p);
11312 return true;
11314 if (speed_p)
11315 *cost += extra_cost->mult[0].simple;
11316 return false;
11319 if (mode == DImode)
11321 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11322 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
11323 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11324 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
11326 if (speed_p)
11327 *cost += extra_cost->mult[1].extend;
11328 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
11329 ZERO_EXTEND, 0, speed_p)
11330 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
11331 ZERO_EXTEND, 0, speed_p));
11332 return true;
11335 *cost = LIBCALL_COST (2);
11336 return false;
11339 /* Vector mode? */
11340 *cost = LIBCALL_COST (2);
11341 return false;
11343 case NEG:
11344 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11345 && (mode == SFmode || !TARGET_VFP_SINGLE))
11347 if (GET_CODE (XEXP (x, 0)) == MULT)
11349 /* VNMUL. */
11350 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
11351 return true;
11354 if (speed_p)
11355 *cost += extra_cost->fp[mode != SFmode].neg;
11357 return false;
11359 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11361 *cost = LIBCALL_COST (1);
11362 return false;
11365 if (mode == SImode)
11367 if (GET_CODE (XEXP (x, 0)) == ABS)
11369 *cost += COSTS_N_INSNS (1);
11370 /* Assume the non-flag-changing variant. */
11371 if (speed_p)
11372 *cost += (extra_cost->alu.log_shift
11373 + extra_cost->alu.arith_shift);
11374 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
11375 return true;
11378 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
11379 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
11381 *cost += COSTS_N_INSNS (1);
11382 /* No extra cost for MOV imm and MVN imm. */
11383 /* If the comparison op is using the flags, there's no further
11384 cost, otherwise we need to add the cost of the comparison. */
11385 if (!(REG_P (XEXP (XEXP (x, 0), 0))
11386 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
11387 && XEXP (XEXP (x, 0), 1) == const0_rtx))
11389 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
11390 *cost += (COSTS_N_INSNS (1)
11391 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
11392 0, speed_p)
11393 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
11394 1, speed_p));
11395 if (speed_p)
11396 *cost += extra_cost->alu.arith;
11398 return true;
11401 if (speed_p)
11402 *cost += extra_cost->alu.arith;
11403 return false;
11406 if (GET_MODE_CLASS (mode) == MODE_INT
11407 && GET_MODE_SIZE (mode) < 4)
11409 /* Slightly disparage, as we might need an extend operation. */
11410 *cost += 1;
11411 if (speed_p)
11412 *cost += extra_cost->alu.arith;
11413 return false;
11416 if (mode == DImode)
11418 *cost += COSTS_N_INSNS (1);
11419 if (speed_p)
11420 *cost += 2 * extra_cost->alu.arith;
11421 return false;
11424 /* Vector mode? */
11425 *cost = LIBCALL_COST (1);
11426 return false;
11428 case NOT:
11429 if (mode == SImode)
11431 rtx shift_op;
11432 rtx shift_reg = NULL;
11434 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11436 if (shift_op)
11438 if (shift_reg != NULL)
11440 if (speed_p)
11441 *cost += extra_cost->alu.log_shift_reg;
11442 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11444 else if (speed_p)
11445 *cost += extra_cost->alu.log_shift;
11446 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
11447 return true;
11450 if (speed_p)
11451 *cost += extra_cost->alu.logical;
11452 return false;
11454 if (mode == DImode)
11456 *cost += COSTS_N_INSNS (1);
11457 return false;
11460 /* Vector mode? */
11462 *cost += LIBCALL_COST (1);
11463 return false;
11465 case IF_THEN_ELSE:
11467 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
11469 *cost += COSTS_N_INSNS (3);
11470 return true;
11472 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
11473 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
11475 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
11476 /* Assume that if one arm of the if_then_else is a register,
11477 that it will be tied with the result and eliminate the
11478 conditional insn. */
11479 if (REG_P (XEXP (x, 1)))
11480 *cost += op2cost;
11481 else if (REG_P (XEXP (x, 2)))
11482 *cost += op1cost;
11483 else
11485 if (speed_p)
11487 if (extra_cost->alu.non_exec_costs_exec)
11488 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
11489 else
11490 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
11492 else
11493 *cost += op1cost + op2cost;
11496 return true;
11498 case COMPARE:
11499 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
11500 *cost = 0;
11501 else
11503 machine_mode op0mode;
11504 /* We'll mostly assume that the cost of a compare is the cost of the
11505 LHS. However, there are some notable exceptions. */
11507 /* Floating point compares are never done as side-effects. */
11508 op0mode = GET_MODE (XEXP (x, 0));
11509 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
11510 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
11512 if (speed_p)
11513 *cost += extra_cost->fp[op0mode != SFmode].compare;
11515 if (XEXP (x, 1) == CONST0_RTX (op0mode))
11517 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
11518 return true;
11521 return false;
11523 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
11525 *cost = LIBCALL_COST (2);
11526 return false;
11529 /* DImode compares normally take two insns. */
11530 if (op0mode == DImode)
11532 *cost += COSTS_N_INSNS (1);
11533 if (speed_p)
11534 *cost += 2 * extra_cost->alu.arith;
11535 return false;
11538 if (op0mode == SImode)
11540 rtx shift_op;
11541 rtx shift_reg;
11543 if (XEXP (x, 1) == const0_rtx
11544 && !(REG_P (XEXP (x, 0))
11545 || (GET_CODE (XEXP (x, 0)) == SUBREG
11546 && REG_P (SUBREG_REG (XEXP (x, 0))))))
11548 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11550 /* Multiply operations that set the flags are often
11551 significantly more expensive. */
11552 if (speed_p
11553 && GET_CODE (XEXP (x, 0)) == MULT
11554 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
11555 *cost += extra_cost->mult[0].flag_setting;
11557 if (speed_p
11558 && GET_CODE (XEXP (x, 0)) == PLUS
11559 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11560 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
11561 0), 1), mode))
11562 *cost += extra_cost->mult[0].flag_setting;
11563 return true;
11566 shift_reg = NULL;
11567 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11568 if (shift_op != NULL)
11570 if (shift_reg != NULL)
11572 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
11573 1, speed_p);
11574 if (speed_p)
11575 *cost += extra_cost->alu.arith_shift_reg;
11577 else if (speed_p)
11578 *cost += extra_cost->alu.arith_shift;
11579 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
11580 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
11581 return true;
11584 if (speed_p)
11585 *cost += extra_cost->alu.arith;
11586 if (CONST_INT_P (XEXP (x, 1))
11587 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11589 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11590 return true;
11592 return false;
11595 /* Vector mode? */
11597 *cost = LIBCALL_COST (2);
11598 return false;
11600 return true;
11602 case EQ:
11603 case GE:
11604 case GT:
11605 case LE:
11606 case LT:
11607 /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt,
11608 vcle and vclt). */
11609 if (TARGET_NEON
11610 && TARGET_HARD_FLOAT
11611 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
11612 && (XEXP (x, 1) == CONST0_RTX (mode)))
11614 *cost = 0;
11615 return true;
11618 /* Fall through. */
11619 case NE:
11620 case LTU:
11621 case LEU:
11622 case GEU:
11623 case GTU:
11624 case ORDERED:
11625 case UNORDERED:
11626 case UNEQ:
11627 case UNLE:
11628 case UNLT:
11629 case UNGE:
11630 case UNGT:
11631 case LTGT:
11632 if (outer_code == SET)
11634 /* Is it a store-flag operation? */
11635 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11636 && XEXP (x, 1) == const0_rtx)
11638 /* Thumb also needs an IT insn. */
11639 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
11640 return true;
11642 if (XEXP (x, 1) == const0_rtx)
11644 switch (code)
11646 case LT:
11647 /* LSR Rd, Rn, #31. */
11648 if (speed_p)
11649 *cost += extra_cost->alu.shift;
11650 break;
11652 case EQ:
11653 /* RSBS T1, Rn, #0
11654 ADC Rd, Rn, T1. */
11656 case NE:
11657 /* SUBS T1, Rn, #1
11658 SBC Rd, Rn, T1. */
11659 *cost += COSTS_N_INSNS (1);
11660 break;
11662 case LE:
11663 /* RSBS T1, Rn, Rn, LSR #31
11664 ADC Rd, Rn, T1. */
11665 *cost += COSTS_N_INSNS (1);
11666 if (speed_p)
11667 *cost += extra_cost->alu.arith_shift;
11668 break;
11670 case GT:
11671 /* RSB Rd, Rn, Rn, ASR #1
11672 LSR Rd, Rd, #31. */
11673 *cost += COSTS_N_INSNS (1);
11674 if (speed_p)
11675 *cost += (extra_cost->alu.arith_shift
11676 + extra_cost->alu.shift);
11677 break;
11679 case GE:
11680 /* ASR Rd, Rn, #31
11681 ADD Rd, Rn, #1. */
11682 *cost += COSTS_N_INSNS (1);
11683 if (speed_p)
11684 *cost += extra_cost->alu.shift;
11685 break;
11687 default:
11688 /* Remaining cases are either meaningless or would take
11689 three insns anyway. */
11690 *cost = COSTS_N_INSNS (3);
11691 break;
11693 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11694 return true;
11696 else
11698 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
11699 if (CONST_INT_P (XEXP (x, 1))
11700 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11702 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11703 return true;
11706 return false;
11709 /* Not directly inside a set. If it involves the condition code
11710 register it must be the condition for a branch, cond_exec or
11711 I_T_E operation. Since the comparison is performed elsewhere
11712 this is just the control part which has no additional
11713 cost. */
11714 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11715 && XEXP (x, 1) == const0_rtx)
11717 *cost = 0;
11718 return true;
11720 return false;
11722 case ABS:
11723 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11724 && (mode == SFmode || !TARGET_VFP_SINGLE))
11726 if (speed_p)
11727 *cost += extra_cost->fp[mode != SFmode].neg;
11729 return false;
11731 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11733 *cost = LIBCALL_COST (1);
11734 return false;
11737 if (mode == SImode)
11739 if (speed_p)
11740 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
11741 return false;
11743 /* Vector mode? */
11744 *cost = LIBCALL_COST (1);
11745 return false;
11747 case SIGN_EXTEND:
11748 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
11749 && MEM_P (XEXP (x, 0)))
11751 if (mode == DImode)
11752 *cost += COSTS_N_INSNS (1);
11754 if (!speed_p)
11755 return true;
11757 if (GET_MODE (XEXP (x, 0)) == SImode)
11758 *cost += extra_cost->ldst.load;
11759 else
11760 *cost += extra_cost->ldst.load_sign_extend;
11762 if (mode == DImode)
11763 *cost += extra_cost->alu.shift;
11765 return true;
11768 /* Widening from less than 32-bits requires an extend operation. */
11769 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11771 /* We have SXTB/SXTH. */
11772 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11773 if (speed_p)
11774 *cost += extra_cost->alu.extend;
11776 else if (GET_MODE (XEXP (x, 0)) != SImode)
11778 /* Needs two shifts. */
11779 *cost += COSTS_N_INSNS (1);
11780 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11781 if (speed_p)
11782 *cost += 2 * extra_cost->alu.shift;
11785 /* Widening beyond 32-bits requires one more insn. */
11786 if (mode == DImode)
11788 *cost += COSTS_N_INSNS (1);
11789 if (speed_p)
11790 *cost += extra_cost->alu.shift;
11793 return true;
11795 case ZERO_EXTEND:
11796 if ((arm_arch4
11797 || GET_MODE (XEXP (x, 0)) == SImode
11798 || GET_MODE (XEXP (x, 0)) == QImode)
11799 && MEM_P (XEXP (x, 0)))
11801 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11803 if (mode == DImode)
11804 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11806 return true;
11809 /* Widening from less than 32-bits requires an extend operation. */
11810 if (GET_MODE (XEXP (x, 0)) == QImode)
11812 /* UXTB can be a shorter instruction in Thumb2, but it might
11813 be slower than the AND Rd, Rn, #255 alternative. When
11814 optimizing for speed it should never be slower to use
11815 AND, and we don't really model 16-bit vs 32-bit insns
11816 here. */
11817 if (speed_p)
11818 *cost += extra_cost->alu.logical;
11820 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11822 /* We have UXTB/UXTH. */
11823 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11824 if (speed_p)
11825 *cost += extra_cost->alu.extend;
11827 else if (GET_MODE (XEXP (x, 0)) != SImode)
11829 /* Needs two shifts. It's marginally preferable to use
11830 shifts rather than two BIC instructions as the second
11831 shift may merge with a subsequent insn as a shifter
11832 op. */
11833 *cost = COSTS_N_INSNS (2);
11834 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11835 if (speed_p)
11836 *cost += 2 * extra_cost->alu.shift;
11839 /* Widening beyond 32-bits requires one more insn. */
11840 if (mode == DImode)
11842 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11845 return true;
11847 case CONST_INT:
11848 *cost = 0;
11849 /* CONST_INT has no mode, so we cannot tell for sure how many
11850 insns are really going to be needed. The best we can do is
11851 look at the value passed. If it fits in SImode, then assume
11852 that's the mode it will be used for. Otherwise assume it
11853 will be used in DImode. */
11854 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
11855 mode = SImode;
11856 else
11857 mode = DImode;
11859 /* Avoid blowing up in arm_gen_constant (). */
11860 if (!(outer_code == PLUS
11861 || outer_code == AND
11862 || outer_code == IOR
11863 || outer_code == XOR
11864 || outer_code == MINUS))
11865 outer_code = SET;
11867 const_int_cost:
11868 if (mode == SImode)
11870 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
11871 INTVAL (x), NULL, NULL,
11872 0, 0));
11873 /* Extra costs? */
11875 else
11877 *cost += COSTS_N_INSNS (arm_gen_constant
11878 (outer_code, SImode, NULL,
11879 trunc_int_for_mode (INTVAL (x), SImode),
11880 NULL, NULL, 0, 0)
11881 + arm_gen_constant (outer_code, SImode, NULL,
11882 INTVAL (x) >> 32, NULL,
11883 NULL, 0, 0));
11884 /* Extra costs? */
11887 return true;
11889 case CONST:
11890 case LABEL_REF:
11891 case SYMBOL_REF:
11892 if (speed_p)
11894 if (arm_arch_thumb2 && !flag_pic)
11895 *cost += COSTS_N_INSNS (1);
11896 else
11897 *cost += extra_cost->ldst.load;
11899 else
11900 *cost += COSTS_N_INSNS (1);
11902 if (flag_pic)
11904 *cost += COSTS_N_INSNS (1);
11905 if (speed_p)
11906 *cost += extra_cost->alu.arith;
11909 return true;
11911 case CONST_FIXED:
11912 *cost = COSTS_N_INSNS (4);
11913 /* Fixme. */
11914 return true;
11916 case CONST_DOUBLE:
11917 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11918 && (mode == SFmode || mode == HFmode || !TARGET_VFP_SINGLE))
11920 if (vfp3_const_double_rtx (x))
11922 if (speed_p)
11923 *cost += extra_cost->fp[mode == DFmode].fpconst;
11924 return true;
11927 if (speed_p)
11929 if (mode == DFmode)
11930 *cost += extra_cost->ldst.loadd;
11931 else
11932 *cost += extra_cost->ldst.loadf;
11934 else
11935 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11937 return true;
11939 *cost = COSTS_N_INSNS (4);
11940 return true;
11942 case CONST_VECTOR:
11943 if (((TARGET_NEON && TARGET_HARD_FLOAT
11944 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
11945 || TARGET_HAVE_MVE)
11946 && simd_immediate_valid_for_move (x, mode, NULL, NULL))
11947 *cost = COSTS_N_INSNS (1);
11948 else if (TARGET_HAVE_MVE)
11950 /* 128-bit vector requires two vldr.64 on MVE. */
11951 *cost = COSTS_N_INSNS (2);
11952 if (speed_p)
11953 *cost += extra_cost->ldst.loadd * 2;
11955 else
11956 *cost = COSTS_N_INSNS (4);
11957 return true;
11959 case HIGH:
11960 case LO_SUM:
11961 /* When optimizing for size, we prefer constant pool entries to
11962 MOVW/MOVT pairs, so bump the cost of these slightly. */
11963 if (!speed_p)
11964 *cost += 1;
11965 return true;
11967 case CLZ:
11968 if (speed_p)
11969 *cost += extra_cost->alu.clz;
11970 return false;
11972 case SMIN:
11973 if (XEXP (x, 1) == const0_rtx)
11975 if (speed_p)
11976 *cost += extra_cost->alu.log_shift;
11977 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11978 return true;
11980 /* Fall through. */
11981 case SMAX:
11982 case UMIN:
11983 case UMAX:
11984 *cost += COSTS_N_INSNS (1);
11985 return false;
11987 case TRUNCATE:
11988 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11989 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11990 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11991 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11992 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11993 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11994 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11995 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11996 == ZERO_EXTEND))))
11998 if (speed_p)
11999 *cost += extra_cost->mult[1].extend;
12000 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
12001 ZERO_EXTEND, 0, speed_p)
12002 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
12003 ZERO_EXTEND, 0, speed_p));
12004 return true;
12006 *cost = LIBCALL_COST (1);
12007 return false;
12009 case UNSPEC_VOLATILE:
12010 case UNSPEC:
12011 return arm_unspec_cost (x, outer_code, speed_p, cost);
12013 case PC:
12014 /* Reading the PC is like reading any other register. Writing it
12015 is more expensive, but we take that into account elsewhere. */
12016 *cost = 0;
12017 return true;
12019 case ZERO_EXTRACT:
12020 /* TODO: Simple zero_extract of bottom bits using AND. */
12021 /* Fall through. */
12022 case SIGN_EXTRACT:
12023 if (arm_arch6
12024 && mode == SImode
12025 && CONST_INT_P (XEXP (x, 1))
12026 && CONST_INT_P (XEXP (x, 2)))
12028 if (speed_p)
12029 *cost += extra_cost->alu.bfx;
12030 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
12031 return true;
12033 /* Without UBFX/SBFX, need to resort to shift operations. */
12034 *cost += COSTS_N_INSNS (1);
12035 if (speed_p)
12036 *cost += 2 * extra_cost->alu.shift;
12037 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
12038 return true;
12040 case FLOAT_EXTEND:
12041 if (TARGET_HARD_FLOAT)
12043 if (speed_p)
12044 *cost += extra_cost->fp[mode == DFmode].widen;
12045 if (!TARGET_VFP5
12046 && GET_MODE (XEXP (x, 0)) == HFmode)
12048 /* Pre v8, widening HF->DF is a two-step process, first
12049 widening to SFmode. */
12050 *cost += COSTS_N_INSNS (1);
12051 if (speed_p)
12052 *cost += extra_cost->fp[0].widen;
12054 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
12055 return true;
12058 *cost = LIBCALL_COST (1);
12059 return false;
12061 case FLOAT_TRUNCATE:
12062 if (TARGET_HARD_FLOAT)
12064 if (speed_p)
12065 *cost += extra_cost->fp[mode == DFmode].narrow;
12066 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
12067 return true;
12068 /* Vector modes? */
12070 *cost = LIBCALL_COST (1);
12071 return false;
12073 case FMA:
12074 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
12076 rtx op0 = XEXP (x, 0);
12077 rtx op1 = XEXP (x, 1);
12078 rtx op2 = XEXP (x, 2);
12081 /* vfms or vfnma. */
12082 if (GET_CODE (op0) == NEG)
12083 op0 = XEXP (op0, 0);
12085 /* vfnms or vfnma. */
12086 if (GET_CODE (op2) == NEG)
12087 op2 = XEXP (op2, 0);
12089 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
12090 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
12091 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
12093 if (speed_p)
12094 *cost += extra_cost->fp[mode == DFmode].fma;
12096 return true;
12099 *cost = LIBCALL_COST (3);
12100 return false;
12102 case FIX:
12103 case UNSIGNED_FIX:
12104 if (TARGET_HARD_FLOAT)
12106 /* The *combine_vcvtf2i reduces a vmul+vcvt into
12107 a vcvt fixed-point conversion. */
12108 if (code == FIX && mode == SImode
12109 && GET_CODE (XEXP (x, 0)) == FIX
12110 && GET_MODE (XEXP (x, 0)) == SFmode
12111 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12112 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
12113 > 0)
12115 if (speed_p)
12116 *cost += extra_cost->fp[0].toint;
12118 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
12119 code, 0, speed_p);
12120 return true;
12123 if (GET_MODE_CLASS (mode) == MODE_INT)
12125 mode = GET_MODE (XEXP (x, 0));
12126 if (speed_p)
12127 *cost += extra_cost->fp[mode == DFmode].toint;
12128 /* Strip off the 'cost' of rounding towards zero. */
12129 if (GET_CODE (XEXP (x, 0)) == FIX)
12130 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
12131 0, speed_p);
12132 else
12133 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
12134 /* ??? Increase the cost to deal with transferring from
12135 FP -> CORE registers? */
12136 return true;
12138 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
12139 && TARGET_VFP5)
12141 if (speed_p)
12142 *cost += extra_cost->fp[mode == DFmode].roundint;
12143 return false;
12145 /* Vector costs? */
12147 *cost = LIBCALL_COST (1);
12148 return false;
12150 case FLOAT:
12151 case UNSIGNED_FLOAT:
12152 if (TARGET_HARD_FLOAT)
12154 /* ??? Increase the cost to deal with transferring from CORE
12155 -> FP registers? */
12156 if (speed_p)
12157 *cost += extra_cost->fp[mode == DFmode].fromint;
12158 return false;
12160 *cost = LIBCALL_COST (1);
12161 return false;
12163 case CALL:
12164 return true;
12166 case ASM_OPERANDS:
12168 /* Just a guess. Guess number of instructions in the asm
12169 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
12170 though (see PR60663). */
12171 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
12172 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
12174 *cost = COSTS_N_INSNS (asm_length + num_operands);
12175 return true;
12177 default:
12178 if (mode != VOIDmode)
12179 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
12180 else
12181 *cost = COSTS_N_INSNS (4); /* Who knows? */
12182 return false;
12186 #undef HANDLE_NARROW_SHIFT_ARITH
12188 /* RTX costs entry point. */
12190 static bool
12191 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
12192 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
12194 bool result;
12195 int code = GET_CODE (x);
12196 gcc_assert (current_tune->insn_extra_cost);
12198 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
12199 (enum rtx_code) outer_code,
12200 current_tune->insn_extra_cost,
12201 total, speed);
12203 if (dump_file && arm_verbose_cost)
12205 print_rtl_single (dump_file, x);
12206 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
12207 *total, result ? "final" : "partial");
12209 return result;
12212 static int
12213 arm_insn_cost (rtx_insn *insn, bool speed)
12215 int cost;
12217 /* Don't cost a simple reg-reg move at a full insn cost: such moves
12218 will likely disappear during register allocation. */
12219 if (!reload_completed
12220 && GET_CODE (PATTERN (insn)) == SET
12221 && REG_P (SET_DEST (PATTERN (insn)))
12222 && REG_P (SET_SRC (PATTERN (insn))))
12223 return 2;
12224 cost = pattern_cost (PATTERN (insn), speed);
12225 /* If the cost is zero, then it's likely a complex insn. We don't want the
12226 cost of these to be less than something we know about. */
12227 return cost ? cost : COSTS_N_INSNS (2);
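/* Illustrative note: the value 2 used above is half of COSTS_N_INSNS (1), so
   such a copy is rated cheaper than any real instruction, while patterns whose
   computed cost comes back as zero are bumped up to COSTS_N_INSNS (2).  */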
12230 /* All address computations that can be done are free, but rtx cost returns
12231 the same for practically all of them. So we weight the different types
12232 of address here in the order (most pref first):
12233 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
12234 static inline int
12235 arm_arm_address_cost (rtx x)
12237 enum rtx_code c = GET_CODE (x);
12239 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
12240 return 0;
12241 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
12242 return 10;
12244 if (c == PLUS)
12246 if (CONST_INT_P (XEXP (x, 1)))
12247 return 2;
12249 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
12250 return 3;
12252 return 4;
12255 return 6;
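/* Illustrative readings of the weights above: a pre/post-incremented address
   costs 0; [rN, #imm] costs 2; [rN, rM, lsl #k] costs 3 (one operand of the
   sum is itself arithmetic); [rN, rM] costs 4; a plain register costs 6; and
   a symbolic or constant-pool address costs 10.  */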
12258 static inline int
12259 arm_thumb_address_cost (rtx x)
12261 enum rtx_code c = GET_CODE (x);
12263 if (c == REG)
12264 return 1;
12265 if (c == PLUS
12266 && REG_P (XEXP (x, 0))
12267 && CONST_INT_P (XEXP (x, 1)))
12268 return 1;
12270 return 2;
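/* E.g. a plain register or register-plus-constant address costs 1 on Thumb;
   anything else, such as [rN, rM], costs 2.  */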
12273 static int
12274 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
12275 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
12277 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
12280 /* Adjust cost hook for XScale. */
12281 static bool
12282 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12283 int * cost)
12285 /* Some true dependencies can have a higher cost depending
12286 on precisely how certain input operands are used. */
12287 if (dep_type == 0
12288 && recog_memoized (insn) >= 0
12289 && recog_memoized (dep) >= 0)
12291 int shift_opnum = get_attr_shift (insn);
12292 enum attr_type attr_type = get_attr_type (dep);
12294 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
12295 operand for INSN. If we have a shifted input operand and the
12296 instruction we depend on is another ALU instruction, then we may
12297 have to account for an additional stall. */
12298 if (shift_opnum != 0
12299 && (attr_type == TYPE_ALU_SHIFT_IMM_LSL_1TO4
12300 || attr_type == TYPE_ALU_SHIFT_IMM_OTHER
12301 || attr_type == TYPE_ALUS_SHIFT_IMM
12302 || attr_type == TYPE_LOGIC_SHIFT_IMM
12303 || attr_type == TYPE_LOGICS_SHIFT_IMM
12304 || attr_type == TYPE_ALU_SHIFT_REG
12305 || attr_type == TYPE_ALUS_SHIFT_REG
12306 || attr_type == TYPE_LOGIC_SHIFT_REG
12307 || attr_type == TYPE_LOGICS_SHIFT_REG
12308 || attr_type == TYPE_MOV_SHIFT
12309 || attr_type == TYPE_MVN_SHIFT
12310 || attr_type == TYPE_MOV_SHIFT_REG
12311 || attr_type == TYPE_MVN_SHIFT_REG))
12313 rtx shifted_operand;
12314 int opno;
12316 /* Get the shifted operand. */
12317 extract_insn (insn);
12318 shifted_operand = recog_data.operand[shift_opnum];
12320 /* Iterate over all the operands in DEP. If we write an operand
12321 that overlaps with SHIFTED_OPERAND, then we have to increase the
12322 cost of this dependency. */
12323 extract_insn (dep);
12324 preprocess_constraints (dep);
12325 for (opno = 0; opno < recog_data.n_operands; opno++)
12327 /* We can ignore strict inputs. */
12328 if (recog_data.operand_type[opno] == OP_IN)
12329 continue;
12331 if (reg_overlap_mentioned_p (recog_data.operand[opno],
12332 shifted_operand))
12334 *cost = 2;
12335 return false;
12340 return true;
12343 /* Adjust cost hook for Cortex A9. */
12344 static bool
12345 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12346 int * cost)
12348 switch (dep_type)
12350 case REG_DEP_ANTI:
12351 *cost = 0;
12352 return false;
12354 case REG_DEP_TRUE:
12355 case REG_DEP_OUTPUT:
12356 if (recog_memoized (insn) >= 0
12357 && recog_memoized (dep) >= 0)
12359 if (GET_CODE (PATTERN (insn)) == SET)
12361 if (GET_MODE_CLASS
12362 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
12363 || GET_MODE_CLASS
12364 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
12366 enum attr_type attr_type_insn = get_attr_type (insn);
12367 enum attr_type attr_type_dep = get_attr_type (dep);
12369 /* By default all dependencies of the form
12370 s0 = s0 <op> s1
12371 s0 = s0 <op> s2
12372 have an extra latency of 1 cycle because
12373 of the input and output dependency in this
12374 case. However, this gets modeled as a true
12375 dependency and hence all these checks. */
12376 if (REG_P (SET_DEST (PATTERN (insn)))
12377 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
12379 /* FMACS is a special case where the dependent
12380 instruction can be issued 3 cycles before
12381 the normal latency in case of an output
12382 dependency. */
12383 if ((attr_type_insn == TYPE_FMACS
12384 || attr_type_insn == TYPE_FMACD)
12385 && (attr_type_dep == TYPE_FMACS
12386 || attr_type_dep == TYPE_FMACD))
12388 if (dep_type == REG_DEP_OUTPUT)
12389 *cost = insn_default_latency (dep) - 3;
12390 else
12391 *cost = insn_default_latency (dep);
12392 return false;
12394 else
12396 if (dep_type == REG_DEP_OUTPUT)
12397 *cost = insn_default_latency (dep) + 1;
12398 else
12399 *cost = insn_default_latency (dep);
12401 return false;
12406 break;
12408 default:
12409 gcc_unreachable ();
12412 return true;
12415 /* Adjust cost hook for FA726TE. */
12416 static bool
12417 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12418 int * cost)
12420 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting insn followed
12421 by a predicated one) has a penalty of 3. */
12422 if (dep_type == REG_DEP_TRUE
12423 && recog_memoized (insn) >= 0
12424 && recog_memoized (dep) >= 0
12425 && get_attr_conds (dep) == CONDS_SET)
12427 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
12428 if (get_attr_conds (insn) == CONDS_USE
12429 && get_attr_type (insn) != TYPE_BRANCH)
12431 *cost = 3;
12432 return false;
12435 if (GET_CODE (PATTERN (insn)) == COND_EXEC
12436 || get_attr_conds (insn) == CONDS_USE)
12438 *cost = 0;
12439 return false;
12443 return true;
12446 /* Implement TARGET_REGISTER_MOVE_COST.
12448 Moves between VFP_REGS and GENERAL_REGS are a single insn, but such a
12449 move is typically more expensive than a single memory access. We set
12450 the cost to less than two memory accesses so that floating
12451 point to integer conversion does not go through memory. */
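/* A quick sanity check against the numbers used below: with the 32-bit memory
   move cost of 10 (see arm_memory_move_cost), the value 15 used for VFP<->core
   moves lies between one memory access (10) and two (20).  */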
12453 int
12454 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12455 reg_class_t from, reg_class_t to)
12457 if (TARGET_32BIT)
12459 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
12460 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
12461 return 15;
12462 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
12463 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
12464 return 4;
12465 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
12466 return 20;
12467 else
12468 return 2;
12470 else
12472 if (from == HI_REGS || to == HI_REGS)
12473 return 4;
12474 else
12475 return 2;
12479 /* Implement TARGET_MEMORY_MOVE_COST. */
12481 int
12482 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
12483 bool in ATTRIBUTE_UNUSED)
12485 if (TARGET_32BIT)
12486 return 10;
12487 else
12489 if (GET_MODE_SIZE (mode) < 4)
12490 return 8;
12491 else
12492 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
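/* For example, on Thumb-1 moving an SImode value costs 8 via LO_REGS and 16
   via any other class, while sub-word modes cost a flat 8.  */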
12496 /* Vectorizer cost model implementation. */
12498 /* Implement targetm.vectorize.builtin_vectorization_cost. */
12499 static int
12500 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
12501 tree vectype,
12502 int misalign ATTRIBUTE_UNUSED)
12504 unsigned elements;
12506 switch (type_of_cost)
12508 case scalar_stmt:
12509 return current_tune->vec_costs->scalar_stmt_cost;
12511 case scalar_load:
12512 return current_tune->vec_costs->scalar_load_cost;
12514 case scalar_store:
12515 return current_tune->vec_costs->scalar_store_cost;
12517 case vector_stmt:
12518 return current_tune->vec_costs->vec_stmt_cost;
12520 case vector_load:
12521 return current_tune->vec_costs->vec_align_load_cost;
12523 case vector_store:
12524 return current_tune->vec_costs->vec_store_cost;
12526 case vec_to_scalar:
12527 return current_tune->vec_costs->vec_to_scalar_cost;
12529 case scalar_to_vec:
12530 return current_tune->vec_costs->scalar_to_vec_cost;
12532 case unaligned_load:
12533 case vector_gather_load:
12534 return current_tune->vec_costs->vec_unalign_load_cost;
12536 case unaligned_store:
12537 case vector_scatter_store:
12538 return current_tune->vec_costs->vec_unalign_store_cost;
12540 case cond_branch_taken:
12541 return current_tune->vec_costs->cond_taken_branch_cost;
12543 case cond_branch_not_taken:
12544 return current_tune->vec_costs->cond_not_taken_branch_cost;
12546 case vec_perm:
12547 case vec_promote_demote:
12548 return current_tune->vec_costs->vec_stmt_cost;
12550 case vec_construct:
12551 elements = TYPE_VECTOR_SUBPARTS (vectype);
12552 return elements / 2 + 1;
12554 default:
12555 gcc_unreachable ();
12559 /* Return true if and only if this insn can dual-issue only as older. */
12560 static bool
12561 cortexa7_older_only (rtx_insn *insn)
12563 if (recog_memoized (insn) < 0)
12564 return false;
12566 switch (get_attr_type (insn))
12568 case TYPE_ALU_DSP_REG:
12569 case TYPE_ALU_SREG:
12570 case TYPE_ALUS_SREG:
12571 case TYPE_LOGIC_REG:
12572 case TYPE_LOGICS_REG:
12573 case TYPE_ADC_REG:
12574 case TYPE_ADCS_REG:
12575 case TYPE_ADR:
12576 case TYPE_BFM:
12577 case TYPE_REV:
12578 case TYPE_MVN_REG:
12579 case TYPE_SHIFT_IMM:
12580 case TYPE_SHIFT_REG:
12581 case TYPE_LOAD_BYTE:
12582 case TYPE_LOAD_4:
12583 case TYPE_STORE_4:
12584 case TYPE_FFARITHS:
12585 case TYPE_FADDS:
12586 case TYPE_FFARITHD:
12587 case TYPE_FADDD:
12588 case TYPE_FMOV:
12589 case TYPE_F_CVT:
12590 case TYPE_FCMPS:
12591 case TYPE_FCMPD:
12592 case TYPE_FCONSTS:
12593 case TYPE_FCONSTD:
12594 case TYPE_FMULS:
12595 case TYPE_FMACS:
12596 case TYPE_FMULD:
12597 case TYPE_FMACD:
12598 case TYPE_FDIVS:
12599 case TYPE_FDIVD:
12600 case TYPE_F_MRC:
12601 case TYPE_F_MRRC:
12602 case TYPE_F_FLAG:
12603 case TYPE_F_LOADS:
12604 case TYPE_F_STORES:
12605 return true;
12606 default:
12607 return false;
12611 /* Return true if and only if this insn can dual-issue as younger. */
12612 static bool
12613 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
12615 if (recog_memoized (insn) < 0)
12617 if (verbose > 5)
12618 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
12619 return false;
12622 switch (get_attr_type (insn))
12624 case TYPE_ALU_IMM:
12625 case TYPE_ALUS_IMM:
12626 case TYPE_LOGIC_IMM:
12627 case TYPE_LOGICS_IMM:
12628 case TYPE_EXTEND:
12629 case TYPE_MVN_IMM:
12630 case TYPE_MOV_IMM:
12631 case TYPE_MOV_REG:
12632 case TYPE_MOV_SHIFT:
12633 case TYPE_MOV_SHIFT_REG:
12634 case TYPE_BRANCH:
12635 case TYPE_CALL:
12636 return true;
12637 default:
12638 return false;
12643 /* Look for an instruction that can dual issue only as an older
12644 instruction, and move it in front of any instructions that can
12645 dual-issue as younger, while preserving the relative order of all
12646 other instructions in the ready list. This is a heuristic to help
12647 dual-issue in later cycles, by postponing issue of more flexible
12648 instructions. This heuristic may affect dual issue opportunities
12649 in the current cycle. */
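/* For example, if a mov-immediate (which may issue as younger) is ahead of a
   load (older-only) in the ready list, the load is hoisted in front of it,
   keeping the mov available to pair as the younger insn of a later
   dual-issue cycle.  */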
12650 static void
12651 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
12652 int *n_readyp, int clock)
12654 int i;
12655 int first_older_only = -1, first_younger = -1;
12657 if (verbose > 5)
12658 fprintf (file,
12659 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12660 clock,
12661 *n_readyp);
12663 /* Traverse the ready list from the head (the instruction to issue
12664 first), looking for the first instruction that can issue as
12665 younger and the first instruction that can dual-issue only as
12666 older. */
12667 for (i = *n_readyp - 1; i >= 0; i--)
12669 rtx_insn *insn = ready[i];
12670 if (cortexa7_older_only (insn))
12672 first_older_only = i;
12673 if (verbose > 5)
12674 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12675 break;
12677 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12678 first_younger = i;
12681 /* Nothing to reorder because either no younger insn found or insn
12682 that can dual-issue only as older appears before any insn that
12683 can dual-issue as younger. */
12684 if (first_younger == -1)
12686 if (verbose > 5)
12687 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12688 return;
12691 /* Nothing to reorder because no older-only insn in the ready list. */
12692 if (first_older_only == -1)
12694 if (verbose > 5)
12695 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12696 return;
12699 /* Move first_older_only insn before first_younger. */
12700 if (verbose > 5)
12701 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12702 INSN_UID (ready[first_older_only]),
12703 INSN_UID (ready[first_younger]));
12704 rtx_insn *first_older_only_insn = ready[first_older_only];
12705 for (i = first_older_only; i < first_younger; i++)
12707 ready[i] = ready[i+1];
12710 ready[i] = first_older_only_insn;
12711 return;
12714 /* Implement TARGET_SCHED_REORDER. */
12715 static int
12716 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12717 int clock)
12719 switch (arm_tune)
12721 case TARGET_CPU_cortexa7:
12722 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12723 break;
12724 default:
12725 /* Do nothing for other cores. */
12726 break;
12729 return arm_issue_rate ();
12732 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12733 It corrects the value of COST based on the relationship between
12734 INSN and DEP through the dependence LINK. It returns the new
12735 value. There is a per-core adjust_cost hook to adjust scheduler costs
12736 and the per-core hook can choose to completely override the generic
12737 adjust_cost function. Only put bits of code into arm_adjust_cost that
12738 are common across all cores. */
12739 static int
12740 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
12741 unsigned int)
12743 rtx i_pat, d_pat;
12745 /* When generating Thumb-1 code, we want to place flag-setting operations
12746 close to a conditional branch which depends on them, so that we can
12747 omit the comparison. */
12748 if (TARGET_THUMB1
12749 && dep_type == 0
12750 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12751 && recog_memoized (dep) >= 0
12752 && get_attr_conds (dep) == CONDS_SET)
12753 return 0;
12755 if (current_tune->sched_adjust_cost != NULL)
12757 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
12758 return cost;
12761 /* XXX Is this strictly true? */
12762 if (dep_type == REG_DEP_ANTI
12763 || dep_type == REG_DEP_OUTPUT)
12764 return 0;
12766 /* Call insns don't incur a stall, even if they follow a load. */
12767 if (dep_type == 0
12768 && CALL_P (insn))
12769 return 1;
12771 if ((i_pat = single_set (insn)) != NULL
12772 && MEM_P (SET_SRC (i_pat))
12773 && (d_pat = single_set (dep)) != NULL
12774 && MEM_P (SET_DEST (d_pat)))
12776 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12777 /* This is a load after a store; there is no conflict if the load reads
12778 from a cached area. Assume that loads from the stack and from the
12779 constant pool are cached, and that others will miss. This is a
12780 hack. */
12782 if ((SYMBOL_REF_P (src_mem)
12783 && CONSTANT_POOL_ADDRESS_P (src_mem))
12784 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12785 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12786 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12787 return 1;
12790 return cost;
12793 int
12794 arm_max_conditional_execute (void)
12796 return max_insns_skipped;
12799 static int
12800 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12802 if (TARGET_32BIT)
12803 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12804 else
12805 return (optimize > 0) ? 2 : 0;
12808 static int
12809 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12811 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12814 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12815 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12816 sequences of non-executed instructions in IT blocks probably take the same
12817 amount of time as executed instructions (and the IT instruction itself takes
12818 space in icache). This function was experimentally determined to give good
12819 results on a popular embedded benchmark. */
12821 static int
12822 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12824 return (TARGET_32BIT && speed_p) ? 1
12825 : arm_default_branch_cost (speed_p, predictable_p);
12828 static int
12829 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12831 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12834 /* Return TRUE if rtx X is a valid immediate FP constant. */
12835 int
12836 arm_const_double_rtx (rtx x)
12838 return (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT
12839 && x == CONST0_RTX (GET_MODE (x)));
12842 /* VFPv3 has a fairly wide range of representable immediates, formed from
12843 "quarter-precision" floating-point values. These can be evaluated using this
12844 formula (with ^ for exponentiation):
12846 -1^s * n * 2^-r
12848 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12849 16 <= n <= 31 and 0 <= r <= 7.
12851 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12853 - A (most-significant) is the sign bit.
12854 - BCD are the exponent (encoded as r XOR 3).
12855 - EFGH are the mantissa (encoded as n - 16).
12856 */
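/* A worked example of the encoding above: 1.0 is +16 * 2^-4, so s = 0, n = 16
   and r = 4, giving A = 0, BCD = (4 XOR 3) = 0b111 and EFGH = 0, i.e. index
   0x70; likewise 2.0 is 16 * 2^-3 and encodes as 0x00.  */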
12858 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12859 fconst[sd] instruction, or -1 if X isn't suitable. */
12860 static int
12861 vfp3_const_double_index (rtx x)
12863 REAL_VALUE_TYPE r, m;
12864 int sign, exponent;
12865 unsigned HOST_WIDE_INT mantissa, mant_hi;
12866 unsigned HOST_WIDE_INT mask;
12867 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12868 bool fail;
12870 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12871 return -1;
12873 r = *CONST_DOUBLE_REAL_VALUE (x);
12875 /* We can't represent these things, so detect them first. */
12876 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12877 return -1;
12879 /* Extract sign, exponent and mantissa. */
12880 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12881 r = real_value_abs (&r);
12882 exponent = REAL_EXP (&r);
12883 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12884 highest (sign) bit, with a fixed binary point at bit point_pos.
12885 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12886 bits for the mantissa, this may fail (low bits would be lost). */
12887 real_ldexp (&m, &r, point_pos - exponent);
12888 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12889 mantissa = w.elt (0);
12890 mant_hi = w.elt (1);
12892 /* If there are bits set in the low part of the mantissa, we can't
12893 represent this value. */
12894 if (mantissa != 0)
12895 return -1;
12897 /* Now make it so that mantissa contains the most-significant bits, and move
12898 the point_pos to indicate that the least-significant bits have been
12899 discarded. */
12900 point_pos -= HOST_BITS_PER_WIDE_INT;
12901 mantissa = mant_hi;
12903 /* We can permit four significant bits of mantissa only, plus a high bit
12904 which is always 1. */
12905 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12906 if ((mantissa & mask) != 0)
12907 return -1;
12909 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12910 mantissa >>= point_pos - 5;
12912 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12913 floating-point immediate zero with Neon using an integer-zero load, but
12914 that case is handled elsewhere.) */
12915 if (mantissa == 0)
12916 return -1;
12918 gcc_assert (mantissa >= 16 && mantissa <= 31);
12920 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12921 normalized significands are in the range [1, 2). (Our mantissa is shifted
12922 left 4 places at this point relative to normalized IEEE754 values). GCC
12923 internally uses [0.5, 1) (see real.cc), so the exponent returned from
12924 REAL_EXP must be altered. */
12925 exponent = 5 - exponent;
12927 if (exponent < 0 || exponent > 7)
12928 return -1;
12930 /* Sign, mantissa and exponent are now in the correct form to plug into the
12931 formula described in the comment above. */
12932 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12935 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12936 int
12937 vfp3_const_double_rtx (rtx x)
12939 if (!TARGET_VFP3)
12940 return 0;
12942 return vfp3_const_double_index (x) != -1;
12945 /* Recognize immediates which can be used in various Neon and MVE instructions.
12946 Legal immediates are described by the following table (for VMVN variants, the
12947 bitwise inverse of the constant shown is recognized. In either case, VMOV
12948 is output and the correct instruction to use for a given constant is chosen
12949 by the assembler). The constant shown is replicated across all elements of
12950 the destination vector.
12952 insn elems variant constant (binary)
12953 ---- ----- ------- -----------------
12954 vmov i32 0 00000000 00000000 00000000 abcdefgh
12955 vmov i32 1 00000000 00000000 abcdefgh 00000000
12956 vmov i32 2 00000000 abcdefgh 00000000 00000000
12957 vmov i32 3 abcdefgh 00000000 00000000 00000000
12958 vmov i16 4 00000000 abcdefgh
12959 vmov i16 5 abcdefgh 00000000
12960 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12961 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12962 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12963 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12964 vmvn i16 10 00000000 abcdefgh
12965 vmvn i16 11 abcdefgh 00000000
12966 vmov i32 12 00000000 00000000 abcdefgh 11111111
12967 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12968 vmov i32 14 00000000 abcdefgh 11111111 11111111
12969 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12970 vmov i8 16 abcdefgh
12971 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12972 eeeeeeee ffffffff gggggggg hhhhhhhh
12973 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12974 vmov f32 19 00000000 00000000 00000000 00000000
12976 For case 18, B = !b. Representable values are exactly those accepted by
12977 vfp3_const_double_index, but are output as floating-point numbers rather
12978 than indices.
12980 For case 19, we will change it to vmov.i32 when assembling.
12982 Variants 0-5 (inclusive) may also be used as immediates for the second
12983 operand of VORR/VBIC instructions.
12985 The INVERSE argument causes the bitwise inverse of the given operand to be
12986 recognized instead (used for recognizing legal immediates for the VAND/VORN
12987 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12988 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12989 output, rather than the real insns vbic/vorr).
12991 INVERSE makes no difference to the recognition of float vectors.
12993 The return value is the variant of immediate as shown in the above table, or
12994 -1 if the given value doesn't match any of the listed patterns.
12995 */
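/* As an illustration of the table above: a V4SImode vector with every element
   equal to 0x000000ab matches variant 0 (vmov.i32), elements of 0x0000ab00
   match variant 1, and a V16QImode vector with every byte equal to, say,
   0x2a matches variant 16 (vmov.i8).  */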
12996 static int
12997 simd_valid_immediate (rtx op, machine_mode mode, int inverse,
12998 rtx *modconst, int *elementwidth)
13000 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
13001 matches = 1; \
13002 for (i = 0; i < idx; i += (STRIDE)) \
13003 if (!(TEST)) \
13004 matches = 0; \
13005 if (matches) \
13007 immtype = (CLASS); \
13008 elsize = (ELSIZE); \
13009 break; \
13012 unsigned int i, elsize = 0, idx = 0, n_elts;
13013 unsigned int innersize;
13014 unsigned char bytes[16] = {};
13015 int immtype = -1, matches;
13016 unsigned int invmask = inverse ? 0xff : 0;
13017 bool vector = GET_CODE (op) == CONST_VECTOR;
13019 if (vector)
13020 n_elts = CONST_VECTOR_NUNITS (op);
13021 else
13023 n_elts = 1;
13024 gcc_assert (mode != VOIDmode);
13027 innersize = GET_MODE_UNIT_SIZE (mode);
13029 /* Only support 128-bit vectors for MVE. */
13030 if (TARGET_HAVE_MVE
13031 && (!vector
13032 || VALID_MVE_PRED_MODE (mode)
13033 || n_elts * innersize != 16))
13034 return -1;
13036 if (!TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
13037 return -1;
13039 /* Vectors of float constants. */
13040 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
13042 rtx el0 = CONST_VECTOR_ELT (op, 0);
13044 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
13045 return -1;
13047 /* FP16 vectors cannot be represented. */
13048 if (GET_MODE_INNER (mode) == HFmode)
13049 return -1;
13051 /* All elements in the vector must be the same. Note that 0.0 and -0.0
13052 are distinct in this context. */
13053 if (!const_vec_duplicate_p (op))
13054 return -1;
13056 if (modconst)
13057 *modconst = CONST_VECTOR_ELT (op, 0);
13059 if (elementwidth)
13060 *elementwidth = 0;
13062 if (el0 == CONST0_RTX (GET_MODE (el0)))
13063 return 19;
13064 else
13065 return 18;
13068 /* The tricks done in the code below apply for little-endian vector layout.
13069 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
13070 FIXME: Implement logic for big-endian vectors. */
13071 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
13072 return -1;
13074 /* Splat vector constant out into a byte vector. */
13075 for (i = 0; i < n_elts; i++)
13077 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
13078 unsigned HOST_WIDE_INT elpart;
13080 gcc_assert (CONST_INT_P (el));
13081 elpart = INTVAL (el);
13083 for (unsigned int byte = 0; byte < innersize; byte++)
13085 bytes[idx++] = (elpart & 0xff) ^ invmask;
13086 elpart >>= BITS_PER_UNIT;
13090 /* Sanity check. */
13091 gcc_assert (idx == GET_MODE_SIZE (mode));
13095 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
13096 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13098 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13099 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13101 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
13102 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13104 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
13105 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
13107 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
13109 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
13111 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
13112 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13114 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13115 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13117 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
13118 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13120 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
13121 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
13123 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
13125 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
13127 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13128 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13130 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13131 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13133 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
13134 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13136 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
13137 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13139 CHECK (1, 8, 16, bytes[i] == bytes[0]);
13141 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
13142 && bytes[i] == bytes[(i + 8) % idx]);
13144 while (0);
13146 if (immtype == -1)
13147 return -1;
13149 if (elementwidth)
13150 *elementwidth = elsize;
13152 if (modconst)
13154 unsigned HOST_WIDE_INT imm = 0;
13156 /* Un-invert bytes of recognized vector, if necessary. */
13157 if (invmask != 0)
13158 for (i = 0; i < idx; i++)
13159 bytes[i] ^= invmask;
13161 if (immtype == 17)
13163 /* FIXME: Broken on 32-bit H_W_I hosts. */
13164 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
13166 for (i = 0; i < 8; i++)
13167 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
13168 << (i * BITS_PER_UNIT);
13170 *modconst = GEN_INT (imm);
13172 else
13174 unsigned HOST_WIDE_INT imm = 0;
13176 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
13177 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
13179 *modconst = GEN_INT (imm);
13183 return immtype;
13184 #undef CHECK
13187 /* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
13188 implicitly, VMVN) immediate. Write back width per element to *ELEMENTWIDTH
13189 (or zero for float elements), and a modified constant (whatever should be
13190 output for a VMOV) in *MODCONST. The function was renamed from
13191 "neon_immediate_valid_for_move" to "simd_immediate_valid_for_move" as it is
13192 used by both Neon and MVE. */
13194 simd_immediate_valid_for_move (rtx op, machine_mode mode,
13195 rtx *modconst, int *elementwidth)
13197 rtx tmpconst;
13198 int tmpwidth;
13199 int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
13201 if (retval == -1)
13202 return 0;
13204 if (modconst)
13205 *modconst = tmpconst;
13207 if (elementwidth)
13208 *elementwidth = tmpwidth;
13210 return 1;
13213 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
13214 the immediate is valid, write a constant suitable for using as an operand
13215 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
13216 *ELEMENTWIDTH. See simd_valid_immediate for description of INVERSE. */
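/* For illustration: the immediates accepted here are exactly variants 0-5 of
   the table above, i.e. a single non-zero byte within each 32-bit or 16-bit
   element, such as 0x00ab0000 replicated across the vector (variant 2). */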
13219 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
13220 rtx *modconst, int *elementwidth)
13222 rtx tmpconst;
13223 int tmpwidth;
13224 int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
13226 if (retval < 0 || retval > 5)
13227 return 0;
13229 if (modconst)
13230 *modconst = tmpconst;
13232 if (elementwidth)
13233 *elementwidth = tmpwidth;
13235 return 1;
13238 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
13239 the immediate is valid, write a constant suitable for using as an operand
13240 to VSHR/VSHL to *MODCONST and the corresponding element width to
13241 *ELEMENTWIDTH. ISLEFTSHIFT indicates whether this is a left or a right
13242 shift, because the two have different limitations. */
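/* For example, with 16-bit elements the left-shift immediate must lie in
   [0, 15] whereas the right-shift immediate must lie in [1, 16], which is why
   ISLEFTSHIFT is needed. */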
13245 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
13246 rtx *modconst, int *elementwidth,
13247 bool isleftshift)
13249 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
13250 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
13251 unsigned HOST_WIDE_INT last_elt = 0;
13252 unsigned HOST_WIDE_INT maxshift;
13254 /* Split vector constant out into a byte vector. */
13255 for (i = 0; i < n_elts; i++)
13257 rtx el = CONST_VECTOR_ELT (op, i);
13258 unsigned HOST_WIDE_INT elpart;
13260 if (CONST_INT_P (el))
13261 elpart = INTVAL (el);
13262 else if (CONST_DOUBLE_P (el))
13263 return 0;
13264 else
13265 gcc_unreachable ();
13267 if (i != 0 && elpart != last_elt)
13268 return 0;
13270 last_elt = elpart;
13273 /* The shift limit is the element size in bits. */
13274 maxshift = innersize * 8;
13276 if (isleftshift)
13278 /* Left shift immediate value can be from 0 to <size>-1. */
13279 if (last_elt >= maxshift)
13280 return 0;
13282 else
13284 /* Right shift immediate value can be from 1 to <size>. */
13285 if (last_elt == 0 || last_elt > maxshift)
13286 return 0;
13289 if (elementwidth)
13290 *elementwidth = innersize * 8;
13292 if (modconst)
13293 *modconst = CONST_VECTOR_ELT (op, 0);
13295 return 1;
13298 /* Return a string suitable for output of Neon immediate logic operation
13299 MNEM. */
13301 char *
13302 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
13303 int inverse, int quad)
13305 int width, is_valid;
13306 static char templ[40];
13308 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
13310 gcc_assert (is_valid != 0);
13312 if (quad)
13313 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
13314 else
13315 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
13317 return templ;
13320 /* Return a string suitable for output of Neon immediate shift operation
13321 (VSHR or VSHL) MNEM. */
13323 char *
13324 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
13325 machine_mode mode, int quad,
13326 bool isleftshift)
13328 int width, is_valid;
13329 static char templ[40];
13331 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
13332 gcc_assert (is_valid != 0);
13334 if (quad)
13335 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
13336 else
13337 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
13339 return templ;
13342 /* Output a sequence of pairwise operations to implement a reduction.
13343 NOTE: We do "too much work" here, because pairwise operations work on two
13344 registers-worth of operands in one go. Unfortunately we can't exploit those
13345 extra calculations to do the full operation in fewer steps, as far as we know.
13346 Although all vector elements of the result but the first are ignored, we
13347 actually calculate the same result in each of the elements. An alternative
13348 such as initially loading a vector with zero to use as each of the second
13349 operands would use up an additional register and take an extra instruction,
13350 for no particular gain. */
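/* For example, reducing a four-element vector takes two pairwise steps (four
   elements to two partial results, then two to one), i.e. log2(N) applications
   of REDUC for N elements, each folding the running value with itself. */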
13352 void
13353 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
13354 rtx (*reduc) (rtx, rtx, rtx))
13356 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
13357 rtx tmpsum = op1;
13359 for (i = parts / 2; i >= 1; i /= 2)
13361 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
13362 emit_insn (reduc (dest, tmpsum, tmpsum));
13363 tmpsum = dest;
13367 /* Return a non-NULL RTX iff VALS is a vector constant that can be
13368 loaded into a register using VDUP.
13370 If this is the case, and GENERATE is set, we also generate
13371 instructions to do this and return an RTX to assign to the register. */
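/* For instance, the V8QImode constant {5, 5, 5, 5, 5, 5, 5, 5} can be loaded
   by moving 5 into a core register and duplicating it with VDUP (e.g.
   "vdup.8 d0, r0"); the register names here are illustrative only. */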
13373 static rtx
13374 neon_vdup_constant (rtx vals, bool generate)
13376 machine_mode mode = GET_MODE (vals);
13377 machine_mode inner_mode = GET_MODE_INNER (mode);
13378 rtx x;
13380 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
13381 return NULL_RTX;
13383 if (!const_vec_duplicate_p (vals, &x))
13384 /* The elements are not all the same. We could handle repeating
13385 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
13386 {0, C, 0, C, 0, C, 0, C} which can be loaded using
13387 vdup.i16). */
13388 return NULL_RTX;
13390 if (!generate)
13391 return x;
13393 /* We can load this constant by using VDUP and a constant in a
13394 single ARM register. This will be cheaper than a vector
13395 load. */
13397 x = copy_to_mode_reg (inner_mode, x);
13398 return gen_vec_duplicate (mode, x);
13401 /* Return a HI representation of CONST_VEC suitable for MVE predicates. */
13403 mve_bool_vec_to_const (rtx const_vec)
13405 machine_mode mode = GET_MODE (const_vec);
13407 if (!VECTOR_MODE_P (mode))
13408 return const_vec;
13410 unsigned n_elts = GET_MODE_NUNITS (mode);
13411 unsigned el_prec = GET_MODE_PRECISION (GET_MODE_INNER (mode));
13412 unsigned shift_c = 16 / n_elts;
13413 unsigned i;
13414 int hi_val = 0;
13416 for (i = 0; i < n_elts; i++)
13418 rtx el = CONST_VECTOR_ELT (const_vec, i);
13419 unsigned HOST_WIDE_INT elpart;
13421 gcc_assert (CONST_INT_P (el));
13422 elpart = INTVAL (el) & ((1U << el_prec) - 1);
13424 unsigned index = BYTES_BIG_ENDIAN ? n_elts - i - 1 : i;
13426 hi_val |= elpart << (index * shift_c);
13428 /* We are using a mov immediate to encode this constant, which writes 32 bits,
13429 so we need to make sure the top 16 bits are all 0; otherwise we can't
13430 guarantee that we can actually write this immediate. */
13431 return gen_int_mode (hi_val, SImode);
13434 /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
13435 constants (for vec_init) or CONST_VECTOR, can be efficiently loaded
13436 into a register.
13438 If this is the case, and GENERATE is set, we also generate code to do
13439 this and return an RTX to copy into the register. */
13442 neon_make_constant (rtx vals, bool generate)
13444 machine_mode mode = GET_MODE (vals);
13445 rtx target;
13446 rtx const_vec = NULL_RTX;
13447 int n_elts = GET_MODE_NUNITS (mode);
13448 int n_const = 0;
13449 int i;
13451 if (GET_CODE (vals) == CONST_VECTOR)
13452 const_vec = vals;
13453 else if (GET_CODE (vals) == PARALLEL)
13455 /* A CONST_VECTOR must contain only CONST_INTs and
13456 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13457 Only store valid constants in a CONST_VECTOR. */
13458 for (i = 0; i < n_elts; ++i)
13460 rtx x = XVECEXP (vals, 0, i);
13461 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
13462 n_const++;
13464 if (n_const == n_elts)
13465 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
13467 else
13468 gcc_unreachable ();
13470 if (const_vec != NULL
13471 && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
13472 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
13473 return const_vec;
13474 else if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE(mode))
13475 return mve_bool_vec_to_const (const_vec);
13476 else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
13477 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
13478 pipeline cycle; creating the constant takes one or two ARM
13479 pipeline cycles. */
13480 return target;
13481 else if (const_vec != NULL_RTX)
13482 /* Load from constant pool. On Cortex-A8 this takes two cycles
13483 (for either double or quad vectors). We cannot take advantage
13484 of single-cycle VLD1 because we need a PC-relative addressing
13485 mode. */
13486 return arm_disable_literal_pool ? NULL_RTX : const_vec;
13487 else
13488 /* A PARALLEL containing something not valid inside CONST_VECTOR.
13489 We cannot construct an initializer. */
13490 return NULL_RTX;
13493 /* Initialize vector TARGET to VALS. */
13495 void
13496 neon_expand_vector_init (rtx target, rtx vals)
13498 machine_mode mode = GET_MODE (target);
13499 machine_mode inner_mode = GET_MODE_INNER (mode);
13500 int n_elts = GET_MODE_NUNITS (mode);
13501 int n_var = 0, one_var = -1;
13502 bool all_same = true;
13503 rtx x, mem;
13504 int i;
13506 for (i = 0; i < n_elts; ++i)
13508 x = XVECEXP (vals, 0, i);
13509 if (!CONSTANT_P (x))
13510 ++n_var, one_var = i;
13512 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13513 all_same = false;
13516 if (n_var == 0)
13518 rtx constant = neon_make_constant (vals);
13519 if (constant != NULL_RTX)
13521 emit_move_insn (target, constant);
13522 return;
13526 /* Splat a single non-constant element if we can. */
13527 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
13529 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
13530 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
13531 return;
13534 /* One field is non-constant. Load constant then overwrite varying
13535 field. This is more efficient than using the stack. */
13536 if (n_var == 1)
13538 rtx copy = copy_rtx (vals);
13539 rtx merge_mask = GEN_INT (1 << one_var);
13541 /* Load constant part of vector, substitute neighboring value for
13542 varying element. */
13543 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
13544 neon_expand_vector_init (target, copy);
13546 /* Insert variable. */
13547 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
13548 emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
13549 return;
13552 /* Construct the vector in memory one field at a time
13553 and load the whole vector. */
13554 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13555 for (i = 0; i < n_elts; i++)
13556 emit_move_insn (adjust_address_nv (mem, inner_mode,
13557 i * GET_MODE_SIZE (inner_mode)),
13558 XVECEXP (vals, 0, i));
13559 emit_move_insn (target, mem);
13562 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
13563 an error if it doesn't. EXP indicates the source location, which includes the
13564 inlining history for intrinsics. */
13566 static void
13567 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13568 const_tree exp, const char *desc)
13570 HOST_WIDE_INT lane;
13572 gcc_assert (CONST_INT_P (operand));
13574 lane = INTVAL (operand);
13576 if (lane < low || lane >= high)
13578 if (exp)
13579 error_at (EXPR_LOCATION (exp),
13580 "%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13581 else
13582 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13586 /* Bounds-check lanes. */
13588 void
13589 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13590 const_tree exp)
13592 bounds_check (operand, low, high, exp, "lane");
13595 /* Bounds-check constants. */
13597 void
13598 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
13600 bounds_check (operand, low, high, NULL_TREE, "constant");
13603 HOST_WIDE_INT
13604 neon_element_bits (machine_mode mode)
13606 return GET_MODE_UNIT_BITSIZE (mode);
13610 /* Predicates for `match_operand' and `match_operator'. */
13612 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13613 WB level is 2 if full writeback address modes are allowed, 1
13614 if limited writeback address modes (POST_INC and PRE_DEC) are
13615 allowed and 0 if no writeback at all is supported. */
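/* Illustrative examples: (mem (reg)) and a suitably aligned, in-range
   (mem (plus (reg) (const_int))) are accepted at any WB level;
   (mem (post_inc (reg))) and (mem (pre_dec (reg))) need a WB level of at
   least 1; (mem (pre_inc (reg))) and (mem (post_dec (reg))) need level 2. */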
13618 arm_coproc_mem_operand_wb (rtx op, int wb_level)
13620 gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
13621 rtx ind;
13623 /* Reject eliminable registers. */
13624 if (! (reload_in_progress || reload_completed || lra_in_progress)
13625 && ( reg_mentioned_p (frame_pointer_rtx, op)
13626 || reg_mentioned_p (arg_pointer_rtx, op)
13627 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13628 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13629 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13630 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13631 return FALSE;
13633 /* Constants are converted into offsets from labels. */
13634 if (!MEM_P (op))
13635 return FALSE;
13637 ind = XEXP (op, 0);
13639 if (reload_completed
13640 && (LABEL_REF_P (ind)
13641 || (GET_CODE (ind) == CONST
13642 && GET_CODE (XEXP (ind, 0)) == PLUS
13643 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13644 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13645 return TRUE;
13647 /* Match: (mem (reg)). */
13648 if (REG_P (ind))
13649 return arm_address_register_rtx_p (ind, 0);
13651 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
13652 acceptable in any case (subject to verification by
13653 arm_address_register_rtx_p). We need full writeback to accept
13654 PRE_INC and POST_DEC, and at least restricted writeback for
13655 POST_INC and PRE_DEC. */
13656 if (wb_level > 0
13657 && (GET_CODE (ind) == POST_INC
13658 || GET_CODE (ind) == PRE_DEC
13659 || (wb_level > 1
13660 && (GET_CODE (ind) == PRE_INC
13661 || GET_CODE (ind) == POST_DEC))))
13662 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13664 if (wb_level > 1
13665 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13666 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13667 && GET_CODE (XEXP (ind, 1)) == PLUS
13668 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13669 ind = XEXP (ind, 1);
13671 /* Match:
13672 (plus (reg)
13673 (const))
13675 The encoded immediate for 16-bit modes is multiplied by 2,
13676 while the encoded immediate for 32-bit and 64-bit modes is
13677 multiplied by 4. */
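/* For instance (illustrative): for DFmode the factor is 4, so valid offsets
   are multiples of 4 in [-1020, 1020]; for HFmode the factor is 2, giving
   multiples of 2 in [-510, 510]. */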
13678 int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
13679 if (GET_CODE (ind) == PLUS
13680 && REG_P (XEXP (ind, 0))
13681 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13682 && CONST_INT_P (XEXP (ind, 1))
13683 && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
13684 && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
13685 return TRUE;
13687 return FALSE;
13690 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13691 WB is true if full writeback address modes are allowed and is false
13692 if limited writeback address modes (POST_INC and PRE_DEC) are
13693 allowed. */
13695 int arm_coproc_mem_operand (rtx op, bool wb)
13697 return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
13700 /* Return TRUE if OP is a valid coprocessor memory address pattern in a
13701 context in which no writeback address modes are allowed. */
13704 arm_coproc_mem_operand_no_writeback (rtx op)
13706 return arm_coproc_mem_operand_wb (op, 0);
13709 /* In non-STRICT mode, or for a hard register, return the register number
13710 as-is. In STRICT mode, return the hard register assigned to a pseudo, if
13711 any; otherwise return the original pseudo number. */
13712 static int
13713 arm_effective_regno (rtx op, bool strict)
13715 gcc_assert (REG_P (op));
13716 if (!strict || REGNO (op) < FIRST_PSEUDO_REGISTER
13717 || !reg_renumber || reg_renumber[REGNO (op)] < 0)
13718 return REGNO (op);
13719 return reg_renumber[REGNO (op)];
13722 /* This function returns TRUE on matching mode and op.
13723 1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.
13724 2. For other modes, check for [Rn], return TRUE for Rn < R15 (except R13). */
13726 mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
13728 enum rtx_code code;
13729 int val, reg_no;
13731 /* Match: (mem (reg)). */
13732 if (REG_P (op))
13734 reg_no = arm_effective_regno (op, strict);
13735 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13736 ? reg_no <= LAST_LO_REGNUM
13737 : reg_no < LAST_ARM_REGNUM)
13738 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13740 code = GET_CODE (op);
13742 if ((code == POST_INC
13743 || code == PRE_DEC
13744 || code == PRE_INC
13745 || code == POST_DEC)
13746 && REG_P (XEXP (op, 0)))
13748 reg_no = arm_effective_regno (XEXP (op, 0), strict);
13749 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13750 ? reg_no <= LAST_LO_REGNUM
13751 :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13752 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13754 else if (((code == POST_MODIFY || code == PRE_MODIFY)
13755 && GET_CODE (XEXP (op, 1)) == PLUS
13756 && XEXP (op, 0) == XEXP (XEXP (op, 1), 0)
13757 && REG_P (XEXP (op, 0))
13758 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT)
13759 /* Make sure to only accept PLUS after reload_completed, otherwise
13760 this will interfere with auto_inc's pattern detection. */
13761 || (reload_completed && code == PLUS && REG_P (XEXP (op, 0))
13762 && GET_CODE (XEXP (op, 1)) == CONST_INT))
13764 reg_no = arm_effective_regno (XEXP (op, 0), strict);
13765 if (code == PLUS)
13766 val = INTVAL (XEXP (op, 1));
13767 else
13768 val = INTVAL (XEXP(XEXP (op, 1), 1));
13770 switch (mode)
13772 case E_V16QImode:
13773 case E_V8QImode:
13774 case E_V4QImode:
13775 if (abs (val) > 127)
13776 return FALSE;
13777 break;
13778 case E_V8HImode:
13779 case E_V8HFmode:
13780 case E_V4HImode:
13781 case E_V4HFmode:
13782 if (val % 2 != 0 || abs (val) > 254)
13783 return FALSE;
13784 break;
13785 case E_V4SImode:
13786 case E_V4SFmode:
13787 if (val % 4 != 0 || abs (val) > 508)
13788 return FALSE;
13789 break;
13790 default:
13791 return FALSE;
13793 return ((!strict && reg_no >= FIRST_PSEUDO_REGISTER)
13794 || (MVE_STN_LDW_MODE (mode)
13795 ? reg_no <= LAST_LO_REGNUM
13796 : (reg_no < LAST_ARM_REGNUM
13797 && (code == PLUS || reg_no != SP_REGNUM))));
13799 return FALSE;
13802 /* Return TRUE if OP is a memory operand which we can load or store a vector
13803 to/from. TYPE is one of the following values:
13804 0 - Vector load/store (vldr)
13805 1 - Core registers (ldm)
13806 2 - Element/structure loads (vld1)
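/* For example (illustrative): with TYPE 0, an address of the form
   (mem (plus (reg) (const_int 8))) is accepted when the offset is a multiple
   of 4 and within the range checked below (up to 1016 for quad vectors,
   1024 for double); with TYPE 2, post-increment by a register, as used by
   VLD1, is also accepted. */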
13809 neon_vector_mem_operand (rtx op, int type, bool strict)
13811 rtx ind;
13813 /* Reject eliminable registers. */
13814 if (strict && ! (reload_in_progress || reload_completed)
13815 && (reg_mentioned_p (frame_pointer_rtx, op)
13816 || reg_mentioned_p (arg_pointer_rtx, op)
13817 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13818 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13819 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13820 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13821 return FALSE;
13823 /* Constants are converted into offsets from labels. */
13824 if (!MEM_P (op))
13825 return FALSE;
13827 ind = XEXP (op, 0);
13829 if (reload_completed
13830 && (LABEL_REF_P (ind)
13831 || (GET_CODE (ind) == CONST
13832 && GET_CODE (XEXP (ind, 0)) == PLUS
13833 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13834 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13835 return TRUE;
13837 /* Match: (mem (reg)). */
13838 if (REG_P (ind))
13839 return arm_address_register_rtx_p (ind, 0);
13841 /* Allow post-increment with Neon registers. */
13842 if ((type != 1 && GET_CODE (ind) == POST_INC)
13843 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13844 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13846 /* Allow post-increment by register for VLDn */
13847 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13848 && GET_CODE (XEXP (ind, 1)) == PLUS
13849 && REG_P (XEXP (XEXP (ind, 1), 1))
13850 && REG_P (XEXP (ind, 0))
13851 && rtx_equal_p (XEXP (ind, 0), XEXP (XEXP (ind, 1), 0)))
13852 return true;
13854 /* Match:
13855 (plus (reg)
13856 (const)). */
13857 if (type == 0
13858 && GET_CODE (ind) == PLUS
13859 && REG_P (XEXP (ind, 0))
13860 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13861 && CONST_INT_P (XEXP (ind, 1))
13862 && INTVAL (XEXP (ind, 1)) > -1024
13863 /* For quad modes, we restrict the constant offset to be slightly less
13864 than what the instruction format permits. We have no such constraint
13865 on double mode offsets. (This must match arm_legitimate_index_p.) */
13866 && (INTVAL (XEXP (ind, 1))
13867 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13868 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13869 return TRUE;
13871 return FALSE;
13874 /* Return TRUE if OP is a mem suitable for loading/storing an MVE struct
13875 type. */
13877 mve_struct_mem_operand (rtx op)
13879 rtx ind = XEXP (op, 0);
13881 /* Match: (mem (reg)). */
13882 if (REG_P (ind))
13883 return arm_address_register_rtx_p (ind, 0);
13885 /* Allow only post-increment by the mode size. */
13886 if (GET_CODE (ind) == POST_INC)
13887 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13889 return FALSE;
13892 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13893 type. */
13895 neon_struct_mem_operand (rtx op)
13897 rtx ind;
13899 /* Reject eliminable registers. */
13900 if (! (reload_in_progress || reload_completed)
13901 && ( reg_mentioned_p (frame_pointer_rtx, op)
13902 || reg_mentioned_p (arg_pointer_rtx, op)
13903 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13904 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13905 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13906 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13907 return FALSE;
13909 /* Constants are converted into offsets from labels. */
13910 if (!MEM_P (op))
13911 return FALSE;
13913 ind = XEXP (op, 0);
13915 if (reload_completed
13916 && (LABEL_REF_P (ind)
13917 || (GET_CODE (ind) == CONST
13918 && GET_CODE (XEXP (ind, 0)) == PLUS
13919 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13920 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13921 return TRUE;
13923 /* Match: (mem (reg)). */
13924 if (REG_P (ind))
13925 return arm_address_register_rtx_p (ind, 0);
13927 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13928 if (GET_CODE (ind) == POST_INC
13929 || GET_CODE (ind) == PRE_DEC)
13930 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13932 return FALSE;
13935 /* Prepares the operands for the VCMLA by lane instruction such that the right
13936 register number is selected. This instruction is special in that it always
13937 requires a D register, however there is a choice to be made between Dn[0],
13938 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13940 The VCMLA by lane function always selects two values. For instance given D0
13941 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13942 used by the instruction. However given V4SF then index 0 and 1 are valid as
13943 D0[0] or D1[0] are both valid.
13945 This function centralizes that information based on OPERANDS: OPERANDS[3]
13946 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13947 updated to contain the right index. */
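/* As an illustration: for a V4SFmode operand held in d2/d3 (register numbers
   chosen only for the example), lane 0 selects d2[0] and lane 1 selects d3[0];
   OPERANDS[3] becomes the D register number and OPERANDS[4] the lane within
   it. */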
13949 rtx *
13950 neon_vcmla_lane_prepare_operands (rtx *operands)
13952 int lane = INTVAL (operands[4]);
13953 machine_mode constmode = SImode;
13954 machine_mode mode = GET_MODE (operands[3]);
13955 int regno = REGNO (operands[3]);
13956 regno = ((regno - FIRST_VFP_REGNUM) >> 1);
13957 if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
13959 operands[3] = gen_int_mode (regno + 1, constmode);
13960 operands[4]
13961 = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
13963 else
13965 operands[3] = gen_int_mode (regno, constmode);
13966 operands[4] = gen_int_mode (lane, constmode);
13968 return operands;
13972 /* Return true if X is a register that will be eliminated later on. */
13974 arm_eliminable_register (rtx x)
13976 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13977 || REGNO (x) == ARG_POINTER_REGNUM
13978 || VIRTUAL_REGISTER_P (x));
13981 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13982 coprocessor registers. Otherwise return NO_REGS. */
13984 enum reg_class
13985 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13987 if (mode == HFmode)
13989 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
13990 return GENERAL_REGS;
13991 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13992 return NO_REGS;
13993 return GENERAL_REGS;
13996 /* The neon move patterns handle all legitimate vector and struct
13997 addresses. */
13998 if (TARGET_NEON
13999 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
14000 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
14001 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
14002 || VALID_NEON_STRUCT_MODE (mode)))
14003 return NO_REGS;
14005 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
14006 return NO_REGS;
14008 return GENERAL_REGS;
14011 /* Values which must be returned in the most-significant end of the return
14012 register. */
14014 static bool
14015 arm_return_in_msb (const_tree valtype)
14017 return (TARGET_AAPCS_BASED
14018 && BYTES_BIG_ENDIAN
14019 && (AGGREGATE_TYPE_P (valtype)
14020 || TREE_CODE (valtype) == COMPLEX_TYPE
14021 || FIXED_POINT_TYPE_P (valtype)));
14024 /* Return TRUE if X references a SYMBOL_REF. */
14026 symbol_mentioned_p (rtx x)
14028 const char * fmt;
14029 int i;
14031 if (SYMBOL_REF_P (x))
14032 return 1;
14034 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
14035 are constant offsets, not symbols. */
14036 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
14037 return 0;
14039 fmt = GET_RTX_FORMAT (GET_CODE (x));
14041 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
14043 if (fmt[i] == 'E')
14045 int j;
14047 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
14048 if (symbol_mentioned_p (XVECEXP (x, i, j)))
14049 return 1;
14051 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
14052 return 1;
14055 return 0;
14058 /* Return TRUE if X references a LABEL_REF. */
14060 label_mentioned_p (rtx x)
14062 const char * fmt;
14063 int i;
14065 if (LABEL_REF_P (x))
14066 return 1;
14068 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
14069 instruction, but they are constant offsets, not symbols. */
14070 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
14071 return 0;
14073 fmt = GET_RTX_FORMAT (GET_CODE (x));
14074 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
14076 if (fmt[i] == 'E')
14078 int j;
14080 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
14081 if (label_mentioned_p (XVECEXP (x, i, j)))
14082 return 1;
14084 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
14085 return 1;
14088 return 0;
14092 tls_mentioned_p (rtx x)
14094 switch (GET_CODE (x))
14096 case CONST:
14097 return tls_mentioned_p (XEXP (x, 0));
14099 case UNSPEC:
14100 if (XINT (x, 1) == UNSPEC_TLS)
14101 return 1;
14103 /* Fall through. */
14104 default:
14105 return 0;
14109 /* Must not copy any rtx that uses a pc-relative address.
14110 Also, disallow copying of load-exclusive instructions that
14111 may appear after splitting of compare-and-swap-style operations
14112 so as to prevent those loops from being transformed away from their
14113 canonical forms (see PR 69904). */
14115 static bool
14116 arm_cannot_copy_insn_p (rtx_insn *insn)
14118 /* The tls call insn cannot be copied, as it is paired with a data
14119 word. */
14120 if (recog_memoized (insn) == CODE_FOR_tlscall)
14121 return true;
14123 subrtx_iterator::array_type array;
14124 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
14126 const_rtx x = *iter;
14127 if (GET_CODE (x) == UNSPEC
14128 && (XINT (x, 1) == UNSPEC_PIC_BASE
14129 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
14130 return true;
14133 rtx set = single_set (insn);
14134 if (set)
14136 rtx src = SET_SRC (set);
14137 if (GET_CODE (src) == ZERO_EXTEND)
14138 src = XEXP (src, 0);
14140 /* Catch the load-exclusive and load-acquire operations. */
14141 if (GET_CODE (src) == UNSPEC_VOLATILE
14142 && (XINT (src, 1) == VUNSPEC_LL
14143 || XINT (src, 1) == VUNSPEC_LAX))
14144 return true;
14146 return false;
14149 enum rtx_code
14150 minmax_code (rtx x)
14152 enum rtx_code code = GET_CODE (x);
14154 switch (code)
14156 case SMAX:
14157 return GE;
14158 case SMIN:
14159 return LE;
14160 case UMIN:
14161 return LEU;
14162 case UMAX:
14163 return GEU;
14164 default:
14165 gcc_unreachable ();
14169 /* Match pair of min/max operators that can be implemented via usat/ssat. */
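/* For example, clamping to [0, 255] gives *MASK == 8 and *SIGNED_SAT == false
   (a usat #8), while clamping to [-256, 255] gives *MASK == 9 and
   *SIGNED_SAT == true (an ssat #9). */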
14171 bool
14172 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
14173 int *mask, bool *signed_sat)
14175 /* The high bound must be a power of two minus one. */
14176 int log = exact_log2 (INTVAL (hi_bound) + 1);
14177 if (log == -1)
14178 return false;
14180 /* The low bound is either zero (for usat) or one less than the
14181 negation of the high bound (for ssat). */
14182 if (INTVAL (lo_bound) == 0)
14184 if (mask)
14185 *mask = log;
14186 if (signed_sat)
14187 *signed_sat = false;
14189 return true;
14192 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
14194 if (mask)
14195 *mask = log + 1;
14196 if (signed_sat)
14197 *signed_sat = true;
14199 return true;
14202 return false;
14205 /* Return 1 if memory locations are adjacent. */
14207 adjacent_mem_locations (rtx a, rtx b)
14209 /* We don't guarantee to preserve the order of these memory refs. */
14210 if (volatile_refs_p (a) || volatile_refs_p (b))
14211 return 0;
14213 if ((REG_P (XEXP (a, 0))
14214 || (GET_CODE (XEXP (a, 0)) == PLUS
14215 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
14216 && (REG_P (XEXP (b, 0))
14217 || (GET_CODE (XEXP (b, 0)) == PLUS
14218 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
14220 HOST_WIDE_INT val0 = 0, val1 = 0;
14221 rtx reg0, reg1;
14222 int val_diff;
14224 if (GET_CODE (XEXP (a, 0)) == PLUS)
14226 reg0 = XEXP (XEXP (a, 0), 0);
14227 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
14229 else
14230 reg0 = XEXP (a, 0);
14232 if (GET_CODE (XEXP (b, 0)) == PLUS)
14234 reg1 = XEXP (XEXP (b, 0), 0);
14235 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
14237 else
14238 reg1 = XEXP (b, 0);
14240 /* Don't accept any offset that will require multiple
14241 instructions to handle, since this would cause the
14242 arith_adjacentmem pattern to output an overlong sequence. */
14243 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
14244 return 0;
14246 /* Don't allow an eliminable register: register elimination can make
14247 the offset too large. */
14248 if (arm_eliminable_register (reg0))
14249 return 0;
14251 val_diff = val1 - val0;
14253 if (arm_ld_sched)
14255 /* If the target has load delay slots, then there's no benefit
14256 to using an ldm instruction unless the offset is zero and
14257 we are optimizing for size. */
14258 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
14259 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
14260 && (val_diff == 4 || val_diff == -4));
14263 return ((REGNO (reg0) == REGNO (reg1))
14264 && (val_diff == 4 || val_diff == -4));
14267 return 0;
14270 /* Return true if OP is a valid load or store multiple operation. LOAD is true
14271 for load operations, false for store operations. CONSECUTIVE is true
14272 if the register numbers in the operation must be consecutive in the register
14273 bank. RETURN_PC is true if the value is to be loaded into PC.
14274 The pattern we are trying to match for load is:
14275 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
14276 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
14279 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
14281 where
14282 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
14283 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
14284 3. If consecutive is TRUE, then for kth register being loaded,
14285 REGNO (R_dk) = REGNO (R_d0) + k.
14286 The pattern for store is similar. */
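/* For example (illustrative RTL), "ldm r0, {r4, r5}" corresponds to
     (parallel [(set (reg:SI 4) (mem:SI (reg:SI 0)))
                (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0) (const_int 4))))])
   which satisfies rules 1-3 above. */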
14287 bool
14288 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
14289 bool consecutive, bool return_pc)
14291 HOST_WIDE_INT count = XVECLEN (op, 0);
14292 rtx reg, mem, addr;
14293 unsigned regno;
14294 unsigned first_regno;
14295 HOST_WIDE_INT i = 1, base = 0, offset = 0;
14296 rtx elt;
14297 bool addr_reg_in_reglist = false;
14298 bool update = false;
14299 int reg_increment;
14300 int offset_adj;
14301 int regs_per_val;
14303 /* If not in SImode, then registers must be consecutive
14304 (e.g., VLDM instructions for DFmode). */
14305 gcc_assert ((mode == SImode) || consecutive);
14306 /* Setting return_pc for stores is illegal. */
14307 gcc_assert (!return_pc || load);
14309 /* Set up the increments and the regs per val based on the mode. */
14310 reg_increment = GET_MODE_SIZE (mode);
14311 regs_per_val = reg_increment / 4;
14312 offset_adj = return_pc ? 1 : 0;
14314 if (count <= 1
14315 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
14316 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
14317 return false;
14319 /* Check if this is a write-back. */
14320 elt = XVECEXP (op, 0, offset_adj);
14321 if (GET_CODE (SET_SRC (elt)) == PLUS)
14323 i++;
14324 base = 1;
14325 update = true;
14327 /* The offset adjustment must be the number of registers being
14328 popped times the size of a single register. */
14329 if (!REG_P (SET_DEST (elt))
14330 || !REG_P (XEXP (SET_SRC (elt), 0))
14331 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
14332 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
14333 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
14334 ((count - 1 - offset_adj) * reg_increment))
14335 return false;
14338 i = i + offset_adj;
14339 base = base + offset_adj;
14340 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
14341 success depends on the type: VLDM can do just one reg,
14342 LDM must do at least two. */
14343 if ((count <= i) && (mode == SImode))
14344 return false;
14346 elt = XVECEXP (op, 0, i - 1);
14347 if (GET_CODE (elt) != SET)
14348 return false;
14350 if (load)
14352 reg = SET_DEST (elt);
14353 mem = SET_SRC (elt);
14355 else
14357 reg = SET_SRC (elt);
14358 mem = SET_DEST (elt);
14361 if (!REG_P (reg) || !MEM_P (mem))
14362 return false;
14364 regno = REGNO (reg);
14365 first_regno = regno;
14366 addr = XEXP (mem, 0);
14367 if (GET_CODE (addr) == PLUS)
14369 if (!CONST_INT_P (XEXP (addr, 1)))
14370 return false;
14372 offset = INTVAL (XEXP (addr, 1));
14373 addr = XEXP (addr, 0);
14376 if (!REG_P (addr))
14377 return false;
14379 /* Don't allow SP to be loaded unless it is also the base register. It
14380 guarantees that SP is reset correctly when an LDM instruction
14381 is interrupted. Otherwise, we might end up with a corrupt stack. */
14382 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14383 return false;
14385 if (regno == REGNO (addr))
14386 addr_reg_in_reglist = true;
14388 for (; i < count; i++)
14390 elt = XVECEXP (op, 0, i);
14391 if (GET_CODE (elt) != SET)
14392 return false;
14394 if (load)
14396 reg = SET_DEST (elt);
14397 mem = SET_SRC (elt);
14399 else
14401 reg = SET_SRC (elt);
14402 mem = SET_DEST (elt);
14405 if (!REG_P (reg)
14406 || GET_MODE (reg) != mode
14407 || REGNO (reg) <= regno
14408 || (consecutive
14409 && (REGNO (reg) !=
14410 (unsigned int) (first_regno + regs_per_val * (i - base))))
14411 /* Don't allow SP to be loaded unless it is also the base register. It
14412 guarantees that SP is reset correctly when an LDM instruction
14413 is interrupted. Otherwise, we might end up with a corrupt stack. */
14414 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14415 || !MEM_P (mem)
14416 || GET_MODE (mem) != mode
14417 || ((GET_CODE (XEXP (mem, 0)) != PLUS
14418 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
14419 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
14420 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
14421 offset + (i - base) * reg_increment))
14422 && (!REG_P (XEXP (mem, 0))
14423 || offset + (i - base) * reg_increment != 0)))
14424 return false;
14426 regno = REGNO (reg);
14427 if (regno == REGNO (addr))
14428 addr_reg_in_reglist = true;
14431 if (load)
14433 if (update && addr_reg_in_reglist)
14434 return false;
14436 /* For Thumb-1, the address register is always modified, either by write-back
14437 or by an explicit load. If the pattern does not describe an update,
14438 then the address register must be in the list of loaded registers. */
14439 if (TARGET_THUMB1)
14440 return update || addr_reg_in_reglist;
14443 return true;
14446 /* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
14447 or VSCCLRM (otherwise) insn. To be a valid CLRM pattern, OP must have the
14448 following form:
14450 [(set (reg:SI <N>) (const_int 0))
14451 (set (reg:SI <M>) (const_int 0))
14453 (unspec_volatile [(const_int 0)]
14454 VUNSPEC_CLRM_APSR)
14455 (clobber (reg:CC CC_REGNUM))
14458 Any number (including 0) of set expressions is valid; the volatile unspec is
14459 optional. All registers but SP and PC are allowed, and registers must be in
14460 strictly increasing order.
14462 To be a valid VSCCLRM pattern, OP must have the following form:
14464 [(unspec_volatile [(const_int 0)]
14465 VUNSPEC_VSCCLRM_VPR)
14466 (set (reg:SF <N>) (const_int 0))
14467 (set (reg:SF <M>) (const_int 0))
14471 As with CLRM, any number (including 0) of set expressions is valid; however,
14472 the volatile unspec is mandatory here. Any VFP single-precision register is
14473 accepted, but all registers must be consecutive and in increasing order. */
14475 bool
14476 clear_operation_p (rtx op, bool vfp)
14478 unsigned regno;
14479 unsigned last_regno = INVALID_REGNUM;
14480 rtx elt, reg, zero;
14481 int count = XVECLEN (op, 0);
14482 int first_set = vfp ? 1 : 0;
14483 machine_mode expected_mode = vfp ? E_SFmode : E_SImode;
14485 for (int i = first_set; i < count; i++)
14487 elt = XVECEXP (op, 0, i);
14489 if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
14491 if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
14492 || XVECLEN (elt, 0) != 1
14493 || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
14494 || i != count - 2)
14495 return false;
14497 continue;
14500 if (GET_CODE (elt) == CLOBBER)
14501 continue;
14503 if (GET_CODE (elt) != SET)
14504 return false;
14506 reg = SET_DEST (elt);
14507 zero = SET_SRC (elt);
14509 if (!REG_P (reg)
14510 || GET_MODE (reg) != expected_mode
14511 || zero != CONST0_RTX (SImode))
14512 return false;
14514 regno = REGNO (reg);
14516 if (vfp)
14518 if (i != first_set && regno != last_regno + 1)
14519 return false;
14521 else
14523 if (regno == SP_REGNUM || regno == PC_REGNUM)
14524 return false;
14525 if (i != first_set && regno <= last_regno)
14526 return false;
14529 last_regno = regno;
14532 return true;
14535 /* Return true iff it would be profitable to turn a sequence of NOPS loads
14536 or stores (depending on IS_STORE) into a load-multiple or store-multiple
14537 instruction. ADD_OFFSET is nonzero if the base address register needs
14538 to be modified with an add instruction before we can use it. */
14540 static bool
14541 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
14542 int nops, HOST_WIDE_INT add_offset)
14544 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
14545 if the offset isn't small enough. The reason 2 ldrs are faster
14546 is because these ARMs are able to do more than one cache access
14547 in a single cycle. The ARM9 and StrongARM have Harvard caches,
14548 whilst the ARM8 has a double bandwidth cache. This means that
14549 these cores can do both an instruction fetch and a data fetch in
14550 a single cycle, so the trick of calculating the address into a
14551 scratch register (one of the result regs) and then doing a load
14552 multiple actually becomes slower (and no smaller in code size).
14553 That is the transformation
14555 ldr rd1, [rbase + offset]
14556 ldr rd2, [rbase + offset + 4]
14560 add rd1, rbase, offset
14561 ldmia rd1, {rd1, rd2}
14563 produces worse code -- '3 cycles + any stalls on rd2' instead of
14564 '2 cycles + any stalls on rd2'. On ARMs with only one cache
14565 access per cycle, the first sequence could never complete in less
14566 than 6 cycles, whereas the ldm sequence would only take 5 and
14567 would make better use of sequential accesses if not hitting the
14568 cache.
14570 We cheat here and test 'arm_ld_sched' which we currently know to
14571 only be true for the ARM8, ARM9 and StrongARM. If this ever
14572 changes, then the test below needs to be reworked. */
14573 if (nops == 2 && arm_ld_sched && add_offset != 0)
14574 return false;
14576 /* XScale has load-store double instructions, but they have stricter
14577 alignment requirements than load-store multiple, so we cannot
14578 use them.
14580 For XScale ldm requires 2 + NREGS cycles to complete and blocks
14581 the pipeline until completion.
14583 NREGS CYCLES
14584 1 3
14585 2 4
14586 3 5
14587 4 6
14589 An ldr instruction takes 1-3 cycles, but does not block the
14590 pipeline.
14592 NREGS CYCLES
14593 1 1-3
14594 2 2-6
14595 3 3-9
14596 4 4-12
14598 Best case ldr will always win. However, the more ldr instructions
14599 we issue, the less likely we are to be able to schedule them well.
14600 Using ldr instructions also increases code size.
14602 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
14603 for counts of 3 or 4 regs. */
14604 if (nops <= 2 && arm_tune_xscale && !optimize_size)
14605 return false;
14606 return true;
14609 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
14610 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
14611 an array ORDER which describes the sequence to use when accessing the
14612 offsets that produces an ascending order. In this sequence, each
14613 offset must be larger by exactly 4 than the previous one. ORDER[0]
14614 must have been filled in with the lowest offset by the caller.
14615 If UNSORTED_REGS is nonnull, it is an array of register numbers that
14616 we use to verify that ORDER produces an ascending order of registers.
14617 Return true if it was possible to construct such an order, false if
14618 not. */
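/* For example, given UNSORTED_OFFSETS {8, 4, 12, 0} with ORDER[0] set to 3
   (the offset 0), the computed ORDER is {3, 1, 0, 2}; offsets {0, 4, 12, 16}
   are rejected because no offset follows 4 by exactly 4. */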
14620 static bool
14621 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
14622 int *unsorted_regs)
14624 int i;
14625 for (i = 1; i < nops; i++)
14627 int j;
14629 order[i] = order[i - 1];
14630 for (j = 0; j < nops; j++)
14631 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
14633 /* We must find exactly one offset that is higher than the
14634 previous one by 4. */
14635 if (order[i] != order[i - 1])
14636 return false;
14637 order[i] = j;
14639 if (order[i] == order[i - 1])
14640 return false;
14641 /* The register numbers must be ascending. */
14642 if (unsorted_regs != NULL
14643 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
14644 return false;
14646 return true;
14649 /* Used to determine in a peephole whether a sequence of load
14650 instructions can be changed into a load-multiple instruction.
14651 NOPS is the number of separate load instructions we are examining. The
14652 first NOPS entries in OPERANDS are the destination registers, the
14653 next NOPS entries are memory operands. If this function is
14654 successful, *BASE is set to the common base register of the memory
14655 accesses; *LOAD_OFFSET is set to the first memory location's offset
14656 from that base register.
14657 REGS is an array filled in with the destination register numbers.
14658 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
14659 insn numbers to an ascending order of loads. If CHECK_REGS is true,
14660 the sequence of registers in REGS matches the loads from ascending memory
14661 locations, and the function verifies that the register numbers are
14662 themselves ascending. If CHECK_REGS is false, the register numbers
14663 are stored in the order they are found in the operands. */
14664 static int
14665 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
14666 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
14668 int unsorted_regs[MAX_LDM_STM_OPS];
14669 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14670 int order[MAX_LDM_STM_OPS];
14671 int base_reg = -1;
14672 int i, ldm_case;
14674 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14675 easily extended if required. */
14676 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14678 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14680 /* Loop over the operands and check that the memory references are
14681 suitable (i.e. immediate offsets from the same base register). At
14682 the same time, extract the target register, and the memory
14683 offsets. */
14684 for (i = 0; i < nops; i++)
14686 rtx reg;
14687 rtx offset;
14689 /* Convert a subreg of a mem into the mem itself. */
14690 if (GET_CODE (operands[nops + i]) == SUBREG)
14691 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14693 gcc_assert (MEM_P (operands[nops + i]));
14695 /* Don't reorder volatile memory references; it doesn't seem worth
14696 looking for the case where the order is ok anyway. */
14697 if (MEM_VOLATILE_P (operands[nops + i]))
14698 return 0;
14700 offset = const0_rtx;
14702 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14703 || (SUBREG_P (reg)
14704 && REG_P (reg = SUBREG_REG (reg))))
14705 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14706 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14707 || (SUBREG_P (reg)
14708 && REG_P (reg = SUBREG_REG (reg))))
14709 && (CONST_INT_P (offset
14710 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14712 if (i == 0)
14714 base_reg = REGNO (reg);
14715 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14716 return 0;
14718 else if (base_reg != (int) REGNO (reg))
14719 /* Not addressed from the same base register. */
14720 return 0;
14722 unsorted_regs[i] = (REG_P (operands[i])
14723 ? REGNO (operands[i])
14724 : REGNO (SUBREG_REG (operands[i])));
14726 /* If it isn't an integer register, or if it overwrites the
14727 base register but isn't the last insn in the list, then
14728 we can't do this. */
14729 if (unsorted_regs[i] < 0
14730 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14731 || unsorted_regs[i] > 14
14732 || (i != nops - 1 && unsorted_regs[i] == base_reg))
14733 return 0;
14735 /* Don't allow SP to be loaded unless it is also the base
14736 register. It guarantees that SP is reset correctly when
14737 an LDM instruction is interrupted. Otherwise, we might
14738 end up with a corrupt stack. */
14739 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
14740 return 0;
14742 unsorted_offsets[i] = INTVAL (offset);
14743 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14744 order[0] = i;
14746 else
14747 /* Not a suitable memory address. */
14748 return 0;
14751 /* All the useful information has now been extracted from the
14752 operands into unsorted_regs and unsorted_offsets; additionally,
14753 order[0] has been set to the lowest offset in the list. Sort
14754 the offsets into order, verifying that they are adjacent, and
14755 check that the register numbers are ascending. */
14756 if (!compute_offset_order (nops, unsorted_offsets, order,
14757 check_regs ? unsorted_regs : NULL))
14758 return 0;
14760 if (saved_order)
14761 memcpy (saved_order, order, sizeof order);
14763 if (base)
14765 *base = base_reg;
14767 for (i = 0; i < nops; i++)
14768 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14770 *load_offset = unsorted_offsets[order[0]];
14773 if (unsorted_offsets[order[0]] == 0)
14774 ldm_case = 1; /* ldmia */
14775 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14776 ldm_case = 2; /* ldmib */
14777 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14778 ldm_case = 3; /* ldmda */
14779 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14780 ldm_case = 4; /* ldmdb */
14781 else if (const_ok_for_arm (unsorted_offsets[order[0]])
14782 || const_ok_for_arm (-unsorted_offsets[order[0]]))
14783 ldm_case = 5;
14784 else
14785 return 0;
14787 if (!multiple_operation_profitable_p (false, nops,
14788 ldm_case == 5
14789 ? unsorted_offsets[order[0]] : 0))
14790 return 0;
14792 return ldm_case;
14795 /* Used to determine in a peephole whether a sequence of store instructions can
14796 be changed into a store-multiple instruction.
14797 NOPS is the number of separate store instructions we are examining.
14798 NOPS_TOTAL is the total number of instructions recognized by the peephole
14799 pattern.
14800 The first NOPS entries in OPERANDS are the source registers, the next
14801 NOPS entries are memory operands. If this function is successful, *BASE is
14802 set to the common base register of the memory accesses; *LOAD_OFFSET is set
14803 to the first memory location's offset from that base register. REGS is an
14804 array filled in with the source register numbers, REG_RTXS (if nonnull) is
14805 likewise filled with the corresponding rtx's.
14806 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
14807 numbers to an ascending order of stores.
14808 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
14809 from ascending memory locations, and the function verifies that the register
14810 numbers are themselves ascending. If CHECK_REGS is false, the register
14811 numbers are stored in the order they are found in the operands. */
14812 static int
14813 store_multiple_sequence (rtx *operands, int nops, int nops_total,
14814 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
14815 HOST_WIDE_INT *load_offset, bool check_regs)
14817 int unsorted_regs[MAX_LDM_STM_OPS];
14818 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
14819 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14820 int order[MAX_LDM_STM_OPS];
14821 int base_reg = -1;
14822 rtx base_reg_rtx = NULL;
14823 int i, stm_case;
14825 /* Write back of base register is currently only supported for Thumb 1. */
14826 int base_writeback = TARGET_THUMB1;
14828 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14829 easily extended if required. */
14830 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14832 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14834 /* Loop over the operands and check that the memory references are
14835 suitable (i.e. immediate offsets from the same base register). At
14836 the same time, extract the target register, and the memory
14837 offsets. */
14838 for (i = 0; i < nops; i++)
14840 rtx reg;
14841 rtx offset;
14843 /* Convert a subreg of a mem into the mem itself. */
14844 if (GET_CODE (operands[nops + i]) == SUBREG)
14845 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14847 gcc_assert (MEM_P (operands[nops + i]));
14849 /* Don't reorder volatile memory references; it doesn't seem worth
14850 looking for the case where the order is ok anyway. */
14851 if (MEM_VOLATILE_P (operands[nops + i]))
14852 return 0;
14854 offset = const0_rtx;
14856 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14857 || (SUBREG_P (reg)
14858 && REG_P (reg = SUBREG_REG (reg))))
14859 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14860 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14861 || (SUBREG_P (reg)
14862 && REG_P (reg = SUBREG_REG (reg))))
14863 && (CONST_INT_P (offset
14864 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14866 unsorted_reg_rtxs[i] = (REG_P (operands[i])
14867 ? operands[i] : SUBREG_REG (operands[i]));
14868 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
14870 if (i == 0)
14872 base_reg = REGNO (reg);
14873 base_reg_rtx = reg;
14874 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14875 return 0;
14877 else if (base_reg != (int) REGNO (reg))
14878 /* Not addressed from the same base register. */
14879 return 0;
14881 /* If it isn't an integer register, then we can't do this. */
14882 if (unsorted_regs[i] < 0
14883 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14884 /* The effects are unpredictable if the base register is
14885 both updated and stored. */
14886 || (base_writeback && unsorted_regs[i] == base_reg)
14887 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
14888 || unsorted_regs[i] > 14)
14889 return 0;
14891 unsorted_offsets[i] = INTVAL (offset);
14892 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14893 order[0] = i;
14895 else
14896 /* Not a suitable memory address. */
14897 return 0;
14900 /* All the useful information has now been extracted from the
14901 operands into unsorted_regs and unsorted_offsets; additionally,
14902 order[0] has been set to the lowest offset in the list. Sort
14903 the offsets into order, verifying that they are adjacent, and
14904 check that the register numbers are ascending. */
14905 if (!compute_offset_order (nops, unsorted_offsets, order,
14906 check_regs ? unsorted_regs : NULL))
14907 return 0;
14909 if (saved_order)
14910 memcpy (saved_order, order, sizeof order);
14912 if (base)
14914 *base = base_reg;
14916 for (i = 0; i < nops; i++)
14918 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14919 if (reg_rtxs)
14920 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
14923 *load_offset = unsorted_offsets[order[0]];
14926 if (TARGET_THUMB1
14927 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14928 return 0;
14930 if (unsorted_offsets[order[0]] == 0)
14931 stm_case = 1; /* stmia */
14932 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14933 stm_case = 2; /* stmib */
14934 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14935 stm_case = 3; /* stmda */
14936 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14937 stm_case = 4; /* stmdb */
14938 else
14939 return 0;
14941 if (!multiple_operation_profitable_p (false, nops, 0))
14942 return 0;
14944 return stm_case;
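/* Illustrative example (not taken from this file): a peephole candidate such
   as

     str r1, [r0]        @ offset 0
     str r2, [r0, #4]    @ offset 4

   has a lowest offset of 0, so store_multiple_sequence returns 1 (stmia) and
   the pair can be rewritten as "stmia r0, {r1, r2}".  Return values 2, 3 and
   4 correspond to the stmib, stmda and stmdb addressing variants.  */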
14947 /* Routines for use in generating RTL. */
14949 /* Generate a load-multiple instruction. COUNT is the number of loads in
14950 the instruction; REGS and MEMS are arrays containing the operands.
14951 BASEREG is the base register to be used in addressing the memory operands.
14952 WBACK_OFFSET is nonzero if the instruction should update the base
14953 register. */
14955 static rtx
14956 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14957 HOST_WIDE_INT wback_offset)
14959 int i = 0, j;
14960 rtx result;
14962 if (!multiple_operation_profitable_p (false, count, 0))
14964 rtx seq;
14966 start_sequence ();
14968 for (i = 0; i < count; i++)
14969 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14971 if (wback_offset != 0)
14972 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14974 seq = get_insns ();
14975 end_sequence ();
14977 return seq;
14980 result = gen_rtx_PARALLEL (VOIDmode,
14981 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14982 if (wback_offset != 0)
14984 XVECEXP (result, 0, 0)
14985 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14986 i = 1;
14987 count++;
14990 for (j = 0; i < count; i++, j++)
14991 XVECEXP (result, 0, i)
14992 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14994 return result;
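/* Rough sketch of the RTL built above for COUNT == 2 with a writeback offset
   of 8 (register and MEM operands are placeholders):

     (parallel [(set (reg:SI base) (plus:SI (reg:SI base) (const_int 8)))
                (set (reg:SI Ra) (mem:SI ...))
                (set (reg:SI Rb) (mem:SI ...))])

   When the multiple operation is not considered profitable, a plain sequence
   of SImode moves is emitted instead, as in the early-return path above.  */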
14997 /* Generate a store-multiple instruction. COUNT is the number of stores in
14998 the instruction; REGS and MEMS are arrays containing the operands.
14999 BASEREG is the base register to be used in addressing the memory operands.
15000 WBACK_OFFSET is nonzero if the instruction should update the base
15001 register. */
15003 static rtx
15004 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
15005 HOST_WIDE_INT wback_offset)
15007 int i = 0, j;
15008 rtx result;
15010 if (GET_CODE (basereg) == PLUS)
15011 basereg = XEXP (basereg, 0);
15013 if (!multiple_operation_profitable_p (false, count, 0))
15015 rtx seq;
15017 start_sequence ();
15019 for (i = 0; i < count; i++)
15020 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
15022 if (wback_offset != 0)
15023 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
15025 seq = get_insns ();
15026 end_sequence ();
15028 return seq;
15031 result = gen_rtx_PARALLEL (VOIDmode,
15032 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
15033 if (wback_offset != 0)
15035 XVECEXP (result, 0, 0)
15036 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
15037 i = 1;
15038 count++;
15041 for (j = 0; i < count; i++, j++)
15042 XVECEXP (result, 0, i)
15043 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
15045 return result;
15048 /* Generate either a load-multiple or a store-multiple instruction. This
15049 function can be used in situations where we can start with a single MEM
15050 rtx and adjust its address upwards.
15051 COUNT is the number of operations in the instruction, not counting a
15052 possible update of the base register. REGS is an array containing the
15053 register operands.
15054 BASEREG is the base register to be used in addressing the memory operands,
15055 which are constructed from BASEMEM.
15056 WRITE_BACK specifies whether the generated instruction should include an
15057 update of the base register.
15058 OFFSETP is used to pass an offset to and from this function; this offset
15059 is not used when constructing the address (instead BASEMEM should have an
15060 appropriate offset in its address), it is used only for setting
15061 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
15063 static rtx
15064 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
15065 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
15067 rtx mems[MAX_LDM_STM_OPS];
15068 HOST_WIDE_INT offset = *offsetp;
15069 int i;
15071 gcc_assert (count <= MAX_LDM_STM_OPS);
15073 if (GET_CODE (basereg) == PLUS)
15074 basereg = XEXP (basereg, 0);
15076 for (i = 0; i < count; i++)
15078 rtx addr = plus_constant (Pmode, basereg, i * 4);
15079 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
15080 offset += 4;
15083 if (write_back)
15084 *offsetp = offset;
15086 if (is_load)
15087 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
15088 write_back ? 4 * count : 0);
15089 else
15090 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
15091 write_back ? 4 * count : 0);
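/* Sketch of the address layout assumed above: for COUNT == 3 the loop builds
   MEMs at BASEREG, BASEREG+4 and BASEREG+8, tagged with MEM_OFFSETs *OFFSETP,
   *OFFSETP+4 and *OFFSETP+8, before passing them to the helpers above.  */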
15095 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
15096 rtx basemem, HOST_WIDE_INT *offsetp)
15098 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
15099 offsetp);
15103 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
15104 rtx basemem, HOST_WIDE_INT *offsetp)
15106 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
15107 offsetp);
15110 /* Called from a peephole2 expander to turn a sequence of loads into an
15111 LDM instruction. OPERANDS are the operands found by the peephole matcher;
15112 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
15113 is true if we can reorder the registers because they are used commutatively
15114 subsequently.
15115 Returns true iff we could generate a new instruction. */
15117 bool
15118 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
15120 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15121 rtx mems[MAX_LDM_STM_OPS];
15122 int i, j, base_reg;
15123 rtx base_reg_rtx;
15124 HOST_WIDE_INT offset;
15125 int write_back = FALSE;
15126 int ldm_case;
15127 rtx addr;
15129 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
15130 &base_reg, &offset, !sort_regs);
15132 if (ldm_case == 0)
15133 return false;
15135 if (sort_regs)
15136 for (i = 0; i < nops - 1; i++)
15137 for (j = i + 1; j < nops; j++)
15138 if (regs[i] > regs[j])
15140 int t = regs[i];
15141 regs[i] = regs[j];
15142 regs[j] = t;
15144 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15146 if (TARGET_THUMB1)
15148 gcc_assert (ldm_case == 1 || ldm_case == 5);
15150 /* Thumb-1 ldm uses writeback except if the base is loaded. */
15151 write_back = true;
15152 for (i = 0; i < nops; i++)
15153 if (base_reg == regs[i])
15154 write_back = false;
15156 /* Ensure the base is dead if it is updated. */
15157 if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
15158 return false;
15161 if (ldm_case == 5)
15163 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
15164 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
15165 offset = 0;
15166 base_reg_rtx = newbase;
15169 for (i = 0; i < nops; i++)
15171 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15172 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15173 SImode, addr, 0);
15175 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
15176 write_back ? offset + i * 4 : 0));
15177 return true;
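/* Illustrative transformation enabled by this helper (register numbers are
   arbitrary):

     ldr r3, [r0]
     ldr r4, [r0, #4]     -->     ldmia r0, {r3, r4}

   On Thumb-1 the writeback form "ldmia r0!, {...}" is used unless the base
   register is itself one of the loaded registers, per the logic above.  */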
15180 /* Called from a peephole2 expander to turn a sequence of stores into an
15181 STM instruction. OPERANDS are the operands found by the peephole matcher;
15182 NOPS indicates how many separate stores we are trying to combine.
15183 Returns true iff we could generate a new instruction. */
15185 bool
15186 gen_stm_seq (rtx *operands, int nops)
15188 int i;
15189 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15190 rtx mems[MAX_LDM_STM_OPS];
15191 int base_reg;
15192 rtx base_reg_rtx;
15193 HOST_WIDE_INT offset;
15194 int write_back = FALSE;
15195 int stm_case;
15196 rtx addr;
15197 bool base_reg_dies;
15199 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
15200 mem_order, &base_reg, &offset, true);
15202 if (stm_case == 0)
15203 return false;
15205 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15207 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
15208 if (TARGET_THUMB1)
15210 gcc_assert (base_reg_dies);
15211 write_back = TRUE;
15214 if (stm_case == 5)
15216 gcc_assert (base_reg_dies);
15217 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15218 offset = 0;
15221 addr = plus_constant (Pmode, base_reg_rtx, offset);
15223 for (i = 0; i < nops; i++)
15225 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15226 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15227 SImode, addr, 0);
15229 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
15230 write_back ? offset + i * 4 : 0));
15231 return true;
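/* Illustrative transformation for the store case (register numbers are
   arbitrary):

     str r3, [r0]
     str r4, [r0, #4]     -->     stmia r0, {r3, r4}

   On Thumb-1 the base register must be dead afterwards and the writeback
   form "stmia r0!, {r3, r4}" is emitted, matching the asserts above.  */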
15234 /* Called from a peephole2 expander to turn a sequence of stores that are
15235 preceded by constant loads into an STM instruction. OPERANDS are the
15236 operands found by the peephole matcher; NOPS indicates how many
15237 separate stores we are trying to combine; there are 2 * NOPS
15238 instructions in the peephole.
15239 Returns true iff we could generate a new instruction. */
15241 bool
15242 gen_const_stm_seq (rtx *operands, int nops)
15244 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
15245 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15246 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
15247 rtx mems[MAX_LDM_STM_OPS];
15248 int base_reg;
15249 rtx base_reg_rtx;
15250 HOST_WIDE_INT offset;
15251 int write_back = FALSE;
15252 int stm_case;
15253 rtx addr;
15254 bool base_reg_dies;
15255 int i, j;
15256 HARD_REG_SET allocated;
15258 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
15259 mem_order, &base_reg, &offset, false);
15261 if (stm_case == 0)
15262 return false;
15264 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
15266 /* If the same register is used more than once, try to find a free
15267 register. */
15268 CLEAR_HARD_REG_SET (allocated);
15269 for (i = 0; i < nops; i++)
15271 for (j = i + 1; j < nops; j++)
15272 if (regs[i] == regs[j])
15274 rtx t = peep2_find_free_register (0, nops * 2,
15275 TARGET_THUMB1 ? "l" : "r",
15276 SImode, &allocated);
15277 if (t == NULL_RTX)
15278 return false;
15279 reg_rtxs[i] = t;
15280 regs[i] = REGNO (t);
15284 /* Compute an ordering that maps the register numbers to an ascending
15285 sequence. */
15286 reg_order[0] = 0;
15287 for (i = 0; i < nops; i++)
15288 if (regs[i] < regs[reg_order[0]])
15289 reg_order[0] = i;
15291 for (i = 1; i < nops; i++)
15293 int this_order = reg_order[i - 1];
15294 for (j = 0; j < nops; j++)
15295 if (regs[j] > regs[reg_order[i - 1]]
15296 && (this_order == reg_order[i - 1]
15297 || regs[j] < regs[this_order]))
15298 this_order = j;
15299 reg_order[i] = this_order;
15302 /* Ensure that registers that must be live after the instruction end
15303 up with the correct value. */
15304 for (i = 0; i < nops; i++)
15306 int this_order = reg_order[i];
15307 if ((this_order != mem_order[i]
15308 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
15309 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
15310 return false;
15313 /* Load the constants. */
15314 for (i = 0; i < nops; i++)
15316 rtx op = operands[2 * nops + mem_order[i]];
15317 sorted_regs[i] = regs[reg_order[i]];
15318 emit_move_insn (reg_rtxs[reg_order[i]], op);
15321 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15323 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
15324 if (TARGET_THUMB1)
15326 gcc_assert (base_reg_dies);
15327 write_back = TRUE;
15330 if (stm_case == 5)
15332 gcc_assert (base_reg_dies);
15333 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15334 offset = 0;
15337 addr = plus_constant (Pmode, base_reg_rtx, offset);
15339 for (i = 0; i < nops; i++)
15341 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15342 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15343 SImode, addr, 0);
15345 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
15346 write_back ? offset + i * 4 : 0));
15347 return true;
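/* Illustrative example for the constant-store case above:

     mov r3, #1                  mov r3, #1
     str r3, [r0]        -->     mov r4, #2
     mov r3, #2                  stmia r0, {r3, r4}
     str r3, [r0, #4]

   where r4 is a free register found by peep2_find_free_register and r3 must
   be dead after the sequence, as checked above.  */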
15350 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
15351 unaligned copies on processors which support unaligned semantics for those
15352 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
15353 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
15354 An interleave factor of 1 (the minimum) will perform no interleaving.
15355 Load/store multiple are used for aligned addresses where possible. */
15357 static void
15358 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
15359 HOST_WIDE_INT length,
15360 unsigned int interleave_factor)
15362 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
15363 int *regnos = XALLOCAVEC (int, interleave_factor);
15364 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
15365 HOST_WIDE_INT i, j;
15366 HOST_WIDE_INT remaining = length, words;
15367 rtx halfword_tmp = NULL, byte_tmp = NULL;
15368 rtx dst, src;
15369 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
15370 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
15371 HOST_WIDE_INT srcoffset, dstoffset;
15372 HOST_WIDE_INT src_autoinc, dst_autoinc;
15373 rtx mem, addr;
15375 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
15377 /* Use hard registers if we have aligned source or destination so we can use
15378 load/store multiple with contiguous registers. */
15379 if (dst_aligned || src_aligned)
15380 for (i = 0; i < interleave_factor; i++)
15381 regs[i] = gen_rtx_REG (SImode, i);
15382 else
15383 for (i = 0; i < interleave_factor; i++)
15384 regs[i] = gen_reg_rtx (SImode);
15386 dst = copy_addr_to_reg (XEXP (dstbase, 0));
15387 src = copy_addr_to_reg (XEXP (srcbase, 0));
15389 srcoffset = dstoffset = 0;
15391 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
15392 For copying the last bytes we want to subtract this offset again. */
15393 src_autoinc = dst_autoinc = 0;
15395 for (i = 0; i < interleave_factor; i++)
15396 regnos[i] = i;
15398 /* Copy BLOCK_SIZE_BYTES chunks. */
15400 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
15402 /* Load words. */
15403 if (src_aligned && interleave_factor > 1)
15405 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
15406 TRUE, srcbase, &srcoffset));
15407 src_autoinc += UNITS_PER_WORD * interleave_factor;
15409 else
15411 for (j = 0; j < interleave_factor; j++)
15413 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
15414 - src_autoinc));
15415 mem = adjust_automodify_address (srcbase, SImode, addr,
15416 srcoffset + j * UNITS_PER_WORD);
15417 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15419 srcoffset += block_size_bytes;
15422 /* Store words. */
15423 if (dst_aligned && interleave_factor > 1)
15425 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
15426 TRUE, dstbase, &dstoffset));
15427 dst_autoinc += UNITS_PER_WORD * interleave_factor;
15429 else
15431 for (j = 0; j < interleave_factor; j++)
15433 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
15434 - dst_autoinc));
15435 mem = adjust_automodify_address (dstbase, SImode, addr,
15436 dstoffset + j * UNITS_PER_WORD);
15437 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15439 dstoffset += block_size_bytes;
15442 remaining -= block_size_bytes;
15445 /* Copy any whole words left (note these aren't interleaved with any
15446 subsequent halfword/byte load/stores in the interests of simplicity). */
15448 words = remaining / UNITS_PER_WORD;
15450 gcc_assert (words < interleave_factor);
15452 if (src_aligned && words > 1)
15454 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
15455 &srcoffset));
15456 src_autoinc += UNITS_PER_WORD * words;
15458 else
15460 for (j = 0; j < words; j++)
15462 addr = plus_constant (Pmode, src,
15463 srcoffset + j * UNITS_PER_WORD - src_autoinc);
15464 mem = adjust_automodify_address (srcbase, SImode, addr,
15465 srcoffset + j * UNITS_PER_WORD);
15466 if (src_aligned)
15467 emit_move_insn (regs[j], mem);
15468 else
15469 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15471 srcoffset += words * UNITS_PER_WORD;
15474 if (dst_aligned && words > 1)
15476 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
15477 &dstoffset));
15478 dst_autoinc += words * UNITS_PER_WORD;
15480 else
15482 for (j = 0; j < words; j++)
15484 addr = plus_constant (Pmode, dst,
15485 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
15486 mem = adjust_automodify_address (dstbase, SImode, addr,
15487 dstoffset + j * UNITS_PER_WORD);
15488 if (dst_aligned)
15489 emit_move_insn (mem, regs[j]);
15490 else
15491 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15493 dstoffset += words * UNITS_PER_WORD;
15496 remaining -= words * UNITS_PER_WORD;
15498 gcc_assert (remaining < 4);
15500 /* Copy a halfword if necessary. */
15502 if (remaining >= 2)
15504 halfword_tmp = gen_reg_rtx (SImode);
15506 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15507 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
15508 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
15510 /* Either write out immediately, or delay until we've loaded the last
15511 byte, depending on interleave factor. */
15512 if (interleave_factor == 1)
15514 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15515 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15516 emit_insn (gen_unaligned_storehi (mem,
15517 gen_lowpart (HImode, halfword_tmp)));
15518 halfword_tmp = NULL;
15519 dstoffset += 2;
15522 remaining -= 2;
15523 srcoffset += 2;
15526 gcc_assert (remaining < 2);
15528 /* Copy last byte. */
15530 if ((remaining & 1) != 0)
15532 byte_tmp = gen_reg_rtx (SImode);
15534 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15535 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
15536 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
15538 if (interleave_factor == 1)
15540 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15541 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15542 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15543 byte_tmp = NULL;
15544 dstoffset++;
15547 remaining--;
15548 srcoffset++;
15551 /* Store last halfword if we haven't done so already. */
15553 if (halfword_tmp)
15555 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15556 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15557 emit_insn (gen_unaligned_storehi (mem,
15558 gen_lowpart (HImode, halfword_tmp)));
15559 dstoffset += 2;
15562 /* Likewise for last byte. */
15564 if (byte_tmp)
15566 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15567 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15568 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15569 dstoffset++;
15572 gcc_assert (remaining == 0 && srcoffset == dstoffset);
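/* Rough shape of the output for LENGTH == 8, INTERLEAVE_FACTOR == 2 and only
   the destination word-aligned (illustrative; the exact output depends on
   tuning):

     ldr r0, [src]           @ unaligned word loads
     ldr r1, [src, #4]
     stmia dst!, {r0, r1}    @ aligned side uses a store-multiple

   Any trailing halfword and byte are copied with ldrh/strh and ldrb/strb as
   in the tail code above.  */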
15575 /* From mips_adjust_block_mem:
15577 Helper function for doing a loop-based block operation on memory
15578 reference MEM. Each iteration of the loop will operate on LENGTH
15579 bytes of MEM.
15581 Create a new base register for use within the loop and point it to
15582 the start of MEM. Create a new memory reference that uses this
15583 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
15585 static void
15586 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
15587 rtx *loop_mem)
15589 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
15591 /* Although the new mem does not refer to a known location,
15592 it does keep up to LENGTH bytes of alignment. */
15593 *loop_mem = change_address (mem, BLKmode, *loop_reg);
15594 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
15597 /* From mips_block_move_loop:
15599 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
15600 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
15601 the memory regions do not overlap. */
15603 static void
15604 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
15605 unsigned int interleave_factor,
15606 HOST_WIDE_INT bytes_per_iter)
15608 rtx src_reg, dest_reg, final_src, test;
15609 HOST_WIDE_INT leftover;
15611 leftover = length % bytes_per_iter;
15612 length -= leftover;
15614 /* Create registers and memory references for use within the loop. */
15615 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
15616 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
15618 /* Calculate the value that SRC_REG should have after the last iteration of
15619 the loop. */
15620 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
15621 0, 0, OPTAB_WIDEN);
15623 /* Emit the start of the loop. */
15624 rtx_code_label *label = gen_label_rtx ();
15625 emit_label (label);
15627 /* Emit the loop body. */
15628 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
15629 interleave_factor);
15631 /* Move on to the next block. */
15632 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
15633 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
15635 /* Emit the loop condition. */
15636 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
15637 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
15639 /* Mop up any left-over bytes. */
15640 if (leftover)
15641 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
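/* Sketch of the loop emitted above (labels and register names are
   placeholders):

   .Lloop:
     <straight copy of BYTES_PER_ITER bytes>
     add  src_reg, src_reg, #BYTES_PER_ITER
     add  dst_reg, dst_reg, #BYTES_PER_ITER
     cmp  src_reg, final_src
     bne  .Lloop
     <straight copy of the LENGTH % BYTES_PER_ITER leftover bytes>  */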
15644 /* Emit a block move when either the source or destination is unaligned (not
15645 aligned to a four-byte boundary). This may need further tuning depending on
15646 core type, optimize_size setting, etc. */
15648 static int
15649 arm_cpymemqi_unaligned (rtx *operands)
15651 HOST_WIDE_INT length = INTVAL (operands[2]);
15653 if (optimize_size)
15655 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
15656 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
15657 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
15658 size of code if optimizing for size. We'll use ldm/stm if src_aligned
15659 or dst_aligned though: allow more interleaving in those cases since the
15660 resulting code can be smaller. */
15661 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
15662 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
15664 if (length > 12)
15665 arm_block_move_unaligned_loop (operands[0], operands[1], length,
15666 interleave_factor, bytes_per_iter);
15667 else
15668 arm_block_move_unaligned_straight (operands[0], operands[1], length,
15669 interleave_factor);
15671 else
15673 /* Note that the loop created by arm_block_move_unaligned_loop may be
15674 subject to loop unrolling, which makes tuning this condition a little
15675 redundant. */
15676 if (length > 32)
15677 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
15678 else
15679 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
15682 return 1;
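/* Recap of the heuristics above (descriptive only): when optimizing for size,
   copies larger than 12 bytes use the loop with an interleave factor of 1 or
   2 (4 or 8 bytes per iteration); otherwise copies larger than 32 bytes use
   the loop with interleave factor 4 and 16 bytes per iteration, and smaller
   copies use a straight sequence.  */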
15686 arm_gen_cpymemqi (rtx *operands)
15688 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
15689 HOST_WIDE_INT srcoffset, dstoffset;
15690 rtx src, dst, srcbase, dstbase;
15691 rtx part_bytes_reg = NULL;
15692 rtx mem;
15694 if (!CONST_INT_P (operands[2])
15695 || !CONST_INT_P (operands[3])
15696 || INTVAL (operands[2]) > 64)
15697 return 0;
15699 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
15700 return arm_cpymemqi_unaligned (operands);
15702 if (INTVAL (operands[3]) & 3)
15703 return 0;
15705 dstbase = operands[0];
15706 srcbase = operands[1];
15708 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
15709 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
15711 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
15712 out_words_to_go = INTVAL (operands[2]) / 4;
15713 last_bytes = INTVAL (operands[2]) & 3;
15714 dstoffset = srcoffset = 0;
15716 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
15717 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
15719 while (in_words_to_go >= 2)
15721 if (in_words_to_go > 4)
15722 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
15723 TRUE, srcbase, &srcoffset));
15724 else
15725 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
15726 src, FALSE, srcbase,
15727 &srcoffset));
15729 if (out_words_to_go)
15731 if (out_words_to_go > 4)
15732 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
15733 TRUE, dstbase, &dstoffset));
15734 else if (out_words_to_go != 1)
15735 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
15736 out_words_to_go, dst,
15737 (last_bytes == 0
15738 ? FALSE : TRUE),
15739 dstbase, &dstoffset));
15740 else
15742 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15743 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
15744 if (last_bytes != 0)
15746 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
15747 dstoffset += 4;
15752 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
15753 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
15756 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
15757 if (out_words_to_go)
15759 rtx sreg;
15761 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15762 sreg = copy_to_reg (mem);
15764 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15765 emit_move_insn (mem, sreg);
15766 in_words_to_go--;
15768 gcc_assert (!in_words_to_go); /* Sanity check */
15771 if (in_words_to_go)
15773 gcc_assert (in_words_to_go > 0);
15775 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15776 part_bytes_reg = copy_to_mode_reg (SImode, mem);
15779 gcc_assert (!last_bytes || part_bytes_reg);
15781 if (BYTES_BIG_ENDIAN && last_bytes)
15783 rtx tmp = gen_reg_rtx (SImode);
15785 /* The bytes we want are in the top end of the word. */
15786 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
15787 GEN_INT (8 * (4 - last_bytes))));
15788 part_bytes_reg = tmp;
15790 while (last_bytes)
15792 mem = adjust_automodify_address (dstbase, QImode,
15793 plus_constant (Pmode, dst,
15794 last_bytes - 1),
15795 dstoffset + last_bytes - 1);
15796 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15798 if (--last_bytes)
15800 tmp = gen_reg_rtx (SImode);
15801 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
15802 part_bytes_reg = tmp;
15807 else
15809 if (last_bytes > 1)
15811 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
15812 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
15813 last_bytes -= 2;
15814 if (last_bytes)
15816 rtx tmp = gen_reg_rtx (SImode);
15817 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
15818 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
15819 part_bytes_reg = tmp;
15820 dstoffset += 2;
15824 if (last_bytes)
15826 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
15827 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15831 return 1;
15834 /* Helper for gen_cpymem_ldrd_strd. Increase the address of memory rtx
15835 by mode size. */
15836 inline static rtx
15837 next_consecutive_mem (rtx mem)
15839 machine_mode mode = GET_MODE (mem);
15840 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
15841 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
15843 return adjust_automodify_address (mem, mode, addr, offset);
15846 /* Copy using LDRD/STRD instructions whenever possible.
15847 Returns true upon success. */
15848 bool
15849 gen_cpymem_ldrd_strd (rtx *operands)
15851 unsigned HOST_WIDE_INT len;
15852 HOST_WIDE_INT align;
15853 rtx src, dst, base;
15854 rtx reg0;
15855 bool src_aligned, dst_aligned;
15856 bool src_volatile, dst_volatile;
15858 gcc_assert (CONST_INT_P (operands[2]));
15859 gcc_assert (CONST_INT_P (operands[3]));
15861 len = UINTVAL (operands[2]);
15862 if (len > 64)
15863 return false;
15865 /* Maximum alignment we can assume for both src and dst buffers. */
15866 align = INTVAL (operands[3]);
15868 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
15869 return false;
15871 /* Place src and dst addresses in registers
15872 and update the corresponding mem rtx. */
15873 dst = operands[0];
15874 dst_volatile = MEM_VOLATILE_P (dst);
15875 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
15876 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
15877 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
15879 src = operands[1];
15880 src_volatile = MEM_VOLATILE_P (src);
15881 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
15882 base = copy_to_mode_reg (SImode, XEXP (src, 0));
15883 src = adjust_automodify_address (src, VOIDmode, base, 0);
15885 if (!unaligned_access && !(src_aligned && dst_aligned))
15886 return false;
15888 if (src_volatile || dst_volatile)
15889 return false;
15891 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15892 if (!(dst_aligned || src_aligned))
15893 return arm_gen_cpymemqi (operands);
15895 /* If either src or dst is unaligned we'll be accessing it as pairs
15896 of unaligned SImode accesses. Otherwise we can generate DImode
15897 ldrd/strd instructions. */
15898 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
15899 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
15901 while (len >= 8)
15903 len -= 8;
15904 reg0 = gen_reg_rtx (DImode);
15905 rtx first_reg = NULL_RTX;
15906 rtx second_reg = NULL_RTX;
15908 if (!src_aligned || !dst_aligned)
15910 if (BYTES_BIG_ENDIAN)
15912 second_reg = gen_lowpart (SImode, reg0);
15913 first_reg = gen_highpart_mode (SImode, DImode, reg0);
15915 else
15917 first_reg = gen_lowpart (SImode, reg0);
15918 second_reg = gen_highpart_mode (SImode, DImode, reg0);
15921 if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
15922 emit_move_insn (reg0, src);
15923 else if (src_aligned)
15924 emit_insn (gen_unaligned_loaddi (reg0, src));
15925 else
15927 emit_insn (gen_unaligned_loadsi (first_reg, src));
15928 src = next_consecutive_mem (src);
15929 emit_insn (gen_unaligned_loadsi (second_reg, src));
15932 if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
15933 emit_move_insn (dst, reg0);
15934 else if (dst_aligned)
15935 emit_insn (gen_unaligned_storedi (dst, reg0));
15936 else
15938 emit_insn (gen_unaligned_storesi (dst, first_reg));
15939 dst = next_consecutive_mem (dst);
15940 emit_insn (gen_unaligned_storesi (dst, second_reg));
15943 src = next_consecutive_mem (src);
15944 dst = next_consecutive_mem (dst);
15947 gcc_assert (len < 8);
15948 if (len >= 4)
15950 /* More than a word but less than a double-word to copy. Copy a word. */
15951 reg0 = gen_reg_rtx (SImode);
15952 src = adjust_address (src, SImode, 0);
15953 dst = adjust_address (dst, SImode, 0);
15954 if (src_aligned)
15955 emit_move_insn (reg0, src);
15956 else
15957 emit_insn (gen_unaligned_loadsi (reg0, src));
15959 if (dst_aligned)
15960 emit_move_insn (dst, reg0);
15961 else
15962 emit_insn (gen_unaligned_storesi (dst, reg0));
15964 src = next_consecutive_mem (src);
15965 dst = next_consecutive_mem (dst);
15966 len -= 4;
15969 if (len == 0)
15970 return true;
15972 /* Copy the remaining bytes. */
15973 if (len >= 2)
15975 dst = adjust_address (dst, HImode, 0);
15976 src = adjust_address (src, HImode, 0);
15977 reg0 = gen_reg_rtx (SImode);
15978 if (src_aligned)
15979 emit_insn (gen_zero_extendhisi2 (reg0, src));
15980 else
15981 emit_insn (gen_unaligned_loadhiu (reg0, src));
15983 if (dst_aligned)
15984 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
15985 else
15986 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15988 src = next_consecutive_mem (src);
15989 dst = next_consecutive_mem (dst);
15990 if (len == 2)
15991 return true;
15994 dst = adjust_address (dst, QImode, 0);
15995 src = adjust_address (src, QImode, 0);
15996 reg0 = gen_reg_rtx (QImode);
15997 emit_move_insn (reg0, src);
15998 emit_move_insn (dst, reg0);
15999 return true;
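/* Illustrative output for a 16-byte copy with both buffers doubleword
   aligned (register names arbitrary):

     ldrd r4, r5, [src]
     strd r4, r5, [dst]
     ldrd r4, r5, [src, #8]
     strd r4, r5, [dst, #8]

   When a buffer is not even word-aligned, the loop above falls back to pairs
   of unaligned ldr/str accesses for each doubleword instead.  */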
16002 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
16003 into its component 32-bit subregs. OP2 may be an immediate
16004 constant and we want to simplify it in that case. */
16005 void
16006 arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
16007 rtx *lo_op2, rtx *hi_op2)
16009 *lo_op1 = gen_lowpart (SImode, op1);
16010 *hi_op1 = gen_highpart (SImode, op1);
16011 *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
16012 subreg_lowpart_offset (SImode, DImode));
16013 *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
16014 subreg_highpart_offset (SImode, DImode));
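/* Hypothetical example of the decomposition above: with OP2 a DImode
   constant such as (const_int 0x100000003), *LO_OP2 becomes (const_int 3)
   and *HI_OP2 becomes (const_int 1), while OP1 is simply split into its low
   and high SImode parts.  */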
16017 /* Select a dominance comparison mode if possible for a test of the general
16018 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
16019 COND_OR == DOM_CC_X_AND_Y => (X && Y)
16020 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
16021 COND_OR == DOM_CC_X_OR_Y => (X || Y)
16022 In all cases OP will be either EQ or NE, but we don't need to know which
16023 here. If we are unable to support a dominance comparison we return
16024 CC mode. This will then fail to match for the RTL expressions that
16025 generate this call. */
16026 machine_mode
16027 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
16029 enum rtx_code cond1, cond2;
16030 int swapped = 0;
16032 /* Currently we will probably get the wrong result if the individual
16033 comparisons are not simple. This also ensures that it is safe to
16034 reverse a comparison if necessary. */
16035 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
16036 != CCmode)
16037 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
16038 != CCmode))
16039 return CCmode;
16041 /* The if_then_else variant of this tests the second condition if the
16042 first passes, but is true if the first fails. Reverse the first
16043 condition to get a true "inclusive-or" expression. */
16044 if (cond_or == DOM_CC_NX_OR_Y)
16045 cond1 = reverse_condition (cond1);
16047 /* If the comparisons are not equal, and one doesn't dominate the other,
16048 then we can't do this. */
16049 if (cond1 != cond2
16050 && !comparison_dominates_p (cond1, cond2)
16051 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
16052 return CCmode;
16054 if (swapped)
16055 std::swap (cond1, cond2);
16057 switch (cond1)
16059 case EQ:
16060 if (cond_or == DOM_CC_X_AND_Y)
16061 return CC_DEQmode;
16063 switch (cond2)
16065 case EQ: return CC_DEQmode;
16066 case LE: return CC_DLEmode;
16067 case LEU: return CC_DLEUmode;
16068 case GE: return CC_DGEmode;
16069 case GEU: return CC_DGEUmode;
16070 default: gcc_unreachable ();
16073 case LT:
16074 if (cond_or == DOM_CC_X_AND_Y)
16075 return CC_DLTmode;
16077 switch (cond2)
16079 case LT:
16080 return CC_DLTmode;
16081 case LE:
16082 return CC_DLEmode;
16083 case NE:
16084 return CC_DNEmode;
16085 default:
16086 gcc_unreachable ();
16089 case GT:
16090 if (cond_or == DOM_CC_X_AND_Y)
16091 return CC_DGTmode;
16093 switch (cond2)
16095 case GT:
16096 return CC_DGTmode;
16097 case GE:
16098 return CC_DGEmode;
16099 case NE:
16100 return CC_DNEmode;
16101 default:
16102 gcc_unreachable ();
16105 case LTU:
16106 if (cond_or == DOM_CC_X_AND_Y)
16107 return CC_DLTUmode;
16109 switch (cond2)
16111 case LTU:
16112 return CC_DLTUmode;
16113 case LEU:
16114 return CC_DLEUmode;
16115 case NE:
16116 return CC_DNEmode;
16117 default:
16118 gcc_unreachable ();
16121 case GTU:
16122 if (cond_or == DOM_CC_X_AND_Y)
16123 return CC_DGTUmode;
16125 switch (cond2)
16127 case GTU:
16128 return CC_DGTUmode;
16129 case GEU:
16130 return CC_DGEUmode;
16131 case NE:
16132 return CC_DNEmode;
16133 default:
16134 gcc_unreachable ();
16137 /* The remaining cases only occur when both comparisons are the
16138 same. */
16139 case NE:
16140 gcc_assert (cond1 == cond2);
16141 return CC_DNEmode;
16143 case LE:
16144 gcc_assert (cond1 == cond2);
16145 return CC_DLEmode;
16147 case GE:
16148 gcc_assert (cond1 == cond2);
16149 return CC_DGEmode;
16151 case LEU:
16152 gcc_assert (cond1 == cond2);
16153 return CC_DLEUmode;
16155 case GEU:
16156 gcc_assert (cond1 == cond2);
16157 return CC_DGEUmode;
16159 default:
16160 gcc_unreachable ();
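/* Worked example (illustrative): for a test of the form
   (ne (ior (eq x1 x2) (eq y1 y2)) (const_int 0)), i.e. COND_OR ==
   DOM_CC_X_OR_Y with both sub-comparisons EQ, the switch above yields
   CC_DEQmode.  If either sub-comparison had required something other than
   CCmode, CCmode would have been returned and the caller's pattern would
   simply fail to match.  */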
16164 machine_mode
16165 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
16167 /* All floating point compares return CCFP if it is an equality
16168 comparison, and CCFPE otherwise. */
16169 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16171 switch (op)
16173 case EQ:
16174 case NE:
16175 case UNORDERED:
16176 case ORDERED:
16177 case UNLT:
16178 case UNLE:
16179 case UNGT:
16180 case UNGE:
16181 case UNEQ:
16182 case LTGT:
16183 return CCFPmode;
16185 case LT:
16186 case LE:
16187 case GT:
16188 case GE:
16189 return CCFPEmode;
16191 default:
16192 gcc_unreachable ();
16196 /* A compare with a shifted operand. Because of canonicalization, the
16197 comparison will have to be swapped when we emit the assembler. */
16198 if (GET_MODE (y) == SImode
16199 && (REG_P (y) || (SUBREG_P (y)))
16200 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16201 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
16202 || GET_CODE (x) == ROTATERT))
16203 return CC_SWPmode;
16205 /* A widened compare of the sum of a value plus a carry against a
16206 constant. This is a representation of RSC. We want to swap the
16207 result of the comparison at output. Not valid if the Z bit is
16208 needed. */
16209 if (GET_MODE (x) == DImode
16210 && GET_CODE (x) == PLUS
16211 && arm_borrow_operation (XEXP (x, 1), DImode)
16212 && CONST_INT_P (y)
16213 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16214 && (op == LE || op == GT))
16215 || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
16216 && (op == LEU || op == GTU))))
16217 return CC_SWPmode;
16219 /* If X is a constant we want to use CC_RSBmode. This is
16220 non-canonical, but arm_gen_compare_reg uses this to generate the
16221 correct canonical form. */
16222 if (GET_MODE (y) == SImode
16223 && (REG_P (y) || SUBREG_P (y))
16224 && CONST_INT_P (x))
16225 return CC_RSBmode;
16227 /* This operation is performed swapped, but since we only rely on the Z
16228 flag we don't need an additional mode. */
16229 if (GET_MODE (y) == SImode
16230 && (REG_P (y) || (SUBREG_P (y)))
16231 && GET_CODE (x) == NEG
16232 && (op == EQ || op == NE))
16233 return CC_Zmode;
16235 /* This is a special case that is used by combine to allow a
16236 comparison of a shifted byte load to be split into a zero-extend
16237 followed by a comparison of the shifted integer (only valid for
16238 equalities and unsigned inequalities). */
16239 if (GET_MODE (x) == SImode
16240 && GET_CODE (x) == ASHIFT
16241 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
16242 && GET_CODE (XEXP (x, 0)) == SUBREG
16243 && MEM_P (SUBREG_REG (XEXP (x, 0)))
16244 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
16245 && (op == EQ || op == NE
16246 || op == GEU || op == GTU || op == LTU || op == LEU)
16247 && CONST_INT_P (y))
16248 return CC_Zmode;
16250 /* A construct for a conditional compare, if the false arm contains
16251 0, then both conditions must be true, otherwise either condition
16252 must be true. Not all conditions are possible, so CCmode is
16253 returned if it can't be done. */
16254 if (GET_CODE (x) == IF_THEN_ELSE
16255 && (XEXP (x, 2) == const0_rtx
16256 || XEXP (x, 2) == const1_rtx)
16257 && COMPARISON_P (XEXP (x, 0))
16258 && COMPARISON_P (XEXP (x, 1)))
16259 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16260 INTVAL (XEXP (x, 2)));
16262 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
16263 if (GET_CODE (x) == AND
16264 && (op == EQ || op == NE)
16265 && COMPARISON_P (XEXP (x, 0))
16266 && COMPARISON_P (XEXP (x, 1)))
16267 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16268 DOM_CC_X_AND_Y);
16270 if (GET_CODE (x) == IOR
16271 && (op == EQ || op == NE)
16272 && COMPARISON_P (XEXP (x, 0))
16273 && COMPARISON_P (XEXP (x, 1)))
16274 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16275 DOM_CC_X_OR_Y);
16277 /* An operation (on Thumb) where we want to test for a single bit.
16278 This is done by shifting that bit up into the top bit of a
16279 scratch register; we can then branch on the sign bit. */
16280 if (TARGET_THUMB1
16281 && GET_MODE (x) == SImode
16282 && (op == EQ || op == NE)
16283 && GET_CODE (x) == ZERO_EXTRACT
16284 && XEXP (x, 1) == const1_rtx)
16285 return CC_Nmode;
16287 /* For an operation that sets the condition codes as a side-effect, the
16288 V flag is not set correctly, so we can only use comparisons where
16289 this doesn't matter. (For LT and GE we can use "mi" and "pl"
16290 instead.) */
16291 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
16292 if (GET_MODE (x) == SImode
16293 && y == const0_rtx
16294 && (op == EQ || op == NE || op == LT || op == GE)
16295 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
16296 || GET_CODE (x) == AND || GET_CODE (x) == IOR
16297 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
16298 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
16299 || GET_CODE (x) == LSHIFTRT
16300 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16301 || GET_CODE (x) == ROTATERT
16302 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
16303 return CC_NZmode;
16305 /* A comparison of ~reg with a const is really a special
16306 canonicalization of compare (~const, reg), which is a reverse
16307 subtract operation. We may not get here if CONST is 0, but that
16308 doesn't matter because ~0 isn't a valid immediate for RSB. */
16309 if (GET_MODE (x) == SImode
16310 && GET_CODE (x) == NOT
16311 && CONST_INT_P (y))
16312 return CC_RSBmode;
16314 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
16315 return CC_Zmode;
16317 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
16318 && GET_CODE (x) == PLUS
16319 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
16320 return CC_Cmode;
16322 if (GET_MODE (x) == DImode
16323 && GET_CODE (x) == PLUS
16324 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
16325 && CONST_INT_P (y)
16326 && UINTVAL (y) == 0x800000000
16327 && (op == GEU || op == LTU))
16328 return CC_ADCmode;
16330 if (GET_MODE (x) == DImode
16331 && (op == GE || op == LT)
16332 && GET_CODE (x) == SIGN_EXTEND
16333 && ((GET_CODE (y) == PLUS
16334 && arm_borrow_operation (XEXP (y, 0), DImode))
16335 || arm_borrow_operation (y, DImode)))
16336 return CC_NVmode;
16338 if (GET_MODE (x) == DImode
16339 && (op == GEU || op == LTU)
16340 && GET_CODE (x) == ZERO_EXTEND
16341 && ((GET_CODE (y) == PLUS
16342 && arm_borrow_operation (XEXP (y, 0), DImode))
16343 || arm_borrow_operation (y, DImode)))
16344 return CC_Bmode;
16346 if (GET_MODE (x) == DImode
16347 && (op == EQ || op == NE)
16348 && (GET_CODE (x) == PLUS
16349 || GET_CODE (x) == MINUS)
16350 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16351 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
16352 && GET_CODE (y) == SIGN_EXTEND
16353 && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
16354 return CC_Vmode;
16356 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
16357 return GET_MODE (x);
16359 return CCmode;
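/* Example (illustrative): comparing (ashift (reg:SI Rn) (const_int 2))
   against a plain SImode register selects CC_SWPmode above, since, because
   of canonicalization, the comparison has to be swapped when the assembler
   is emitted.  */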
16362 /* X and Y are two (DImode) things to compare for the condition CODE. Emit
16363 the sequence of instructions needed to generate a suitable condition
16364 code register. Return the CC register result. */
16365 static rtx
16366 arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16368 machine_mode mode;
16369 rtx cc_reg;
16371 /* We don't currently handle DImode in thumb1, but rely on libgcc. */
16372 gcc_assert (TARGET_32BIT);
16373 gcc_assert (!CONST_INT_P (x));
16375 rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
16376 subreg_lowpart_offset (SImode, DImode));
16377 rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
16378 subreg_highpart_offset (SImode, DImode));
16379 rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
16380 subreg_lowpart_offset (SImode, DImode));
16381 rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
16382 subreg_highpart_offset (SImode, DImode));
16383 switch (code)
16385 case EQ:
16386 case NE:
16388 if (y_lo == const0_rtx || y_hi == const0_rtx)
16390 if (y_lo != const0_rtx)
16392 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16394 gcc_assert (y_hi == const0_rtx);
16395 y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
16396 if (!arm_add_operand (y_lo, SImode))
16397 y_lo = force_reg (SImode, y_lo);
16398 emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
16399 x_lo = scratch2;
16401 else if (y_hi != const0_rtx)
16403 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16405 y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
16406 if (!arm_add_operand (y_hi, SImode))
16407 y_hi = force_reg (SImode, y_hi);
16408 emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
16409 x_hi = scratch2;
16412 if (!scratch)
16414 gcc_assert (!reload_completed);
16415 scratch = gen_rtx_SCRATCH (SImode);
16418 rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
16419 cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
16421 rtx set
16422 = gen_rtx_SET (cc_reg,
16423 gen_rtx_COMPARE (CC_NZmode,
16424 gen_rtx_IOR (SImode, x_lo, x_hi),
16425 const0_rtx));
16426 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
16427 clobber)));
16428 return cc_reg;
16431 if (!arm_add_operand (y_lo, SImode))
16432 y_lo = force_reg (SImode, y_lo);
16434 if (!arm_add_operand (y_hi, SImode))
16435 y_hi = force_reg (SImode, y_hi);
16437 rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
16438 rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
16439 rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
16440 mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
16441 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16443 emit_insn (gen_rtx_SET (cc_reg,
16444 gen_rtx_COMPARE (mode, conjunction,
16445 const0_rtx)));
16446 return cc_reg;
16449 case LT:
16450 case GE:
16452 if (y_lo == const0_rtx)
16454 /* If the low word of y is 0, then this is simply a normal
16455 compare of the upper words. */
16456 if (!arm_add_operand (y_hi, SImode))
16457 y_hi = force_reg (SImode, y_hi);
16459 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16462 if (!arm_add_operand (y_lo, SImode))
16463 y_lo = force_reg (SImode, y_lo);
16465 rtx cmp1
16466 = gen_rtx_LTU (DImode,
16467 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16468 const0_rtx);
16470 if (!scratch)
16471 scratch = gen_rtx_SCRATCH (SImode);
16473 if (!arm_not_operand (y_hi, SImode))
16474 y_hi = force_reg (SImode, y_hi);
16476 rtx_insn *insn;
16477 if (y_hi == const0_rtx)
16478 insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
16479 cmp1));
16480 else if (CONST_INT_P (y_hi))
16481 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
16482 y_hi, cmp1));
16483 else
16484 insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
16485 cmp1));
16486 return SET_DEST (single_set (insn));
16489 case LE:
16490 case GT:
16492 /* During expansion, we only expect to get here if y is a
16493 constant that we want to handle; otherwise we should have
16494 swapped the operands already. */
16495 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16497 if (!const_ok_for_arm (INTVAL (y_lo)))
16498 y_lo = force_reg (SImode, y_lo);
16500 /* Perform a reverse subtract and compare. */
16501 rtx cmp1
16502 = gen_rtx_LTU (DImode,
16503 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16504 const0_rtx);
16505 rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
16506 x_hi, cmp1));
16507 return SET_DEST (single_set (insn));
16510 case LTU:
16511 case GEU:
16513 if (y_lo == const0_rtx)
16515 /* If the low word of y is 0, then this is simply a normal
16516 compare of the upper words. */
16517 if (!arm_add_operand (y_hi, SImode))
16518 y_hi = force_reg (SImode, y_hi);
16520 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16523 if (!arm_add_operand (y_lo, SImode))
16524 y_lo = force_reg (SImode, y_lo);
16526 rtx cmp1
16527 = gen_rtx_LTU (DImode,
16528 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16529 const0_rtx);
16531 if (!scratch)
16532 scratch = gen_rtx_SCRATCH (SImode);
16533 if (!arm_not_operand (y_hi, SImode))
16534 y_hi = force_reg (SImode, y_hi);
16536 rtx_insn *insn;
16537 if (y_hi == const0_rtx)
16538 insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
16539 cmp1));
16540 else if (CONST_INT_P (y_hi))
16542 /* Constant is viewed as unsigned when zero-extended. */
16543 y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
16544 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
16545 y_hi, cmp1));
16547 else
16548 insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
16549 cmp1));
16550 return SET_DEST (single_set (insn));
16553 case LEU:
16554 case GTU:
16556 /* During expansion, we only expect to get here if y is a
16557 constant that we want to handle; otherwise we should have
16558 swapped the operands already. */
16559 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16561 if (!const_ok_for_arm (INTVAL (y_lo)))
16562 y_lo = force_reg (SImode, y_lo);
16564 /* Perform a reverse subtract and compare. */
16565 rtx cmp1
16566 = gen_rtx_LTU (DImode,
16567 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16568 const0_rtx);
16569 y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
16570 rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
16571 x_hi, cmp1));
16572 return SET_DEST (single_set (insn));
16575 default:
16576 gcc_unreachable ();
16580 /* X and Y are two things to compare using CODE. Emit the compare insn and
16581 return the rtx for register 0 in the proper mode. */
16583 arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16585 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
16586 return arm_gen_dicompare_reg (code, x, y, scratch);
16588 machine_mode mode = SELECT_CC_MODE (code, x, y);
16589 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16590 if (mode == CC_RSBmode)
16592 if (!scratch)
16593 scratch = gen_rtx_SCRATCH (SImode);
16594 emit_insn (gen_rsb_imm_compare_scratch (scratch,
16595 GEN_INT (~UINTVAL (x)), y));
16597 else
16598 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
16600 return cc_reg;
16603 /* Generate a sequence of insns that will generate the correct return
16604 address mask depending on the physical architecture that the program
16605 is running on. */
16607 arm_gen_return_addr_mask (void)
16609 rtx reg = gen_reg_rtx (Pmode);
16611 emit_insn (gen_return_addr_mask (reg));
16612 return reg;
16615 void
16616 arm_reload_in_hi (rtx *operands)
16618 rtx ref = operands[1];
16619 rtx base, scratch;
16620 HOST_WIDE_INT offset = 0;
16622 if (SUBREG_P (ref))
16624 offset = SUBREG_BYTE (ref);
16625 ref = SUBREG_REG (ref);
16628 if (REG_P (ref))
16630 /* We have a pseudo which has been spilt onto the stack; there
16631 are two cases here: the first where there is a simple
16632 stack-slot replacement and a second where the stack-slot is
16633 out of range, or is used as a subreg. */
16634 if (reg_equiv_mem (REGNO (ref)))
16636 ref = reg_equiv_mem (REGNO (ref));
16637 base = find_replacement (&XEXP (ref, 0));
16639 else
16640 /* The slot is out of range, or was dressed up in a SUBREG. */
16641 base = reg_equiv_address (REGNO (ref));
16643 /* PR 62554: If there is no equivalent memory location then just move
16644 the value as an SImode register move. This happens when the target
16645 architecture variant does not have an HImode register move. */
16646 if (base == NULL)
16648 gcc_assert (REG_P (operands[0]));
16649 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
16650 gen_rtx_SUBREG (SImode, ref, 0)));
16651 return;
16654 else
16655 base = find_replacement (&XEXP (ref, 0));
16657 /* Handle the case where the address is too complex to be offset by 1. */
16658 if (GET_CODE (base) == MINUS
16659 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16661 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16663 emit_set_insn (base_plus, base);
16664 base = base_plus;
16666 else if (GET_CODE (base) == PLUS)
16668 /* The addend must be CONST_INT, or we would have dealt with it above. */
16669 HOST_WIDE_INT hi, lo;
16671 offset += INTVAL (XEXP (base, 1));
16672 base = XEXP (base, 0);
16674 /* Rework the address into a legal sequence of insns. */
16675 /* Valid range for lo is -4095 -> 4095 */
16676 lo = (offset >= 0
16677 ? (offset & 0xfff)
16678 : -((-offset) & 0xfff));
16680 /* Corner case: if lo is the max offset then we would be out of range
16681 once we have added the additional 1 below, so bump the msb into the
16682 pre-loading insn(s). */
16683 if (lo == 4095)
16684 lo &= 0x7ff;
16686 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16687 ^ (HOST_WIDE_INT) 0x80000000)
16688 - (HOST_WIDE_INT) 0x80000000);
16690 gcc_assert (hi + lo == offset);
16692 if (hi != 0)
16694 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16696 /* Get the base address; addsi3 knows how to handle constants
16697 that require more than one insn. */
16698 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16699 base = base_plus;
16700 offset = lo;
16704 /* Operands[2] may overlap operands[0] (though it won't overlap
16705 operands[1]); that's why we asked for a DImode reg -- so we can
16706 use the half that does not overlap. */
16707 if (REGNO (operands[2]) == REGNO (operands[0]))
16708 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16709 else
16710 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16712 emit_insn (gen_zero_extendqisi2 (scratch,
16713 gen_rtx_MEM (QImode,
16714 plus_constant (Pmode, base,
16715 offset))));
16716 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
16717 gen_rtx_MEM (QImode,
16718 plus_constant (Pmode, base,
16719 offset + 1))));
16720 if (!BYTES_BIG_ENDIAN)
16721 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16722 gen_rtx_IOR (SImode,
16723 gen_rtx_ASHIFT
16724 (SImode,
16725 gen_rtx_SUBREG (SImode, operands[0], 0),
16726 GEN_INT (8)),
16727 scratch));
16728 else
16729 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16730 gen_rtx_IOR (SImode,
16731 gen_rtx_ASHIFT (SImode, scratch,
16732 GEN_INT (8)),
16733 gen_rtx_SUBREG (SImode, operands[0], 0)));
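/* Illustrative little-endian expansion of the reload above (register names
   arbitrary, offsets already legitimized):

     ldrb r_scratch, [base, #off]
     ldrb r_out, [base, #off + 1]
     orr  r_out, r_scratch, r_out, lsl #8

   For big-endian targets the roles of the two bytes are swapped, as in the
   else branch above.  */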
16736 /* Handle storing a half-word to memory during reload by synthesizing as two
16737 byte stores. Take care not to clobber the input values until after we
16738 have moved them somewhere safe. This code assumes that if the DImode
16739 scratch in operands[2] overlaps either the input value or output address
16740 in some way, then that value must die in this insn (we absolutely need
16741 two scratch registers for some corner cases). */
16742 void
16743 arm_reload_out_hi (rtx *operands)
16745 rtx ref = operands[0];
16746 rtx outval = operands[1];
16747 rtx base, scratch;
16748 HOST_WIDE_INT offset = 0;
16750 if (SUBREG_P (ref))
16752 offset = SUBREG_BYTE (ref);
16753 ref = SUBREG_REG (ref);
16756 if (REG_P (ref))
16758 /* We have a pseudo which has been spilt onto the stack; there
16759 are two cases here: the first where there is a simple
16760 stack-slot replacement and a second where the stack-slot is
16761 out of range, or is used as a subreg. */
16762 if (reg_equiv_mem (REGNO (ref)))
16764 ref = reg_equiv_mem (REGNO (ref));
16765 base = find_replacement (&XEXP (ref, 0));
16767 else
16768 /* The slot is out of range, or was dressed up in a SUBREG. */
16769 base = reg_equiv_address (REGNO (ref));
16771 /* PR 62254: If there is no equivalent memory location then just move
16772 the value as an SImode register move. This happens when the target
16773 architecture variant does not have an HImode register move. */
16774 if (base == NULL)
16776 gcc_assert (REG_P (outval) || SUBREG_P (outval));
16778 if (REG_P (outval))
16780 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16781 gen_rtx_SUBREG (SImode, outval, 0)));
16783 else /* SUBREG_P (outval) */
16785 if (GET_MODE (SUBREG_REG (outval)) == SImode)
16786 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16787 SUBREG_REG (outval)));
16788 else
16789 /* FIXME: Handle other cases ? */
16790 gcc_unreachable ();
16792 return;
16795 else
16796 base = find_replacement (&XEXP (ref, 0));
16798 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16800 /* Handle the case where the address is too complex to be offset by 1. */
16801 if (GET_CODE (base) == MINUS
16802 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16804 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16806 /* Be careful not to destroy OUTVAL. */
16807 if (reg_overlap_mentioned_p (base_plus, outval))
16809 /* Updating base_plus might destroy outval, see if we can
16810 swap the scratch and base_plus. */
16811 if (!reg_overlap_mentioned_p (scratch, outval))
16812 std::swap (scratch, base_plus);
16813 else
16815 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16817 /* Be conservative and copy OUTVAL into the scratch now,
16818 this should only be necessary if outval is a subreg
16819 of something larger than a word. */
16820 /* XXX Might this clobber base? I can't see how it can,
16821 since scratch is known to overlap with OUTVAL, and
16822 must be wider than a word. */
16823 emit_insn (gen_movhi (scratch_hi, outval));
16824 outval = scratch_hi;
16828 emit_set_insn (base_plus, base);
16829 base = base_plus;
16831 else if (GET_CODE (base) == PLUS)
16833 /* The addend must be CONST_INT, or we would have dealt with it above. */
16834 HOST_WIDE_INT hi, lo;
16836 offset += INTVAL (XEXP (base, 1));
16837 base = XEXP (base, 0);
16839 /* Rework the address into a legal sequence of insns. */
16840 /* Valid range for lo is -4095 -> 4095 */
16841 lo = (offset >= 0
16842 ? (offset & 0xfff)
16843 : -((-offset) & 0xfff));
16845 /* Corner case, if lo is the max offset then we would be out of range
16846 once we have added the additional 1 below, so bump the msb into the
16847 pre-loading insn(s). */
16848 if (lo == 4095)
16849 lo &= 0x7ff;
16851 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16852 ^ (HOST_WIDE_INT) 0x80000000)
16853 - (HOST_WIDE_INT) 0x80000000);
16855 gcc_assert (hi + lo == offset);
16857 if (hi != 0)
16859 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16861 /* Be careful not to destroy OUTVAL. */
16862 if (reg_overlap_mentioned_p (base_plus, outval))
16864 /* Updating base_plus might destroy outval, see if we
16865 can swap the scratch and base_plus. */
16866 if (!reg_overlap_mentioned_p (scratch, outval))
16867 std::swap (scratch, base_plus);
16868 else
16870 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16872 /* Be conservative and copy outval into scratch now,
16873 this should only be necessary if outval is a
16874 subreg of something larger than a word. */
16875 /* XXX Might this clobber base? I can't see how it
16876 can, since scratch is known to overlap with
16877 outval. */
16878 emit_insn (gen_movhi (scratch_hi, outval));
16879 outval = scratch_hi;
16883 /* Get the base address; addsi3 knows how to handle constants
16884 that require more than one insn. */
16885 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16886 base = base_plus;
16887 offset = lo;
16891 if (BYTES_BIG_ENDIAN)
16893 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16894 plus_constant (Pmode, base,
16895 offset + 1)),
16896 gen_lowpart (QImode, outval)));
16897 emit_insn (gen_lshrsi3 (scratch,
16898 gen_rtx_SUBREG (SImode, outval, 0),
16899 GEN_INT (8)));
16900 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16901 offset)),
16902 gen_lowpart (QImode, scratch)));
16904 else
16906 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16907 offset)),
16908 gen_lowpart (QImode, outval)));
16909 emit_insn (gen_lshrsi3 (scratch,
16910 gen_rtx_SUBREG (SImode, outval, 0),
16911 GEN_INT (8)));
16912 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16913 plus_constant (Pmode, base,
16914 offset + 1)),
16915 gen_lowpart (QImode, scratch)));
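/* Illustration (added comment, not in the original sources): storing
   outval == 0x1234 with the little-endian sequence above writes byte 0x34
   at [base + offset] and byte 0x12 (outval >> 8) at [base + offset + 1];
   the big-endian sequence writes the same two bytes in the opposite
   order. */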
16919 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
16920 (padded to the size of a word) should be passed in a register. */
16922 static bool
16923 arm_must_pass_in_stack (const function_arg_info &arg)
16925 if (TARGET_AAPCS_BASED)
16926 return must_pass_in_stack_var_size (arg);
16927 else
16928 return must_pass_in_stack_var_size_or_pad (arg);
16932 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16933 byte of a stack argument has useful data. For legacy APCS ABIs we use
16934 the default. For AAPCS based ABIs small aggregate types are placed
16935 in the lowest memory address. */
16937 static pad_direction
16938 arm_function_arg_padding (machine_mode mode, const_tree type)
16940 if (!TARGET_AAPCS_BASED)
16941 return default_function_arg_padding (mode, type);
16943 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
16944 return PAD_DOWNWARD;
16946 return PAD_UPWARD;
16950 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16951 Return !BYTES_BIG_ENDIAN if the least significant byte of the
16952 register has useful data, and return the opposite if the most
16953 significant byte does. */
16955 bool
16956 arm_pad_reg_upward (machine_mode mode,
16957 tree type, int first ATTRIBUTE_UNUSED)
16959 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
16961 /* For AAPCS, small aggregates, small fixed-point types,
16962 and small complex types are always padded upwards. */
16963 if (type)
16965 if ((AGGREGATE_TYPE_P (type)
16966 || TREE_CODE (type) == COMPLEX_TYPE
16967 || FIXED_POINT_TYPE_P (type))
16968 && int_size_in_bytes (type) <= 4)
16969 return true;
16971 else
16973 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
16974 && GET_MODE_SIZE (mode) <= 4)
16975 return true;
16979 /* Otherwise, use default padding. */
16980 return !BYTES_BIG_ENDIAN;
16983 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16984 assuming that the address in the base register is word aligned. */
16985 bool
16986 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
16988 HOST_WIDE_INT max_offset;
16990 /* Offset must be a multiple of 4 in Thumb mode. */
16991 if (TARGET_THUMB2 && ((offset & 3) != 0))
16992 return false;
16994 if (TARGET_THUMB2)
16995 max_offset = 1020;
16996 else if (TARGET_ARM)
16997 max_offset = 255;
16998 else
16999 return false;
17001 return ((offset <= max_offset) && (offset >= -max_offset));
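/* Examples (added for exposition, not in the original sources): an offset
   of 252 is accepted in both ARM and Thumb-2 state; 1020 is accepted only
   in Thumb-2 state, since it exceeds the ARM-state limit of 255; 1022 is
   rejected in both, being out of range for ARM and not a multiple of 4 for
   Thumb-2. */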
17004 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
17005 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
17006 Assumes that the address in the base register RN is word aligned. Pattern
17007 guarantees that both memory accesses use the same base register,
17008 the offsets are constants within the range, and the gap between the offsets is 4.
17009 If reload is complete then check that registers are legal. WBACK indicates whether
17010 address is updated. LOAD indicates whether memory access is load or store. */
17011 bool
17012 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
17013 bool wback, bool load)
17015 unsigned int t, t2, n;
17017 if (!reload_completed)
17018 return true;
17020 if (!offset_ok_for_ldrd_strd (offset))
17021 return false;
17023 t = REGNO (rt);
17024 t2 = REGNO (rt2);
17025 n = REGNO (rn);
17027 if ((TARGET_THUMB2)
17028 && ((wback && (n == t || n == t2))
17029 || (t == SP_REGNUM)
17030 || (t == PC_REGNUM)
17031 || (t2 == SP_REGNUM)
17032 || (t2 == PC_REGNUM)
17033 || (!load && (n == PC_REGNUM))
17034 || (load && (t == t2))
17035 /* Triggers Cortex-M3 LDRD errata. */
17036 || (!wback && load && fix_cm3_ldrd && (n == t))))
17037 return false;
17039 if ((TARGET_ARM)
17040 && ((wback && (n == t || n == t2))
17041 || (t2 == PC_REGNUM)
17042 || (t % 2 != 0) /* First destination register is not even. */
17043 || (t2 != t + 1)
17044 /* PC can be used as base register (for offset addressing only),
17046 but it is deprecated. */
17046 || (n == PC_REGNUM)))
17047 return false;
17049 return true;
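/* Example (added comment, not part of the original sources): after reload,
   ARM state accepts ldrd r4, r5, [r6] (even first register, consecutive
   register pair, base distinct from PC) but rejects ldrd r5, r6, [r7]
   because the first destination register is odd; Thumb-2 state has no
   even/odd restriction but rejects SP or PC as a destination. */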
17052 /* Return true if a 64-bit access with alignment ALIGN and with a
17053 constant offset OFFSET from the base pointer is permitted on this
17054 architecture. */
17055 static bool
17056 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
17058 return (unaligned_access
17059 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
17060 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
17063 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
17064 operand MEM's address contains an immediate offset from the base
17065 register and has no side effects, in which case it sets BASE,
17066 OFFSET and ALIGN accordingly. */
17067 static bool
17068 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
17070 rtx addr;
17072 gcc_assert (base != NULL && offset != NULL);
17074 /* TODO: Handle more general memory operand patterns, such as
17075 PRE_DEC and PRE_INC. */
17077 if (side_effects_p (mem))
17078 return false;
17080 /* Can't deal with subregs. */
17081 if (SUBREG_P (mem))
17082 return false;
17084 gcc_assert (MEM_P (mem));
17086 *offset = const0_rtx;
17087 *align = MEM_ALIGN (mem);
17089 addr = XEXP (mem, 0);
17091 /* If addr isn't valid for DImode, then we can't handle it. */
17092 if (!arm_legitimate_address_p (DImode, addr,
17093 reload_in_progress || reload_completed))
17094 return false;
17096 if (REG_P (addr))
17098 *base = addr;
17099 return true;
17101 else if (GET_CODE (addr) == PLUS)
17103 *base = XEXP (addr, 0);
17104 *offset = XEXP (addr, 1);
17105 return (REG_P (*base) && CONST_INT_P (*offset));
17108 return false;
17111 /* Called from a peephole2 to replace two word-size accesses with a
17112 single LDRD/STRD instruction. Returns true iff we can generate a
17113 new instruction sequence. That is, both accesses use the same base
17114 register and the gap between constant offsets is 4. This function
17115 may reorder its operands to match ldrd/strd RTL templates.
17116 OPERANDS are the operands found by the peephole matcher;
17117 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
17118 corresponding memory operands. LOAD indicates whether the access
17119 is load or store. CONST_STORE indicates a store of constant
17120 integer values held in OPERANDS[4,5] and assumes that the pattern
17121 is 4 insns long, for the purpose of checking dead registers.
17122 COMMUTE indicates that register operands may be reordered. */
17123 bool
17124 gen_operands_ldrd_strd (rtx *operands, bool load,
17125 bool const_store, bool commute)
17127 int nops = 2;
17128 HOST_WIDE_INT offsets[2], offset, align[2];
17129 rtx base = NULL_RTX;
17130 rtx cur_base, cur_offset, tmp;
17131 int i, gap;
17132 HARD_REG_SET regset;
17134 gcc_assert (!const_store || !load);
17135 /* Check that the memory references are immediate offsets from the
17136 same base register. Extract the base register, the destination
17137 registers, and the corresponding memory offsets. */
17138 for (i = 0; i < nops; i++)
17140 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17141 &align[i]))
17142 return false;
17144 if (i == 0)
17145 base = cur_base;
17146 else if (REGNO (base) != REGNO (cur_base))
17147 return false;
17149 offsets[i] = INTVAL (cur_offset);
17150 if (GET_CODE (operands[i]) == SUBREG)
17152 tmp = SUBREG_REG (operands[i]);
17153 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
17154 operands[i] = tmp;
17158 /* Make sure there is no dependency between the individual loads. */
17159 if (load && REGNO (operands[0]) == REGNO (base))
17160 return false; /* RAW */
17162 if (load && REGNO (operands[0]) == REGNO (operands[1]))
17163 return false; /* WAW */
17165 /* If the same input register is used in both stores
17166 when storing different constants, try to find a free register.
17167 For example, the code
17168 mov r0, 0
17169 str r0, [r2]
17170 mov r0, 1
17171 str r0, [r2, #4]
17172 can be transformed into
17173 mov r1, 0
17174 mov r0, 1
17175 strd r1, r0, [r2]
17176 in Thumb mode assuming that r1 is free.
17177 For ARM mode do the same but only if the starting register
17178 can be made to be even. */
17179 if (const_store
17180 && REGNO (operands[0]) == REGNO (operands[1])
17181 && INTVAL (operands[4]) != INTVAL (operands[5]))
17183 if (TARGET_THUMB2)
17185 CLEAR_HARD_REG_SET (regset);
17186 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17187 if (tmp == NULL_RTX)
17188 return false;
17190 /* Use the new register in the first load to ensure that
17191 if the original input register is not dead after peephole,
17192 then it will have the correct constant value. */
17193 operands[0] = tmp;
17195 else if (TARGET_ARM)
17197 int regno = REGNO (operands[0]);
17198 if (!peep2_reg_dead_p (4, operands[0]))
17200 /* When the input register is even and is not dead after the
17201 pattern, it has to hold the second constant but we cannot
17202 form a legal STRD in ARM mode with this register as the second
17203 register. */
17204 if (regno % 2 == 0)
17205 return false;
17207 /* Is regno-1 free? */
17208 SET_HARD_REG_SET (regset);
17209 CLEAR_HARD_REG_BIT(regset, regno - 1);
17210 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17211 if (tmp == NULL_RTX)
17212 return false;
17214 operands[0] = tmp;
17216 else
17218 /* Find a DImode register. */
17219 CLEAR_HARD_REG_SET (regset);
17220 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17221 if (tmp != NULL_RTX)
17223 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17224 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17226 else
17228 /* Can we use the input register to form a DI register? */
17229 SET_HARD_REG_SET (regset);
17230 CLEAR_HARD_REG_BIT(regset,
17231 regno % 2 == 0 ? regno + 1 : regno - 1);
17232 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17233 if (tmp == NULL_RTX)
17234 return false;
17235 operands[regno % 2 == 1 ? 0 : 1] = tmp;
17239 gcc_assert (operands[0] != NULL_RTX);
17240 gcc_assert (operands[1] != NULL_RTX);
17241 gcc_assert (REGNO (operands[0]) % 2 == 0);
17242 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
17246 /* Make sure the instructions are ordered with lower memory access first. */
17247 if (offsets[0] > offsets[1])
17249 gap = offsets[0] - offsets[1];
17250 offset = offsets[1];
17252 /* Swap the instructions such that lower memory is accessed first. */
17253 std::swap (operands[0], operands[1]);
17254 std::swap (operands[2], operands[3]);
17255 std::swap (align[0], align[1]);
17256 if (const_store)
17257 std::swap (operands[4], operands[5]);
17259 else
17261 gap = offsets[1] - offsets[0];
17262 offset = offsets[0];
17265 /* Make sure accesses are to consecutive memory locations. */
17266 if (gap != GET_MODE_SIZE (SImode))
17267 return false;
17269 if (!align_ok_ldrd_strd (align[0], offset))
17270 return false;
17272 /* Make sure we generate legal instructions. */
17273 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17274 false, load))
17275 return true;
17277 /* In Thumb state, where registers are almost unconstrained, there
17278 is little hope to fix it. */
17279 if (TARGET_THUMB2)
17280 return false;
17282 if (load && commute)
17284 /* Try reordering registers. */
17285 std::swap (operands[0], operands[1]);
17286 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17287 false, load))
17288 return true;
17291 if (const_store)
17293 /* If input registers are dead after this pattern, they can be
17294 reordered or replaced by other registers that are free in the
17295 current pattern. */
17296 if (!peep2_reg_dead_p (4, operands[0])
17297 || !peep2_reg_dead_p (4, operands[1]))
17298 return false;
17300 /* Try to reorder the input registers. */
17301 /* For example, the code
17302 mov r0, 0
17303 mov r1, 1
17304 str r1, [r2]
17305 str r0, [r2, #4]
17306 can be transformed into
17307 mov r1, 0
17308 mov r0, 1
17309 strd r0, [r2]
17311 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
17312 false, false))
17314 std::swap (operands[0], operands[1]);
17315 return true;
17318 /* Try to find a free DI register. */
17319 CLEAR_HARD_REG_SET (regset);
17320 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
17321 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
17322 while (true)
17324 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17325 if (tmp == NULL_RTX)
17326 return false;
17328 /* DREG must be an even-numbered register in DImode.
17329 Split it into SI registers. */
17330 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17331 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17332 gcc_assert (operands[0] != NULL_RTX);
17333 gcc_assert (operands[1] != NULL_RTX);
17334 gcc_assert (REGNO (operands[0]) % 2 == 0);
17335 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
17337 return (operands_ok_ldrd_strd (operands[0], operands[1],
17338 base, offset,
17339 false, load));
17343 return false;
17347 /* Return true if parallel execution of the two word-size accesses provided
17348 could be satisfied with a single LDRD/STRD instruction. Two word-size
17349 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
17350 register operands and OPERANDS[2,3] are the corresponding memory operands.
17352 bool
17353 valid_operands_ldrd_strd (rtx *operands, bool load)
17355 int nops = 2;
17356 HOST_WIDE_INT offsets[2], offset, align[2];
17357 rtx base = NULL_RTX;
17358 rtx cur_base, cur_offset;
17359 int i, gap;
17361 /* Check that the memory references are immediate offsets from the
17362 same base register. Extract the base register, the destination
17363 registers, and the corresponding memory offsets. */
17364 for (i = 0; i < nops; i++)
17366 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17367 &align[i]))
17368 return false;
17370 if (i == 0)
17371 base = cur_base;
17372 else if (REGNO (base) != REGNO (cur_base))
17373 return false;
17375 offsets[i] = INTVAL (cur_offset);
17376 if (GET_CODE (operands[i]) == SUBREG)
17377 return false;
17380 if (offsets[0] > offsets[1])
17381 return false;
17383 gap = offsets[1] - offsets[0];
17384 offset = offsets[0];
17386 /* Make sure accesses are to consecutive memory locations. */
17387 if (gap != GET_MODE_SIZE (SImode))
17388 return false;
17390 if (!align_ok_ldrd_strd (align[0], offset))
17391 return false;
17393 return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17394 false, load);
17398 /* Print a symbolic form of X to the debug file, F. */
17399 static void
17400 arm_print_value (FILE *f, rtx x)
17402 switch (GET_CODE (x))
17404 case CONST_INT:
17405 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
17406 return;
17408 case CONST_DOUBLE:
17410 char fpstr[20];
17411 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17412 sizeof (fpstr), 0, 1);
17413 fputs (fpstr, f);
17415 return;
17417 case CONST_VECTOR:
17419 int i;
17421 fprintf (f, "<");
17422 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
17424 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
17425 if (i < (CONST_VECTOR_NUNITS (x) - 1))
17426 fputc (',', f);
17428 fprintf (f, ">");
17430 return;
17432 case CONST_STRING:
17433 fprintf (f, "\"%s\"", XSTR (x, 0));
17434 return;
17436 case SYMBOL_REF:
17437 fprintf (f, "`%s'", XSTR (x, 0));
17438 return;
17440 case LABEL_REF:
17441 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
17442 return;
17444 case CONST:
17445 arm_print_value (f, XEXP (x, 0));
17446 return;
17448 case PLUS:
17449 arm_print_value (f, XEXP (x, 0));
17450 fprintf (f, "+");
17451 arm_print_value (f, XEXP (x, 1));
17452 return;
17454 case PC:
17455 fprintf (f, "pc");
17456 return;
17458 default:
17459 fprintf (f, "????");
17460 return;
17464 /* Routines for manipulation of the constant pool. */
17466 /* Arm instructions cannot load a large constant directly into a
17467 register; they have to come from a pc relative load. The constant
17468 must therefore be placed in the addressable range of the pc
17469 relative load. Depending on the precise pc relative load
17470 instruction the range is somewhere between 256 bytes and 4k. This
17471 means that we often have to dump a constant inside a function, and
17472 generate code to branch around it.
17474 It is important to minimize this, since the branches will slow
17475 things down and make the code larger.
17477 Normally we can hide the table after an existing unconditional
17478 branch so that there is no interruption of the flow, but in the
17479 worst case the code looks like this:
17481 ldr rn, L1
17483 b L2
17484 align
17485 L1: .long value
17489 ldr rn, L3
17491 b L4
17492 align
17493 L3: .long value
17497 We fix this by performing a scan after scheduling, which notices
17498 which instructions need to have their operands fetched from the
17499 constant table and builds the table.
17501 The algorithm starts by building a table of all the constants that
17502 need fixing up and all the natural barriers in the function (places
17503 where a constant table can be dropped without breaking the flow).
17504 For each fixup we note how far the pc-relative replacement will be
17505 able to reach and the offset of the instruction into the function.
17507 Having built the table we then group the fixes together to form
17508 tables that are as large as possible (subject to addressing
17509 constraints) and emit each table of constants after the last
17510 barrier that is within range of all the instructions in the group.
17511 If a group does not contain a barrier, then we forcibly create one
17512 by inserting a jump instruction into the flow. Once the table has
17513 been inserted, the insns are then modified to reference the
17514 relevant entry in the pool.
17516 Possible enhancements to the algorithm (not implemented) are:
17518 1) For some processors and object formats, there may be benefit in
17519 aligning the pools to the start of cache lines; this alignment
17520 would need to be taken into account when calculating addressability
17521 of a pool. */
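/* Concrete illustration (added for exposition, not in the original
   sources): a fix at address A whose pc-relative load can reach F bytes
   forward constrains its pool entry to lie no later than roughly
   A + F - minipool_pad; add_minipool_forward_ref below computes exactly
   this bound before grouping fixes and choosing a barrier that every
   member of the group can still reach. */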
17523 /* These typedefs are located at the start of this file, so that
17524 they can be used in the prototypes there. This comment is to
17525 remind readers of that fact so that the following structures
17526 can be understood more easily.
17528 typedef struct minipool_node Mnode;
17529 typedef struct minipool_fixup Mfix; */
17531 struct minipool_node
17533 /* Doubly linked chain of entries. */
17534 Mnode * next;
17535 Mnode * prev;
17536 /* The maximum offset into the code at which this entry can be placed. While
17537 pushing fixes for forward references, all entries are sorted in order
17538 of increasing max_address. */
17539 HOST_WIDE_INT max_address;
17540 /* Similarly for an entry inserted for a backwards ref. */
17541 HOST_WIDE_INT min_address;
17542 /* The number of fixes referencing this entry. This can become zero
17543 if we "unpush" an entry. In this case we ignore the entry when we
17544 come to emit the code. */
17545 int refcount;
17546 /* The offset from the start of the minipool. */
17547 HOST_WIDE_INT offset;
17548 /* The value in table. */
17549 rtx value;
17550 /* The mode of value. */
17551 machine_mode mode;
17552 /* The size of the value. With iWMMXt enabled
17553 sizes > 4 also imply an alignment of 8 bytes. */
17554 int fix_size;
17557 struct minipool_fixup
17559 Mfix * next;
17560 rtx_insn * insn;
17561 HOST_WIDE_INT address;
17562 rtx * loc;
17563 machine_mode mode;
17564 int fix_size;
17565 rtx value;
17566 Mnode * minipool;
17567 HOST_WIDE_INT forwards;
17568 HOST_WIDE_INT backwards;
17571 /* Fixes less than a word need padding out to a word boundary. */
17572 #define MINIPOOL_FIX_SIZE(mode) \
17573 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
17575 static Mnode * minipool_vector_head;
17576 static Mnode * minipool_vector_tail;
17577 static rtx_code_label *minipool_vector_label;
17578 static int minipool_pad;
17580 /* The linked list of all minipool fixes required for this function. */
17581 Mfix * minipool_fix_head;
17582 Mfix * minipool_fix_tail;
17583 /* The fix entry for the current minipool, once it has been placed. */
17584 Mfix * minipool_barrier;
17586 #ifndef JUMP_TABLES_IN_TEXT_SECTION
17587 #define JUMP_TABLES_IN_TEXT_SECTION 0
17588 #endif
17590 static HOST_WIDE_INT
17591 get_jump_table_size (rtx_jump_table_data *insn)
17593 /* ADDR_VECs only take room if read-only data goes into the text
17594 section. */
17595 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
17597 rtx body = PATTERN (insn);
17598 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
17599 HOST_WIDE_INT size;
17600 HOST_WIDE_INT modesize;
17602 modesize = GET_MODE_SIZE (GET_MODE (body));
17603 size = modesize * XVECLEN (body, elt);
17604 switch (modesize)
17606 case 1:
17607 /* Round up size of TBB table to a halfword boundary. */
17608 size = (size + 1) & ~HOST_WIDE_INT_1;
17609 break;
17610 case 2:
17611 /* No padding necessary for TBH. */
17612 break;
17613 case 4:
17614 /* Add two bytes for alignment on Thumb. */
17615 if (TARGET_THUMB)
17616 size += 2;
17617 break;
17618 default:
17619 gcc_unreachable ();
17621 return size;
17624 return 0;
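/* Worked example (added comment, not in the original sources): a QImode
   ADDR_DIFF_VEC with 5 entries (a TBB table) occupies 5 bytes, rounded up
   to 6 for the halfword boundary; the same table in HImode (TBH) needs
   exactly 10 bytes, and an SImode table of 5 entries needs 20 bytes plus
   2 bytes of alignment padding on Thumb. */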
17627 /* Emit insns to load the function address from FUNCDESC (an FDPIC
17628 function descriptor) into a register and the GOT address into the
17629 FDPIC register, returning an rtx for the register holding the
17630 function address. */
17633 arm_load_function_descriptor (rtx funcdesc)
17635 rtx fnaddr_reg = gen_reg_rtx (Pmode);
17636 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
17637 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
17638 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
17640 emit_move_insn (fnaddr_reg, fnaddr);
17642 /* The ABI requires the entry point address to be loaded first, but
17643 since we cannot support lazy binding for lack of atomic load of
17644 two 32-bit values, we do not need to bother to prevent the
17645 previous load from being moved after that of the GOT address. */
17646 emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));
17648 return fnaddr_reg;
17651 /* Return the maximum amount of padding that will be inserted before
17652 label LABEL. */
17653 static HOST_WIDE_INT
17654 get_label_padding (rtx label)
17656 HOST_WIDE_INT align, min_insn_size;
17658 align = 1 << label_to_alignment (label).levels[0].log;
17659 min_insn_size = TARGET_THUMB ? 2 : 4;
17660 return align > min_insn_size ? align - min_insn_size : 0;
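/* Example (added comment, not in the original sources): a label aligned to
   an 8-byte boundary can be preceded by up to 8 - 2 = 6 bytes of padding in
   Thumb state, or 8 - 4 = 4 bytes in ARM state; labels whose alignment does
   not exceed the minimum insn size need no allowance at all. */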
17663 /* Move a minipool fix MP from its current location to before MAX_MP.
17664 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17665 constraints may need updating. */
17666 static Mnode *
17667 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
17668 HOST_WIDE_INT max_address)
17670 /* The code below assumes these are different. */
17671 gcc_assert (mp != max_mp);
17673 if (max_mp == NULL)
17675 if (max_address < mp->max_address)
17676 mp->max_address = max_address;
17678 else
17680 if (max_address > max_mp->max_address - mp->fix_size)
17681 mp->max_address = max_mp->max_address - mp->fix_size;
17682 else
17683 mp->max_address = max_address;
17685 /* Unlink MP from its current position. Since max_mp is non-null,
17686 mp->prev must be non-null. */
17687 mp->prev->next = mp->next;
17688 if (mp->next != NULL)
17689 mp->next->prev = mp->prev;
17690 else
17691 minipool_vector_tail = mp->prev;
17693 /* Re-insert it before MAX_MP. */
17694 mp->next = max_mp;
17695 mp->prev = max_mp->prev;
17696 max_mp->prev = mp;
17698 if (mp->prev != NULL)
17699 mp->prev->next = mp;
17700 else
17701 minipool_vector_head = mp;
17704 /* Save the new entry. */
17705 max_mp = mp;
17707 /* Scan over the preceding entries and adjust their addresses as
17708 required. */
17709 while (mp->prev != NULL
17710 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17712 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17713 mp = mp->prev;
17716 return max_mp;
17719 /* Add a constant to the minipool for a forward reference. Returns the
17720 node added or NULL if the constant will not fit in this pool. */
17721 static Mnode *
17722 add_minipool_forward_ref (Mfix *fix)
17724 /* If set, max_mp is the first pool_entry that has a lower
17725 constraint than the one we are trying to add. */
17726 Mnode * max_mp = NULL;
17727 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
17728 Mnode * mp;
17730 /* If the minipool starts before the end of FIX->INSN then this FIX
17731 cannot be placed into the current pool. Furthermore, adding the
17732 new constant pool entry may cause the pool to start FIX_SIZE bytes
17733 earlier. */
17734 if (minipool_vector_head &&
17735 (fix->address + get_attr_length (fix->insn)
17736 >= minipool_vector_head->max_address - fix->fix_size))
17737 return NULL;
17739 /* Scan the pool to see if a constant with the same value has
17740 already been added. While we are doing this, also note the
17741 location where we must insert the constant if it doesn't already
17742 exist. */
17743 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17745 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17746 && fix->mode == mp->mode
17747 && (!LABEL_P (fix->value)
17748 || (CODE_LABEL_NUMBER (fix->value)
17749 == CODE_LABEL_NUMBER (mp->value)))
17750 && rtx_equal_p (fix->value, mp->value))
17752 /* More than one fix references this entry. */
17753 mp->refcount++;
17754 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
17757 /* Note the insertion point if necessary. */
17758 if (max_mp == NULL
17759 && mp->max_address > max_address)
17760 max_mp = mp;
17762 /* If we are inserting an 8-byte aligned quantity and
17763 we have not already found an insertion point, then
17764 make sure that all such 8-byte aligned quantities are
17765 placed at the start of the pool. */
17766 if (ARM_DOUBLEWORD_ALIGN
17767 && max_mp == NULL
17768 && fix->fix_size >= 8
17769 && mp->fix_size < 8)
17771 max_mp = mp;
17772 max_address = mp->max_address;
17776 /* The value is not currently in the minipool, so we need to create
17777 a new entry for it. If MAX_MP is NULL, the entry will be put on
17778 the end of the list since the placement is less constrained than
17779 any existing entry. Otherwise, we insert the new fix before
17780 MAX_MP and, if necessary, adjust the constraints on the other
17781 entries. */
17782 mp = XNEW (Mnode);
17783 mp->fix_size = fix->fix_size;
17784 mp->mode = fix->mode;
17785 mp->value = fix->value;
17786 mp->refcount = 1;
17787 /* Not yet required for a backwards ref. */
17788 mp->min_address = -65536;
17790 if (max_mp == NULL)
17792 mp->max_address = max_address;
17793 mp->next = NULL;
17794 mp->prev = minipool_vector_tail;
17796 if (mp->prev == NULL)
17798 minipool_vector_head = mp;
17799 minipool_vector_label = gen_label_rtx ();
17801 else
17802 mp->prev->next = mp;
17804 minipool_vector_tail = mp;
17806 else
17808 if (max_address > max_mp->max_address - mp->fix_size)
17809 mp->max_address = max_mp->max_address - mp->fix_size;
17810 else
17811 mp->max_address = max_address;
17813 mp->next = max_mp;
17814 mp->prev = max_mp->prev;
17815 max_mp->prev = mp;
17816 if (mp->prev != NULL)
17817 mp->prev->next = mp;
17818 else
17819 minipool_vector_head = mp;
17822 /* Save the new entry. */
17823 max_mp = mp;
17825 /* Scan over the preceding entries and adjust their addresses as
17826 required. */
17827 while (mp->prev != NULL
17828 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17830 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17831 mp = mp->prev;
17834 return max_mp;
17837 static Mnode *
17838 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
17839 HOST_WIDE_INT min_address)
17841 HOST_WIDE_INT offset;
17843 /* The code below assumes these are different. */
17844 gcc_assert (mp != min_mp);
17846 if (min_mp == NULL)
17848 if (min_address > mp->min_address)
17849 mp->min_address = min_address;
17851 else
17853 /* We will adjust this below if it is too loose. */
17854 mp->min_address = min_address;
17856 /* Unlink MP from its current position. Since min_mp is non-null,
17857 mp->next must be non-null. */
17858 mp->next->prev = mp->prev;
17859 if (mp->prev != NULL)
17860 mp->prev->next = mp->next;
17861 else
17862 minipool_vector_head = mp->next;
17864 /* Reinsert it after MIN_MP. */
17865 mp->prev = min_mp;
17866 mp->next = min_mp->next;
17867 min_mp->next = mp;
17868 if (mp->next != NULL)
17869 mp->next->prev = mp;
17870 else
17871 minipool_vector_tail = mp;
17874 min_mp = mp;
17876 offset = 0;
17877 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17879 mp->offset = offset;
17880 if (mp->refcount > 0)
17881 offset += mp->fix_size;
17883 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
17884 mp->next->min_address = mp->min_address + mp->fix_size;
17887 return min_mp;
17890 /* Add a constant to the minipool for a backward reference. Returns the
17891 node added or NULL if the constant will not fit in this pool.
17893 Note that the code for insertion for a backwards reference can be
17894 somewhat confusing because the calculated offsets for each fix do
17895 not take into account the size of the pool (which is still under
17896 construction). */
17897 static Mnode *
17898 add_minipool_backward_ref (Mfix *fix)
17900 /* If set, min_mp is the last pool_entry that has a lower constraint
17901 than the one we are trying to add. */
17902 Mnode *min_mp = NULL;
17903 /* This can be negative, since it is only a constraint. */
17904 HOST_WIDE_INT min_address = fix->address - fix->backwards;
17905 Mnode *mp;
17907 /* If we can't reach the current pool from this insn, or if we can't
17908 insert this entry at the end of the pool without pushing other
17909 fixes out of range, then we don't try. This ensures that we
17910 can't fail later on. */
17911 if (min_address >= minipool_barrier->address
17912 || (minipool_vector_tail->min_address + fix->fix_size
17913 >= minipool_barrier->address))
17914 return NULL;
17916 /* Scan the pool to see if a constant with the same value has
17917 already been added. While we are doing this, also note the
17918 location where we must insert the constant if it doesn't already
17919 exist. */
17920 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
17922 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17923 && fix->mode == mp->mode
17924 && (!LABEL_P (fix->value)
17925 || (CODE_LABEL_NUMBER (fix->value)
17926 == CODE_LABEL_NUMBER (mp->value)))
17927 && rtx_equal_p (fix->value, mp->value)
17928 /* Check that there is enough slack to move this entry to the
17929 end of the table (this is conservative). */
17930 && (mp->max_address
17931 > (minipool_barrier->address
17932 + minipool_vector_tail->offset
17933 + minipool_vector_tail->fix_size)))
17935 mp->refcount++;
17936 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
17939 if (min_mp != NULL)
17940 mp->min_address += fix->fix_size;
17941 else
17943 /* Note the insertion point if necessary. */
17944 if (mp->min_address < min_address)
17946 /* For now, we do not allow the insertion of 8-byte alignment
17947 requiring nodes anywhere but at the start of the pool. */
17948 if (ARM_DOUBLEWORD_ALIGN
17949 && fix->fix_size >= 8 && mp->fix_size < 8)
17950 return NULL;
17951 else
17952 min_mp = mp;
17954 else if (mp->max_address
17955 < minipool_barrier->address + mp->offset + fix->fix_size)
17957 /* Inserting before this entry would push the fix beyond
17958 its maximum address (which can happen if we have
17959 re-located a forwards fix); force the new fix to come
17960 after it. */
17961 if (ARM_DOUBLEWORD_ALIGN
17962 && fix->fix_size >= 8 && mp->fix_size < 8)
17963 return NULL;
17964 else
17966 min_mp = mp;
17967 min_address = mp->min_address + fix->fix_size;
17970 /* Do not insert a non-8-byte aligned quantity before 8-byte
17971 aligned quantities. */
17972 else if (ARM_DOUBLEWORD_ALIGN
17973 && fix->fix_size < 8
17974 && mp->fix_size >= 8)
17976 min_mp = mp;
17977 min_address = mp->min_address + fix->fix_size;
17982 /* We need to create a new entry. */
17983 mp = XNEW (Mnode);
17984 mp->fix_size = fix->fix_size;
17985 mp->mode = fix->mode;
17986 mp->value = fix->value;
17987 mp->refcount = 1;
17988 mp->max_address = minipool_barrier->address + 65536;
17990 mp->min_address = min_address;
17992 if (min_mp == NULL)
17994 mp->prev = NULL;
17995 mp->next = minipool_vector_head;
17997 if (mp->next == NULL)
17999 minipool_vector_tail = mp;
18000 minipool_vector_label = gen_label_rtx ();
18002 else
18003 mp->next->prev = mp;
18005 minipool_vector_head = mp;
18007 else
18009 mp->next = min_mp->next;
18010 mp->prev = min_mp;
18011 min_mp->next = mp;
18013 if (mp->next != NULL)
18014 mp->next->prev = mp;
18015 else
18016 minipool_vector_tail = mp;
18019 /* Save the new entry. */
18020 min_mp = mp;
18022 if (mp->prev)
18023 mp = mp->prev;
18024 else
18025 mp->offset = 0;
18027 /* Scan over the following entries and adjust their offsets. */
18028 while (mp->next != NULL)
18030 if (mp->next->min_address < mp->min_address + mp->fix_size)
18031 mp->next->min_address = mp->min_address + mp->fix_size;
18033 if (mp->refcount)
18034 mp->next->offset = mp->offset + mp->fix_size;
18035 else
18036 mp->next->offset = mp->offset;
18038 mp = mp->next;
18041 return min_mp;
18044 static void
18045 assign_minipool_offsets (Mfix *barrier)
18047 HOST_WIDE_INT offset = 0;
18048 Mnode *mp;
18050 minipool_barrier = barrier;
18052 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
18054 mp->offset = offset;
18056 if (mp->refcount > 0)
18057 offset += mp->fix_size;
18061 /* Output the literal table */
18062 static void
18063 dump_minipool (rtx_insn *scan)
18065 Mnode * mp;
18066 Mnode * nmp;
18067 int align64 = 0;
18069 if (ARM_DOUBLEWORD_ALIGN)
18070 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
18071 if (mp->refcount > 0 && mp->fix_size >= 8)
18073 align64 = 1;
18074 break;
18077 if (dump_file)
18078 fprintf (dump_file,
18079 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
18080 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
18082 scan = emit_label_after (gen_label_rtx (), scan);
18083 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
18084 scan = emit_label_after (minipool_vector_label, scan);
18086 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
18088 if (mp->refcount > 0)
18090 if (dump_file)
18092 fprintf (dump_file,
18093 ";; Offset %u, min %ld, max %ld ",
18094 (unsigned) mp->offset, (unsigned long) mp->min_address,
18095 (unsigned long) mp->max_address);
18096 arm_print_value (dump_file, mp->value);
18097 fputc ('\n', dump_file);
18100 rtx val = copy_rtx (mp->value);
18102 switch (GET_MODE_SIZE (mp->mode))
18104 #ifdef HAVE_consttable_1
18105 case 1:
18106 scan = emit_insn_after (gen_consttable_1 (val), scan);
18107 break;
18109 #endif
18110 #ifdef HAVE_consttable_2
18111 case 2:
18112 scan = emit_insn_after (gen_consttable_2 (val), scan);
18113 break;
18115 #endif
18116 #ifdef HAVE_consttable_4
18117 case 4:
18118 scan = emit_insn_after (gen_consttable_4 (val), scan);
18119 break;
18121 #endif
18122 #ifdef HAVE_consttable_8
18123 case 8:
18124 scan = emit_insn_after (gen_consttable_8 (val), scan);
18125 break;
18127 #endif
18128 #ifdef HAVE_consttable_16
18129 case 16:
18130 scan = emit_insn_after (gen_consttable_16 (val), scan);
18131 break;
18133 #endif
18134 default:
18135 gcc_unreachable ();
18139 nmp = mp->next;
18140 free (mp);
18143 minipool_vector_head = minipool_vector_tail = NULL;
18144 scan = emit_insn_after (gen_consttable_end (), scan);
18145 scan = emit_barrier_after (scan);
18148 /* Return the cost of forcibly inserting a barrier after INSN. */
18149 static int
18150 arm_barrier_cost (rtx_insn *insn)
18152 /* Basing the location of the pool on the loop depth is preferable,
18153 but at the moment, the basic block information seems to be
18154 corrupt by this stage of the compilation. */
18155 int base_cost = 50;
18156 rtx_insn *next = next_nonnote_insn (insn);
18158 if (next != NULL && LABEL_P (next))
18159 base_cost -= 20;
18161 switch (GET_CODE (insn))
18163 case CODE_LABEL:
18164 /* It will always be better to place the table before the label, rather
18165 than after it. */
18166 return 50;
18168 case INSN:
18169 case CALL_INSN:
18170 return base_cost;
18172 case JUMP_INSN:
18173 return base_cost - 10;
18175 default:
18176 return base_cost + 10;
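/* Illustration (added comment, not in the original sources): a JUMP_INSN
   that is immediately followed by a label scores 50 - 20 - 10 = 20, making
   it a preferred place to drop the pool, whereas an ordinary INSN with no
   following label keeps the base cost of 50. */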
18180 /* Find the best place in the insn stream in the range
18181 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
18182 Create the barrier by inserting a jump and add a new fix entry for
18183 it. */
18184 static Mfix *
18185 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
18187 HOST_WIDE_INT count = 0;
18188 rtx_barrier *barrier;
18189 rtx_insn *from = fix->insn;
18190 /* The instruction after which we will insert the jump. */
18191 rtx_insn *selected = NULL;
18192 int selected_cost;
18193 /* The address at which the jump instruction will be placed. */
18194 HOST_WIDE_INT selected_address;
18195 Mfix * new_fix;
18196 HOST_WIDE_INT max_count = max_address - fix->address;
18197 rtx_code_label *label = gen_label_rtx ();
18199 selected_cost = arm_barrier_cost (from);
18200 selected_address = fix->address;
18202 while (from && count < max_count)
18204 rtx_jump_table_data *tmp;
18205 int new_cost;
18207 /* This code shouldn't have been called if there was a natural barrier
18208 within range. */
18209 gcc_assert (!BARRIER_P (from));
18211 /* Count the length of this insn. This must stay in sync with the
18212 code that pushes minipool fixes. */
18213 if (LABEL_P (from))
18214 count += get_label_padding (from);
18215 else
18216 count += get_attr_length (from);
18218 /* If there is a jump table, add its length. */
18219 if (tablejump_p (from, NULL, &tmp))
18221 count += get_jump_table_size (tmp);
18223 /* Jump tables aren't in a basic block, so base the cost on
18224 the dispatch insn. If we select this location, we will
18225 still put the pool after the table. */
18226 new_cost = arm_barrier_cost (from);
18228 if (count < max_count
18229 && (!selected || new_cost <= selected_cost))
18231 selected = tmp;
18232 selected_cost = new_cost;
18233 selected_address = fix->address + count;
18236 /* Continue after the dispatch table. */
18237 from = NEXT_INSN (tmp);
18238 continue;
18241 new_cost = arm_barrier_cost (from);
18243 if (count < max_count
18244 && (!selected || new_cost <= selected_cost))
18246 selected = from;
18247 selected_cost = new_cost;
18248 selected_address = fix->address + count;
18251 from = NEXT_INSN (from);
18254 /* Make sure that we found a place to insert the jump. */
18255 gcc_assert (selected);
18257 /* Create a new JUMP_INSN that branches around a barrier. */
18258 from = emit_jump_insn_after (gen_jump (label), selected);
18259 JUMP_LABEL (from) = label;
18260 barrier = emit_barrier_after (from);
18261 emit_label_after (label, barrier);
18263 /* Create a minipool barrier entry for the new barrier. */
18264 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
18265 new_fix->insn = barrier;
18266 new_fix->address = selected_address;
18267 new_fix->next = fix->next;
18268 fix->next = new_fix;
18270 return new_fix;
18273 /* Record that there is a natural barrier in the insn stream at
18274 ADDRESS. */
18275 static void
18276 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
18278 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18280 fix->insn = insn;
18281 fix->address = address;
18283 fix->next = NULL;
18284 if (minipool_fix_head != NULL)
18285 minipool_fix_tail->next = fix;
18286 else
18287 minipool_fix_head = fix;
18289 minipool_fix_tail = fix;
18292 /* Record INSN, which will need fixing up to load a value from the
18293 minipool. ADDRESS is the offset of the insn since the start of the
18294 function; LOC is a pointer to the part of the insn which requires
18295 fixing; VALUE is the constant that must be loaded, which is of type
18296 MODE. */
18297 static void
18298 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
18299 machine_mode mode, rtx value)
18301 gcc_assert (!arm_disable_literal_pool);
18302 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18304 fix->insn = insn;
18305 fix->address = address;
18306 fix->loc = loc;
18307 fix->mode = mode;
18308 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
18309 fix->value = value;
18310 fix->forwards = get_attr_pool_range (insn);
18311 fix->backwards = get_attr_neg_pool_range (insn);
18312 fix->minipool = NULL;
18314 /* If an insn doesn't have a range defined for it, then it isn't
18315 expecting to be reworked by this code. Better to stop now than
18316 to generate duff assembly code. */
18317 gcc_assert (fix->forwards || fix->backwards);
18319 /* If an entry requires 8-byte alignment then assume all constant pools
18320 require 4 bytes of padding. Trying to do this later on a per-pool
18321 basis is awkward because existing pool entries have to be modified. */
18322 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
18323 minipool_pad = 4;
18325 if (dump_file)
18327 fprintf (dump_file,
18328 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
18329 GET_MODE_NAME (mode),
18330 INSN_UID (insn), (unsigned long) address,
18331 -1 * (long)fix->backwards, (long)fix->forwards);
18332 arm_print_value (dump_file, fix->value);
18333 fprintf (dump_file, "\n");
18336 /* Add it to the chain of fixes. */
18337 fix->next = NULL;
18339 if (minipool_fix_head != NULL)
18340 minipool_fix_tail->next = fix;
18341 else
18342 minipool_fix_head = fix;
18344 minipool_fix_tail = fix;
18347 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
18348 Returns the number of insns needed, or 99 if we always want to synthesize
18349 the value. */
18351 arm_max_const_double_inline_cost ()
18353 return ((optimize_size || arm_ld_sched) ? 3 : 4);
18356 /* Return the cost of synthesizing a 64-bit constant VAL inline.
18357 Returns the number of insns needed, or 99 if we don't know how to
18358 do it. */
18360 arm_const_double_inline_cost (rtx val)
18362 rtx lowpart, highpart;
18363 machine_mode mode;
18365 mode = GET_MODE (val);
18367 if (mode == VOIDmode)
18368 mode = DImode;
18370 gcc_assert (GET_MODE_SIZE (mode) == 8);
18372 lowpart = gen_lowpart (SImode, val);
18373 highpart = gen_highpart_mode (SImode, mode, val);
18375 gcc_assert (CONST_INT_P (lowpart));
18376 gcc_assert (CONST_INT_P (highpart));
18378 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
18379 NULL_RTX, NULL_RTX, 0, 0)
18380 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
18381 NULL_RTX, NULL_RTX, 0, 0));
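/* Worked example (added comment, not in the original sources): for the
   DImode constant 0x0000000100000001 each 32-bit half is the valid
   immediate 1, so the cost is 1 + 1 = 2 insns, which is within the limit
   of 3 (or 4) returned by arm_max_const_double_inline_cost above. */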
18384 /* Cost of loading a SImode constant. */
18385 static inline int
18386 arm_const_inline_cost (enum rtx_code code, rtx val)
18388 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
18389 NULL_RTX, NULL_RTX, 1, 0);
18392 /* Return true if it is worthwhile to split a 64-bit constant into two
18393 32-bit operations. This is the case if optimizing for size, or
18394 if we have load delay slots, or if one 32-bit part can be done with
18395 a single data operation. */
18396 bool
18397 arm_const_double_by_parts (rtx val)
18399 machine_mode mode = GET_MODE (val);
18400 rtx part;
18402 if (optimize_size || arm_ld_sched)
18403 return true;
18405 if (mode == VOIDmode)
18406 mode = DImode;
18408 part = gen_highpart_mode (SImode, mode, val);
18410 gcc_assert (CONST_INT_P (part));
18412 if (const_ok_for_arm (INTVAL (part))
18413 || const_ok_for_arm (~INTVAL (part)))
18414 return true;
18416 part = gen_lowpart (SImode, val);
18418 gcc_assert (CONST_INT_P (part));
18420 if (const_ok_for_arm (INTVAL (part))
18421 || const_ok_for_arm (~INTVAL (part)))
18422 return true;
18424 return false;
18427 /* Return true if it is possible to inline both the high and low parts
18428 of a 64-bit constant into 32-bit data processing instructions. */
18429 bool
18430 arm_const_double_by_immediates (rtx val)
18432 machine_mode mode = GET_MODE (val);
18433 rtx part;
18435 if (mode == VOIDmode)
18436 mode = DImode;
18438 part = gen_highpart_mode (SImode, mode, val);
18440 gcc_assert (CONST_INT_P (part));
18442 if (!const_ok_for_arm (INTVAL (part)))
18443 return false;
18445 part = gen_lowpart (SImode, val);
18447 gcc_assert (CONST_INT_P (part));
18449 if (!const_ok_for_arm (INTVAL (part)))
18450 return false;
18452 return true;
18455 /* Scan INSN and note any of its operands that need fixing.
18456 If DO_PUSHES is false we do not actually push any of the fixups
18457 needed. */
18458 static void
18459 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
18461 int opno;
18463 extract_constrain_insn (insn);
18465 if (recog_data.n_alternatives == 0)
18466 return;
18468 /* Fill in recog_op_alt with information about the constraints of
18469 this insn. */
18470 preprocess_constraints (insn);
18472 const operand_alternative *op_alt = which_op_alt ();
18473 for (opno = 0; opno < recog_data.n_operands; opno++)
18475 /* Things we need to fix can only occur in inputs. */
18476 if (recog_data.operand_type[opno] != OP_IN)
18477 continue;
18479 /* If this alternative is a memory reference, then any mention
18480 of constants in this alternative is really to fool reload
18481 into allowing us to accept one there. We need to fix them up
18482 now so that we output the right code. */
18483 if (op_alt[opno].memory_ok)
18485 rtx op = recog_data.operand[opno];
18487 if (CONSTANT_P (op))
18489 if (do_pushes)
18490 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
18491 recog_data.operand_mode[opno], op);
18493 else if (MEM_P (op)
18494 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
18495 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
18497 if (do_pushes)
18499 rtx cop = avoid_constant_pool_reference (op);
18501 /* Casting the address of something to a mode narrower
18502 than a word can cause avoid_constant_pool_reference()
18503 to return the pool reference itself. That's no good to
18504 us here. Let's just hope that we can use the
18505 constant pool value directly. */
18506 if (op == cop)
18507 cop = get_pool_constant (XEXP (op, 0));
18509 push_minipool_fix (insn, address,
18510 recog_data.operand_loc[opno],
18511 recog_data.operand_mode[opno], cop);
18518 return;
18521 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
18522 and unions in the context of ARMv8-M Security Extensions. It is used as a
18523 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
18524 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
18525 or four masks, depending on whether it is being computed for a
18526 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
18527 respectively. The tree for the type of the argument or a field within an
18528 argument is passed in ARG_TYPE, the current register this argument or field
18529 starts in is kept in the pointer REGNO and updated accordingly, the bit this
18530 argument or field starts at is passed in STARTING_BIT and the last used bit
18531 is kept in LAST_USED_BIT which is also updated accordingly. */
18533 static unsigned HOST_WIDE_INT
18534 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
18535 uint32_t * padding_bits_to_clear,
18536 unsigned starting_bit, int * last_used_bit)
18539 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
18541 if (TREE_CODE (arg_type) == RECORD_TYPE)
18543 unsigned current_bit = starting_bit;
18544 tree field;
18545 long int offset, size;
18548 field = TYPE_FIELDS (arg_type);
18549 while (field)
18551 /* The offset within a structure is always an offset from
18552 the start of that structure. Make sure we take that into the
18553 calculation of the register based offset that we use here. */
18554 offset = starting_bit;
18555 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
18556 offset %= 32;
18558 /* This is the actual size of the field, for bitfields this is the
18559 bitfield width and not the container size. */
18560 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18562 if (*last_used_bit != offset)
18564 if (offset < *last_used_bit)
18566 /* This field's offset is before the 'last_used_bit', that
18567 means this field goes on the next register. So we need to
18568 pad the rest of the current register and increase the
18569 register number. */
18570 uint32_t mask;
18571 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
18572 mask++;
18574 padding_bits_to_clear[*regno] |= mask;
18575 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18576 (*regno)++;
18578 else
18580 /* Otherwise we pad the bits between the last field's end and
18581 the start of the new field. */
18582 uint32_t mask;
18584 mask = ((uint32_t)-1) >> (32 - offset);
18585 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
18586 padding_bits_to_clear[*regno] |= mask;
18588 current_bit = offset;
18591 /* Calculate further padding bits for inner structs/unions too. */
18592 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
18594 *last_used_bit = current_bit;
18595 not_to_clear_reg_mask
18596 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
18597 padding_bits_to_clear, offset,
18598 last_used_bit);
18600 else
18602 /* Update 'current_bit' with this field's size. If the
18603 'current_bit' lies in a subsequent register, update 'regno' and
18604 reset 'current_bit' to point to the current bit in that new
18605 register. */
18606 current_bit += size;
18607 while (current_bit >= 32)
18609 current_bit-=32;
18610 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18611 (*regno)++;
18613 *last_used_bit = current_bit;
18616 field = TREE_CHAIN (field);
18618 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18620 else if (TREE_CODE (arg_type) == UNION_TYPE)
18622 tree field, field_t;
18623 int i, regno_t, field_size;
18624 int max_reg = -1;
18625 int max_bit = -1;
18626 uint32_t mask;
18627 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
18628 = {-1, -1, -1, -1};
18630 /* To compute the padding bits in a union we only consider bits as
18631 padding bits if they are always either a padding bit or fall outside a
18632 field's size for all fields in the union. */
18633 field = TYPE_FIELDS (arg_type);
18634 while (field)
18636 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
18637 = {0U, 0U, 0U, 0U};
18638 int last_used_bit_t = *last_used_bit;
18639 regno_t = *regno;
18640 field_t = TREE_TYPE (field);
18642 /* If the field's type is either a record or a union make sure to
18643 compute their padding bits too. */
18644 if (RECORD_OR_UNION_TYPE_P (field_t))
18645 not_to_clear_reg_mask
18646 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
18647 &padding_bits_to_clear_t[0],
18648 starting_bit, &last_used_bit_t);
18649 else
18651 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18652 regno_t = (field_size / 32) + *regno;
18653 last_used_bit_t = (starting_bit + field_size) % 32;
18656 for (i = *regno; i < regno_t; i++)
18658 /* For all but the last register used by this field only keep the
18659 padding bits that were padding bits in this field. */
18660 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
18663 /* For the last register, keep all padding bits that were padding
18664 bits in this field and any padding bits that are still valid
18665 as padding bits but fall outside of this field's size. */
18666 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
18667 padding_bits_to_clear_res[regno_t]
18668 &= padding_bits_to_clear_t[regno_t] | mask;
18670 /* Update the maximum size of the fields in terms of registers used
18671 ('max_reg') and the 'last_used_bit' in said register. */
18672 if (max_reg < regno_t)
18674 max_reg = regno_t;
18675 max_bit = last_used_bit_t;
18677 else if (max_reg == regno_t && max_bit < last_used_bit_t)
18678 max_bit = last_used_bit_t;
18680 field = TREE_CHAIN (field);
18683 /* Update the current padding_bits_to_clear using the intersection of the
18684 padding bits of all the fields. */
18685 for (i=*regno; i < max_reg; i++)
18686 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
18688 /* Do not keep trailing padding bits, we do not know yet whether this
18689 is the end of the argument. */
18690 mask = ((uint32_t) 1 << max_bit) - 1;
18691 padding_bits_to_clear[max_reg]
18692 |= padding_bits_to_clear_res[max_reg] & mask;
18694 *regno = max_reg;
18695 *last_used_bit = max_bit;
18697 else
18698 /* This function should only be used for structs and unions. */
18699 gcc_unreachable ();
18701 return not_to_clear_reg_mask;
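/* As an illustration of the mask arithmetic above, consider an AAPCS
   argument of type struct { char c; int x; }: 'c' occupies bits [0,8) of
   r0 and 'x' starts in the next register, so bits [8,32) of r0 are padding
   that must be cleared.  A minimal standalone sketch of the computation
   follows; the helper name is invented for this example and is not used by
   the compiler.  */

static uint32_t
example_padding_mask (int last_used_bit, int offset)
{
  /* Select the bits in [last_used_bit, offset) of one 32-bit register.  */
  uint32_t mask = ((uint32_t) -1) >> (32 - offset);
  mask -= ((uint32_t) 1 << last_used_bit) - 1;
  return mask;
}

/* example_padding_mask (8, 32) == 0xffffff00, i.e. the three trailing
   padding bytes of r0 for the struct above.  */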
18704 /* In the context of ARMv8-M Security Extensions, this function is used for both
18705 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
18706 registers are used when returning or passing arguments, which is then
18707 returned as a mask. It will also compute a mask to indicate padding/unused
18708 bits for each of these registers, and pass it back through the
18709 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
18710 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18711 the starting register used to pass this argument or return value is passed
18712 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18713 for struct and union types. */
18715 static unsigned HOST_WIDE_INT
18716 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
18717 uint32_t * padding_bits_to_clear)
18720 int last_used_bit = 0;
18721 unsigned HOST_WIDE_INT not_to_clear_mask;
18723 if (RECORD_OR_UNION_TYPE_P (arg_type))
18725 not_to_clear_mask
18726 = comp_not_to_clear_mask_str_un (arg_type, &regno,
18727 padding_bits_to_clear, 0,
18728 &last_used_bit);
18731 /* If the 'last_used_bit' is not zero, that means we are still using a
18732 part of the last 'regno'. In such cases we must clear the trailing
18733 bits. Otherwise we are not using regno and we should mark it as
18734 to be cleared. */
18735 if (last_used_bit != 0)
18736 padding_bits_to_clear[regno]
18737 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
18738 else
18739 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
18741 else
18743 not_to_clear_mask = 0;
18744 /* We are not dealing with structs or unions, so these arguments may be
18745 passed in floating point registers too. In some cases a BLKmode is
18746 used when returning or passing arguments in multiple VFP registers. */
18747 if (GET_MODE (arg_rtx) == BLKmode)
18749 int i, arg_regs;
18750 rtx reg;
18752 /* This should really only occur when dealing with the hard-float
18753 ABI. */
18754 gcc_assert (TARGET_HARD_FLOAT_ABI);
18756 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
18758 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
18759 gcc_assert (REG_P (reg));
18761 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
18763 /* If we are dealing with DF mode, make sure we don't
18764 clear either of the registers it addresses. */
18765 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
18766 if (arg_regs > 1)
18768 unsigned HOST_WIDE_INT mask;
18769 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
18770 mask -= HOST_WIDE_INT_1U << REGNO (reg);
18771 not_to_clear_mask |= mask;
18775 else
18777 /* Otherwise we can rely on the MODE to determine how many registers
18778 are being used by this argument. */
18779 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
18780 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18781 if (arg_regs > 1)
18783 unsigned HOST_WIDE_INT
18784 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
18785 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18786 not_to_clear_mask |= mask;
18791 return not_to_clear_mask;
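/* As an illustration of the multi-register case above: an argument that
   occupies NREGS consecutive core registers starting at REGNO contributes a
   contiguous run of bits to the not-to-clear mask.  A minimal standalone
   sketch (the function name is invented for this example); e.g. a DFmode
   argument passed in r2 and r3 under the soft-float ABI gives
   example_arg_reg_mask (2, 2) == 0xc.  */

static unsigned HOST_WIDE_INT
example_arg_reg_mask (unsigned int regno, unsigned int nregs)
{
  unsigned HOST_WIDE_INT mask = HOST_WIDE_INT_1U << (regno + nregs);
  mask -= HOST_WIDE_INT_1U << regno;
  return mask;
}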
18794 /* Clear registers that may hold secrets before doing a cmse_nonsecure_call or returning from
18795 a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates which registers
18796 are to be fully cleared, using the value in register CLEARING_REG if more
18797 efficient. The PADDING_BITS_LEN entries array PADDING_BITS_TO_CLEAR gives
18798 the bits that need to be cleared in caller-saved core registers, with
18799 SCRATCH_REG used as a scratch register for that clearing.
18801 NOTE: one of three following assertions must hold:
18802 - SCRATCH_REG is a low register
18803 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
18804 in TO_CLEAR_BITMAP)
18805 - CLEARING_REG is a low register. */
18807 static void
18808 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
18809 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
18811 bool saved_clearing = false;
18812 rtx saved_clearing_reg = NULL_RTX;
18813 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
18815 gcc_assert (arm_arch_cmse);
18817 if (!bitmap_empty_p (to_clear_bitmap))
18819 minregno = bitmap_first_set_bit (to_clear_bitmap);
18820 maxregno = bitmap_last_set_bit (to_clear_bitmap);
18822 clearing_regno = REGNO (clearing_reg);
18824 /* Clear padding bits. */
18825 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
18826 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
18828 uint64_t mask;
18829 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
18831 if (padding_bits_to_clear[i] == 0)
18832 continue;
18834 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18835 CLEARING_REG as scratch. */
18836 if (TARGET_THUMB1
18837 && REGNO (scratch_reg) > LAST_LO_REGNUM)
18839 /* clearing_reg is not to be cleared, copy its value into scratch_reg
18840 such that we can use clearing_reg to clear the unused bits in the
18841 arguments. */
18842 if ((clearing_regno > maxregno
18843 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
18844 && !saved_clearing)
18846 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
18847 emit_move_insn (scratch_reg, clearing_reg);
18848 saved_clearing = true;
18849 saved_clearing_reg = scratch_reg;
18851 scratch_reg = clearing_reg;
18854 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
18855 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
18856 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
18858 /* Fill the top half of the negated padding_bits_to_clear[i]. */
18859 mask = (~padding_bits_to_clear[i]) >> 16;
18860 rtx16 = gen_int_mode (16, SImode);
18861 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
18862 if (mask)
18863 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
18865 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
18867 if (saved_clearing)
18868 emit_move_insn (clearing_reg, saved_clearing_reg);
18871 /* Clear full registers. */
18873 if (TARGET_HAVE_FPCXT_CMSE)
18875 rtvec vunspec_vec;
18876 int i, j, k, nb_regs;
18877 rtx use_seq, par, reg, set, vunspec;
18878 int to_clear_bitmap_size = SBITMAP_SIZE (to_clear_bitmap);
18879 auto_sbitmap core_regs_bitmap (to_clear_bitmap_size);
18880 auto_sbitmap to_clear_core_bitmap (to_clear_bitmap_size);
18882 for (i = FIRST_VFP_REGNUM; i <= maxregno; i += nb_regs)
18884 /* Find next register to clear and exit if none. */
18885 for (; i <= maxregno && !bitmap_bit_p (to_clear_bitmap, i); i++);
18886 if (i > maxregno)
18887 break;
18889 /* Compute number of consecutive registers to clear. */
18890 for (j = i; j <= maxregno && bitmap_bit_p (to_clear_bitmap, j);
18891 j++);
18892 nb_regs = j - i;
18894 /* Create VSCCLRM RTX pattern. */
18895 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 1));
18896 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18897 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18898 VUNSPEC_VSCCLRM_VPR);
18899 XVECEXP (par, 0, 0) = vunspec;
18901 /* Insert VFP register clearing RTX in the pattern. */
18902 start_sequence ();
18903 for (k = 1, j = i; j <= maxregno && k < nb_regs + 1; j++)
18905 if (!bitmap_bit_p (to_clear_bitmap, j))
18906 continue;
18908 reg = gen_rtx_REG (SFmode, j);
18909 set = gen_rtx_SET (reg, const0_rtx);
18910 XVECEXP (par, 0, k++) = set;
18911 emit_use (reg);
18913 use_seq = get_insns ();
18914 end_sequence ();
18916 emit_insn_after (use_seq, emit_insn (par));
18919 /* Get set of core registers to clear. */
18920 bitmap_clear (core_regs_bitmap);
18921 bitmap_set_range (core_regs_bitmap, R0_REGNUM,
18922 IP_REGNUM - R0_REGNUM + 1);
18923 bitmap_and (to_clear_core_bitmap, to_clear_bitmap,
18924 core_regs_bitmap);
18925 gcc_assert (!bitmap_empty_p (to_clear_core_bitmap));
18927 if (bitmap_empty_p (to_clear_core_bitmap))
18928 return;
18930 /* Create clrm RTX pattern. */
18931 nb_regs = bitmap_count_bits (to_clear_core_bitmap);
18932 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 2));
18934 /* Insert core register clearing RTX in the pattern. */
18935 start_sequence ();
18936 for (j = 0, i = minregno; j < nb_regs; i++)
18938 if (!bitmap_bit_p (to_clear_core_bitmap, i))
18939 continue;
18941 reg = gen_rtx_REG (SImode, i);
18942 set = gen_rtx_SET (reg, const0_rtx);
18943 XVECEXP (par, 0, j++) = set;
18944 emit_use (reg);
18947 /* Insert APSR register clearing RTX in the pattern
18948 along with clobbering CC. */
18949 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18950 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18951 VUNSPEC_CLRM_APSR);
18953 XVECEXP (par, 0, j++) = vunspec;
18955 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
18956 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
18957 XVECEXP (par, 0, j) = clobber;
18959 use_seq = get_insns ();
18960 end_sequence ();
18962 emit_insn_after (use_seq, emit_insn (par));
18964 else
18966 /* If not marked for clearing, clearing_reg already does not contain
18967 any secret. */
18968 if (clearing_regno <= maxregno
18969 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
18971 emit_move_insn (clearing_reg, const0_rtx);
18972 emit_use (clearing_reg);
18973 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
18976 for (regno = minregno; regno <= maxregno; regno++)
18978 if (!bitmap_bit_p (to_clear_bitmap, regno))
18979 continue;
18981 if (IS_VFP_REGNUM (regno))
18983 /* If regno is an even vfp register and its successor is also to
18984 be cleared, use vmov. */
18985 if (TARGET_VFP_DOUBLE
18986 && VFP_REGNO_OK_FOR_DOUBLE (regno)
18987 && bitmap_bit_p (to_clear_bitmap, regno + 1))
18989 emit_move_insn (gen_rtx_REG (DFmode, regno),
18990 CONST1_RTX (DFmode));
18991 emit_use (gen_rtx_REG (DFmode, regno));
18992 regno++;
18994 else
18996 emit_move_insn (gen_rtx_REG (SFmode, regno),
18997 CONST1_RTX (SFmode));
18998 emit_use (gen_rtx_REG (SFmode, regno));
19001 else
19003 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
19004 emit_use (gen_rtx_REG (SImode, regno));
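/* As an illustration, the padding-bit clearing emitted above amounts to the
   following plain-C computation: the scratch register is first loaded with
   the low 16 bits of the negated padding mask (a movw), its upper half is
   then overwritten with the high 16 bits when they are non-zero (the
   ZERO_EXTRACT models a movt), and the argument register is ANDed with the
   result.  The function name below is invented for this sketch and is not
   used by the compiler.  */

static uint32_t
example_clear_padding_bits (uint32_t arg_reg, uint32_t padding_bits)
{
  uint32_t scratch = ~padding_bits & 0xffff;		/* movw */
  scratch |= (~padding_bits >> 16) << 16;		/* movt (skipped when zero) */
  return arg_reg & scratch;				/* ands */
}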
19010 /* Clear core and caller-saved VFP registers not used to pass arguments before
19011 a cmse_nonsecure_call. Saving, clearing and restoring of VFP callee-saved
19012 registers is done in the __gnu_cmse_nonsecure_call libcall. See
19013 libgcc/config/arm/cmse_nonsecure_call.S. */
19015 static void
19016 cmse_nonsecure_call_inline_register_clear (void)
19018 basic_block bb;
19020 FOR_EACH_BB_FN (bb, cfun)
19022 rtx_insn *insn;
19024 FOR_BB_INSNS (bb, insn)
19026 bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE;
19027 /* frame = VFP regs + FPSCR + VPR. */
19028 unsigned lazy_store_stack_frame_size
19029 = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD;
19030 unsigned long callee_saved_mask
19031 = ((1 << (LAST_HI_REGNUM + 1)) - 1)
19032 & ~((1 << (LAST_ARG_REGNUM + 1)) - 1);
19033 unsigned address_regnum, regno;
19034 unsigned max_int_regno
19035 = clear_callee_saved ? IP_REGNUM : LAST_ARG_REGNUM;
19036 unsigned max_fp_regno
19037 = TARGET_HAVE_FPCXT_CMSE ? LAST_VFP_REGNUM : D7_VFP_REGNUM;
19038 unsigned maxregno
19039 = TARGET_HARD_FLOAT_ABI ? max_fp_regno : max_int_regno;
19040 auto_sbitmap to_clear_bitmap (maxregno + 1);
19041 rtx_insn *seq;
19042 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
19043 rtx address;
19044 CUMULATIVE_ARGS args_so_far_v;
19045 cumulative_args_t args_so_far;
19046 tree arg_type, fntype;
19047 bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI;
19048 function_args_iterator args_iter;
19049 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
19051 if (!NONDEBUG_INSN_P (insn))
19052 continue;
19054 if (!CALL_P (insn))
19055 continue;
19057 pat = PATTERN (insn);
19058 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
19059 call = XVECEXP (pat, 0, 0);
19061 /* Get the real call RTX if the insn sets a value, ie. returns. */
19062 if (GET_CODE (call) == SET)
19063 call = SET_SRC (call);
19065 /* Check if it is a cmse_nonsecure_call. */
19066 unspec = XEXP (call, 0);
19067 if (GET_CODE (unspec) != UNSPEC
19068 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
19069 continue;
19071 /* Mark registers that need to be cleared. Those that hold a
19072 parameter are removed from the set further below. */
19073 bitmap_clear (to_clear_bitmap);
19074 bitmap_set_range (to_clear_bitmap, R0_REGNUM,
19075 max_int_regno - R0_REGNUM + 1);
19077 /* Only look at the caller-saved floating point registers in case of
19078 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
19079 lazy store and loads which clear both caller- and callee-saved
19080 registers. */
19081 if (!lazy_fpclear)
19083 auto_sbitmap float_bitmap (maxregno + 1);
19085 bitmap_clear (float_bitmap);
19086 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
19087 max_fp_regno - FIRST_VFP_REGNUM + 1);
19088 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
19091 /* Make sure the register used to hold the function address is not
19092 cleared. */
19093 address = RTVEC_ELT (XVEC (unspec, 0), 0);
19094 gcc_assert (MEM_P (address));
19095 gcc_assert (REG_P (XEXP (address, 0)));
19096 address_regnum = REGNO (XEXP (address, 0));
19097 if (address_regnum <= max_int_regno)
19098 bitmap_clear_bit (to_clear_bitmap, address_regnum);
19100 /* Set basic block of call insn so that df rescan is performed on
19101 insns inserted here. */
19102 set_block_for_insn (insn, bb);
19103 df_set_flags (DF_DEFER_INSN_RESCAN);
19104 start_sequence ();
19106 /* Make sure the scheduler doesn't schedule other insns beyond
19107 here. */
19108 emit_insn (gen_blockage ());
19110 /* Walk through all arguments and clear registers appropriately. */
19112 fntype = TREE_TYPE (MEM_EXPR (address));
19113 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
19114 NULL_TREE);
19115 args_so_far = pack_cumulative_args (&args_so_far_v);
19116 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
19118 rtx arg_rtx;
19119 uint64_t to_clear_args_mask;
19121 if (VOID_TYPE_P (arg_type))
19122 continue;
19124 function_arg_info arg (arg_type, /*named=*/true);
19125 if (!first_param)
19126 /* ??? We should advance after processing the argument and pass
19127 the argument we're advancing past. */
19128 arm_function_arg_advance (args_so_far, arg);
19130 arg_rtx = arm_function_arg (args_so_far, arg);
19131 gcc_assert (REG_P (arg_rtx));
19132 to_clear_args_mask
19133 = compute_not_to_clear_mask (arg_type, arg_rtx,
19134 REGNO (arg_rtx),
19135 &padding_bits_to_clear[0]);
19136 if (to_clear_args_mask)
19138 for (regno = R0_REGNUM; regno <= maxregno; regno++)
19140 if (to_clear_args_mask & (1ULL << regno))
19141 bitmap_clear_bit (to_clear_bitmap, regno);
19145 first_param = false;
19148 /* We use right shift and left shift to clear the LSB of the address
19149 we jump to instead of using bic, to avoid having to use an extra
19150 register on Thumb-1. */
19151 clearing_reg = XEXP (address, 0);
19152 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
19153 emit_insn (gen_rtx_SET (clearing_reg, shift));
19154 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
19155 emit_insn (gen_rtx_SET (clearing_reg, shift));
19157 if (clear_callee_saved)
19159 rtx push_insn =
19160 emit_multi_reg_push (callee_saved_mask, callee_saved_mask);
19161 /* Disable frame debug info in push because it needs to be
19162 disabled for pop (see below). */
19163 RTX_FRAME_RELATED_P (push_insn) = 0;
19165 /* Lazy store multiple. */
19166 if (lazy_fpclear)
19168 rtx imm;
19169 rtx_insn *add_insn;
19171 imm = gen_int_mode (- lazy_store_stack_frame_size, SImode);
19172 add_insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19173 stack_pointer_rtx, imm));
19174 /* If we have the frame pointer, then it will be the
19175 CFA reg. Otherwise, the stack pointer is the CFA
19176 reg, so we need to emit a CFA adjust. */
19177 if (!frame_pointer_needed)
19178 arm_add_cfa_adjust_cfa_note (add_insn,
19179 - lazy_store_stack_frame_size,
19180 stack_pointer_rtx,
19181 stack_pointer_rtx);
19182 emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx));
19184 /* Save VFP callee-saved registers. */
19185 else
19187 vfp_emit_fstmd (D7_VFP_REGNUM + 1,
19188 (max_fp_regno - D7_VFP_REGNUM) / 2);
19189 /* Disable frame debug info in push because it needs to be
19190 disabled for vpop (see below). */
19191 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19195 /* Clear caller-saved registers that leak before doing a non-secure
19196 call. */
19197 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
19198 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
19199 NUM_ARG_REGS, ip_reg, clearing_reg);
19201 seq = get_insns ();
19202 end_sequence ();
19203 emit_insn_before (seq, insn);
19205 /* The AAPCS requires the callee to widen integral types narrower
19206 than 32 bits to the full width of the register; but when handling
19207 calls to non-secure space, we cannot trust the callee to have
19208 correctly done so. So forcibly re-widen the result here. */
19209 tree ret_type = TREE_TYPE (fntype);
19210 if ((TREE_CODE (ret_type) == INTEGER_TYPE
19211 || TREE_CODE (ret_type) == ENUMERAL_TYPE
19212 || TREE_CODE (ret_type) == BOOLEAN_TYPE)
19213 && known_lt (GET_MODE_SIZE (TYPE_MODE (ret_type)), 4))
19215 rtx ret_reg = gen_rtx_REG (TYPE_MODE (ret_type), R0_REGNUM);
19216 rtx si_reg = gen_rtx_REG (SImode, R0_REGNUM);
19217 rtx extend;
19218 if (TYPE_UNSIGNED (ret_type))
19219 extend = gen_rtx_SET (si_reg, gen_rtx_ZERO_EXTEND (SImode,
19220 ret_reg));
19221 else
19223 /* Signed-extension is a special case because of
19224 thumb1_extendhisi2. */
19225 if (TARGET_THUMB1
19226 && known_eq (GET_MODE_SIZE (TYPE_MODE (ret_type)), 2))
19227 extend = gen_thumb1_extendhisi2 (si_reg, ret_reg);
19228 else
19229 extend = gen_rtx_SET (si_reg,
19230 gen_rtx_SIGN_EXTEND (SImode,
19231 ret_reg));
19233 emit_insn_after (extend, insn);
19237 if (TARGET_HAVE_FPCXT_CMSE)
19239 rtx_insn *last, *pop_insn, *after = insn;
19241 start_sequence ();
19243 /* Lazy load multiple done as part of libcall in Armv8-M. */
19244 if (lazy_fpclear)
19246 rtx imm = gen_int_mode (lazy_store_stack_frame_size, SImode);
19247 emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx));
19248 rtx_insn *add_insn =
19249 emit_insn (gen_addsi3 (stack_pointer_rtx,
19250 stack_pointer_rtx, imm));
19251 if (!frame_pointer_needed)
19252 arm_add_cfa_adjust_cfa_note (add_insn,
19253 lazy_store_stack_frame_size,
19254 stack_pointer_rtx,
19255 stack_pointer_rtx);
19257 /* Restore VFP callee-saved registers. */
19258 else
19260 int nb_callee_saved_vfp_regs =
19261 (max_fp_regno - D7_VFP_REGNUM) / 2;
19262 arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM + 1,
19263 nb_callee_saved_vfp_regs,
19264 stack_pointer_rtx);
19265 /* Disable frame debug info in vpop because the SP adjustment
19266 is made using a CFA adjustment note while the CFA register used is
19267 sometimes R7. This then causes an assert failure in the
19268 CFI note creation code. */
19269 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19272 arm_emit_multi_reg_pop (callee_saved_mask);
19273 pop_insn = get_last_insn ();
19275 /* Disable frame debug info in pop because the notes reset the state
19276 of popped registers to what it was at the beginning of the
19277 function, before the prologue. This leads to incorrect state
19278 when doing the pop after the nonsecure call for registers that
19279 are pushed both in prologue and before the nonsecure call.
19281 It also occasionally triggers an assert failure in CFI note
19282 creation code when there are two codepaths to the epilogue,
19283 one of which does not go through the nonsecure call.
19284 Obviously this means that debugging between the push and pop is
19285 not reliable. */
19286 RTX_FRAME_RELATED_P (pop_insn) = 0;
19288 seq = get_insns ();
19289 last = get_last_insn ();
19290 end_sequence ();
19292 emit_insn_after (seq, after);
19294 /* Skip pop we have just inserted after nonsecure call, we know
19295 it does not contain a nonsecure call. */
19296 insn = last;
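/* As an illustration of the address handling above: the lsrs/lsls pair
   emitted before the register clearing simply forces bit 0 (the Thumb
   interworking bit) of the call address to zero without needing an extra
   register for a BIC immediate.  In plain C (function name invented for
   this sketch):  */

static uint32_t
example_clear_address_lsb (uint32_t addr)
{
  addr >>= 1;		/* lsrs addr, addr, #1 */
  addr <<= 1;		/* lsls addr, addr, #1 */
  return addr;
}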
19302 /* Rewrite a move insn into a subtract of 0 if the condition codes will
19303 be useful in the next conditional jump insn. */
19305 static void
19306 thumb1_reorg (void)
19308 basic_block bb;
19310 FOR_EACH_BB_FN (bb, cfun)
19312 rtx dest, src;
19313 rtx cmp, op0, op1, set = NULL;
19314 rtx_insn *prev, *insn = BB_END (bb);
19315 bool insn_clobbered = false;
19317 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
19318 insn = PREV_INSN (insn);
19320 /* Find the last cbranchsi4_insn in basic block BB. */
19321 if (insn == BB_HEAD (bb)
19322 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
19323 continue;
19325 /* Get the register with which we are comparing. */
19326 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
19327 op0 = XEXP (cmp, 0);
19328 op1 = XEXP (cmp, 1);
19330 /* Check that comparison is against ZERO. */
19331 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
19332 continue;
19334 /* Find the first flag setting insn before INSN in basic block BB. */
19335 gcc_assert (insn != BB_HEAD (bb));
19336 for (prev = PREV_INSN (insn);
19337 (!insn_clobbered
19338 && prev != BB_HEAD (bb)
19339 && (NOTE_P (prev)
19340 || DEBUG_INSN_P (prev)
19341 || ((set = single_set (prev)) != NULL
19342 && get_attr_conds (prev) == CONDS_NOCOND)));
19343 prev = PREV_INSN (prev))
19345 if (reg_set_p (op0, prev))
19346 insn_clobbered = true;
19349 /* Skip if op0 is clobbered by insn other than prev. */
19350 if (insn_clobbered)
19351 continue;
19353 if (!set)
19354 continue;
19356 dest = SET_DEST (set);
19357 src = SET_SRC (set);
19358 if (!low_register_operand (dest, SImode)
19359 || !low_register_operand (src, SImode))
19360 continue;
19362 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
19363 in INSN. Both src and dest of the move insn are checked. */
19364 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
19366 dest = copy_rtx (dest);
19367 src = copy_rtx (src);
19368 src = gen_rtx_MINUS (SImode, src, const0_rtx);
19369 PATTERN (prev) = gen_rtx_SET (dest, src);
19370 INSN_CODE (prev) = -1;
19371 /* Set test register in INSN to dest. */
19372 XEXP (cmp, 0) = copy_rtx (dest);
19373 INSN_CODE (insn) = -1;
19378 /* Convert instructions to their cc-clobbering variant if possible, since
19379 that allows us to use smaller encodings. */
19381 static void
19382 thumb2_reorg (void)
19384 basic_block bb;
19385 regset_head live;
19387 INIT_REG_SET (&live);
19389 /* We are freeing block_for_insn in the toplev to keep compatibility
19390 with old MDEP_REORGS that are not CFG based. Recompute it now. */
19391 compute_bb_for_insn ();
19392 df_analyze ();
19394 enum Convert_Action {SKIP, CONV, SWAP_CONV};
19396 FOR_EACH_BB_FN (bb, cfun)
19398 if ((current_tune->disparage_flag_setting_t16_encodings
19399 == tune_params::DISPARAGE_FLAGS_ALL)
19400 && optimize_bb_for_speed_p (bb))
19401 continue;
19403 rtx_insn *insn;
19404 Convert_Action action = SKIP;
19405 Convert_Action action_for_partial_flag_setting
19406 = ((current_tune->disparage_flag_setting_t16_encodings
19407 != tune_params::DISPARAGE_FLAGS_NEITHER)
19408 && optimize_bb_for_speed_p (bb))
19409 ? SKIP : CONV;
19411 COPY_REG_SET (&live, DF_LR_OUT (bb));
19412 df_simulate_initialize_backwards (bb, &live);
19413 FOR_BB_INSNS_REVERSE (bb, insn)
19415 if (NONJUMP_INSN_P (insn)
19416 && !REGNO_REG_SET_P (&live, CC_REGNUM)
19417 && GET_CODE (PATTERN (insn)) == SET)
19419 action = SKIP;
19420 rtx pat = PATTERN (insn);
19421 rtx dst = XEXP (pat, 0);
19422 rtx src = XEXP (pat, 1);
19423 rtx op0 = NULL_RTX, op1 = NULL_RTX;
19425 if (UNARY_P (src) || BINARY_P (src))
19426 op0 = XEXP (src, 0);
19428 if (BINARY_P (src))
19429 op1 = XEXP (src, 1);
19431 if (low_register_operand (dst, SImode))
19433 switch (GET_CODE (src))
19435 case PLUS:
19436 /* Adding two registers and storing the result
19437 in the first source is already a 16-bit
19438 operation. */
19439 if (rtx_equal_p (dst, op0)
19440 && register_operand (op1, SImode))
19441 break;
19443 if (low_register_operand (op0, SImode))
19445 /* ADDS <Rd>,<Rn>,<Rm> */
19446 if (low_register_operand (op1, SImode))
19447 action = CONV;
19448 /* ADDS <Rdn>,#<imm8> */
19449 /* SUBS <Rdn>,#<imm8> */
19450 else if (rtx_equal_p (dst, op0)
19451 && CONST_INT_P (op1)
19452 && IN_RANGE (INTVAL (op1), -255, 255))
19453 action = CONV;
19454 /* ADDS <Rd>,<Rn>,#<imm3> */
19455 /* SUBS <Rd>,<Rn>,#<imm3> */
19456 else if (CONST_INT_P (op1)
19457 && IN_RANGE (INTVAL (op1), -7, 7))
19458 action = CONV;
19460 /* ADCS <Rd>, <Rn> */
19461 else if (GET_CODE (XEXP (src, 0)) == PLUS
19462 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
19463 && low_register_operand (XEXP (XEXP (src, 0), 1),
19464 SImode)
19465 && COMPARISON_P (op1)
19466 && cc_register (XEXP (op1, 0), VOIDmode)
19467 && maybe_get_arm_condition_code (op1) == ARM_CS
19468 && XEXP (op1, 1) == const0_rtx)
19469 action = CONV;
19470 break;
19472 case MINUS:
19473 /* RSBS <Rd>,<Rn>,#0
19474 Not handled here: see NEG below. */
19475 /* SUBS <Rd>,<Rn>,#<imm3>
19476 SUBS <Rdn>,#<imm8>
19477 Not handled here: see PLUS above. */
19478 /* SUBS <Rd>,<Rn>,<Rm> */
19479 if (low_register_operand (op0, SImode)
19480 && low_register_operand (op1, SImode))
19481 action = CONV;
19482 break;
19484 case MULT:
19485 /* MULS <Rdm>,<Rn>,<Rdm>
19486 As an exception to the rule, this is only used
19487 when optimizing for size since MULS is slow on all
19488 known implementations. We do not even want to use
19489 MULS in cold code, if optimizing for speed, so we
19490 test the global flag here. */
19491 if (!optimize_size)
19492 break;
19493 /* Fall through. */
19494 case AND:
19495 case IOR:
19496 case XOR:
19497 /* ANDS <Rdn>,<Rm> */
19498 if (rtx_equal_p (dst, op0)
19499 && low_register_operand (op1, SImode))
19500 action = action_for_partial_flag_setting;
19501 else if (rtx_equal_p (dst, op1)
19502 && low_register_operand (op0, SImode))
19503 action = action_for_partial_flag_setting == SKIP
19504 ? SKIP : SWAP_CONV;
19505 break;
19507 case ASHIFTRT:
19508 case ASHIFT:
19509 case LSHIFTRT:
19510 /* ASRS <Rdn>,<Rm> */
19511 /* LSRS <Rdn>,<Rm> */
19512 /* LSLS <Rdn>,<Rm> */
19513 if (rtx_equal_p (dst, op0)
19514 && low_register_operand (op1, SImode))
19515 action = action_for_partial_flag_setting;
19516 /* ASRS <Rd>,<Rm>,#<imm5> */
19517 /* LSRS <Rd>,<Rm>,#<imm5> */
19518 /* LSLS <Rd>,<Rm>,#<imm5> */
19519 else if (low_register_operand (op0, SImode)
19520 && CONST_INT_P (op1)
19521 && IN_RANGE (INTVAL (op1), 0, 31))
19522 action = action_for_partial_flag_setting;
19523 break;
19525 case ROTATERT:
19526 /* RORS <Rdn>,<Rm> */
19527 if (rtx_equal_p (dst, op0)
19528 && low_register_operand (op1, SImode))
19529 action = action_for_partial_flag_setting;
19530 break;
19532 case NOT:
19533 /* MVNS <Rd>,<Rm> */
19534 if (low_register_operand (op0, SImode))
19535 action = action_for_partial_flag_setting;
19536 break;
19538 case NEG:
19539 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
19540 if (low_register_operand (op0, SImode))
19541 action = CONV;
19542 break;
19544 case CONST_INT:
19545 /* MOVS <Rd>,#<imm8> */
19546 if (CONST_INT_P (src)
19547 && IN_RANGE (INTVAL (src), 0, 255))
19548 action = action_for_partial_flag_setting;
19549 break;
19551 case REG:
19552 /* MOVS and MOV<c> with registers have different
19553 encodings, so are not relevant here. */
19554 break;
19556 default:
19557 break;
19561 if (action != SKIP)
19563 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
19564 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
19565 rtvec vec;
19567 if (action == SWAP_CONV)
19569 src = copy_rtx (src);
19570 XEXP (src, 0) = op1;
19571 XEXP (src, 1) = op0;
19572 pat = gen_rtx_SET (dst, src);
19573 vec = gen_rtvec (2, pat, clobber);
19575 else /* action == CONV */
19576 vec = gen_rtvec (2, pat, clobber);
19578 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
19579 INSN_CODE (insn) = -1;
19583 if (NONDEBUG_INSN_P (insn))
19584 df_simulate_one_insn_backwards (bb, insn, &live);
19588 CLEAR_REG_SET (&live);
19591 /* Gcc puts the pool in the wrong place for ARM, since we can only
19592 load addresses a limited distance around the pc. We do some
19593 special munging to move the constant pool values to the correct
19594 point in the code. */
19595 static void
19596 arm_reorg (void)
19598 rtx_insn *insn;
19599 HOST_WIDE_INT address = 0;
19600 Mfix * fix;
19602 if (use_cmse)
19603 cmse_nonsecure_call_inline_register_clear ();
19605 /* We cannot run the Thumb passes for thunks because there is no CFG. */
19606 if (cfun->is_thunk)
19608 else if (TARGET_THUMB1)
19609 thumb1_reorg ();
19610 else if (TARGET_THUMB2)
19611 thumb2_reorg ();
19613 /* Ensure all insns that must be split have been split at this point.
19614 Otherwise, the pool placement code below may compute incorrect
19615 insn lengths. Note that when optimizing, all insns have already
19616 been split at this point. */
19617 if (!optimize)
19618 split_all_insns_noflow ();
19620 /* Make sure we do not attempt to create a literal pool even though it should
19621 no longer be necessary to create any. */
19622 if (arm_disable_literal_pool)
19623 return;
19625 minipool_fix_head = minipool_fix_tail = NULL;
19627 /* The first insn must always be a note, or the code below won't
19628 scan it properly. */
19629 insn = get_insns ();
19630 gcc_assert (NOTE_P (insn));
19631 minipool_pad = 0;
19633 /* Scan all the insns and record the operands that will need fixing. */
19634 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
19636 if (BARRIER_P (insn))
19637 push_minipool_barrier (insn, address);
19638 else if (INSN_P (insn))
19640 rtx_jump_table_data *table;
19642 note_invalid_constants (insn, address, true);
19643 address += get_attr_length (insn);
19645 /* If the insn is a vector jump, add the size of the table
19646 and skip the table. */
19647 if (tablejump_p (insn, NULL, &table))
19649 address += get_jump_table_size (table);
19650 insn = table;
19653 else if (LABEL_P (insn))
19654 /* Add the worst-case padding due to alignment. We don't add
19655 the _current_ padding because the minipool insertions
19656 themselves might change it. */
19657 address += get_label_padding (insn);
19660 fix = minipool_fix_head;
19662 /* Now scan the fixups and perform the required changes. */
19663 while (fix)
19665 Mfix * ftmp;
19666 Mfix * fdel;
19667 Mfix * last_added_fix;
19668 Mfix * last_barrier = NULL;
19669 Mfix * this_fix;
19671 /* Skip any further barriers before the next fix. */
19672 while (fix && BARRIER_P (fix->insn))
19673 fix = fix->next;
19675 /* No more fixes. */
19676 if (fix == NULL)
19677 break;
19679 last_added_fix = NULL;
19681 for (ftmp = fix; ftmp; ftmp = ftmp->next)
19683 if (BARRIER_P (ftmp->insn))
19685 if (ftmp->address >= minipool_vector_head->max_address)
19686 break;
19688 last_barrier = ftmp;
19690 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
19691 break;
19693 last_added_fix = ftmp; /* Keep track of the last fix added. */
19696 /* If we found a barrier, drop back to that; any fixes that we
19697 could have reached but come after the barrier will now go in
19698 the next mini-pool. */
19699 if (last_barrier != NULL)
19701 /* Reduce the refcount for those fixes that won't go into this
19702 pool after all. */
19703 for (fdel = last_barrier->next;
19704 fdel && fdel != ftmp;
19705 fdel = fdel->next)
19707 fdel->minipool->refcount--;
19708 fdel->minipool = NULL;
19711 ftmp = last_barrier;
19713 else
19715 /* ftmp is the first fix that we can't fit into this pool and
19716 there are no natural barriers that we could use. Insert a
19717 new barrier in the code somewhere between the previous
19718 fix and this one, and arrange to jump around it. */
19719 HOST_WIDE_INT max_address;
19721 /* The last item on the list of fixes must be a barrier, so
19722 we can never run off the end of the list of fixes without
19723 last_barrier being set. */
19724 gcc_assert (ftmp);
19726 max_address = minipool_vector_head->max_address;
19727 /* Check that there isn't another fix that is in range that
19728 we couldn't fit into this pool because the pool was
19729 already too large: we need to put the pool before such an
19730 instruction. The pool itself may come just after the
19731 fix because create_fix_barrier also allows space for a
19732 jump instruction. */
19733 if (ftmp->address < max_address)
19734 max_address = ftmp->address + 1;
19736 last_barrier = create_fix_barrier (last_added_fix, max_address);
19739 assign_minipool_offsets (last_barrier);
19741 while (ftmp)
19743 if (!BARRIER_P (ftmp->insn)
19744 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
19745 == NULL))
19746 break;
19748 ftmp = ftmp->next;
19751 /* Scan over the fixes we have identified for this pool, fixing them
19752 up and adding the constants to the pool itself. */
19753 for (this_fix = fix; this_fix && ftmp != this_fix;
19754 this_fix = this_fix->next)
19755 if (!BARRIER_P (this_fix->insn))
19757 rtx addr
19758 = plus_constant (Pmode,
19759 gen_rtx_LABEL_REF (VOIDmode,
19760 minipool_vector_label),
19761 this_fix->minipool->offset);
19762 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
19765 dump_minipool (last_barrier->insn);
19766 fix = ftmp;
19769 /* From now on we must synthesize any constants that we can't handle
19770 directly. This can happen if the RTL gets split during final
19771 instruction generation. */
19772 cfun->machine->after_arm_reorg = 1;
19774 /* Free the minipool memory. */
19775 obstack_free (&minipool_obstack, minipool_startobj);
19778 /* Routines to output assembly language. */
19780 /* OPERANDS[0] is the entire list of insns that constitute pop,
19781 OPERANDS[1] is the base register, RETURN_PC is true iff the return insn
19782 is in the list, UPDATE is true iff the list contains an explicit
19783 update of the base register. */
19784 void
19785 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
19786 bool update)
19788 int i;
19789 char pattern[100];
19790 int offset;
19791 const char *conditional;
19792 int num_saves = XVECLEN (operands[0], 0);
19793 unsigned int regno;
19794 unsigned int regno_base = REGNO (operands[1]);
19795 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
19797 offset = 0;
19798 offset += update ? 1 : 0;
19799 offset += return_pc ? 1 : 0;
19801 /* Is the base register in the list? */
19802 for (i = offset; i < num_saves; i++)
19804 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
19805 /* If SP is in the list, then the base register must be SP. */
19806 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
19807 /* If base register is in the list, there must be no explicit update. */
19808 if (regno == regno_base)
19809 gcc_assert (!update);
19812 conditional = reverse ? "%?%D0" : "%?%d0";
19813 /* Can't use POP if returning from an interrupt. */
19814 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
19815 sprintf (pattern, "pop%s\t{", conditional);
19816 else
19818 /* Output ldmfd when the base register is SP, otherwise output ldmia.
19819 It's just a convention, their semantics are identical. */
19820 if (regno_base == SP_REGNUM)
19821 sprintf (pattern, "ldmfd%s\t", conditional);
19822 else if (update)
19823 sprintf (pattern, "ldmia%s\t", conditional);
19824 else
19825 sprintf (pattern, "ldm%s\t", conditional);
19827 strcat (pattern, reg_names[regno_base]);
19828 if (update)
19829 strcat (pattern, "!, {");
19830 else
19831 strcat (pattern, ", {");
19834 /* Output the first destination register. */
19835 strcat (pattern,
19836 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
19838 /* Output the rest of the destination registers. */
19839 for (i = offset + 1; i < num_saves; i++)
19841 strcat (pattern, ", ");
19842 strcat (pattern,
19843 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
19846 strcat (pattern, "}");
19848 if (interrupt_p && return_pc)
19849 strcat (pattern, "^");
19851 output_asm_insn (pattern, &cond);
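/* As an illustration: for a pop of { r4, r5, pc } with the stack pointer as
   base, writeback, and no interrupt return, the routine above builds the
   template "pop%?%d0\t{r4, r5, pc}", which the final assembly output prints
   as "pop\t{r4, r5, pc}" when the instruction is unconditional.  A
   simplified standalone sketch of the string assembly (register choice and
   function name are invented for the example):  */

static void
example_build_pop_pattern (char pattern[100])
{
  const char *conditional = "%?%d0";	/* not reversed */
  sprintf (pattern, "pop%s\t{", conditional);
  strcat (pattern, "r4");
  strcat (pattern, ", r5");
  strcat (pattern, ", pc");
  strcat (pattern, "}");
  /* pattern now holds "pop%?%d0\t{r4, r5, pc}".  */
}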
19855 /* Output the assembly for a store multiple. */
19857 const char *
19858 vfp_output_vstmd (rtx * operands)
19860 char pattern[100];
19861 int p;
19862 int base;
19863 int i;
19864 rtx addr_reg = REG_P (XEXP (operands[0], 0))
19865 ? XEXP (operands[0], 0)
19866 : XEXP (XEXP (operands[0], 0), 0);
19867 bool push_p = REGNO (addr_reg) == SP_REGNUM;
19869 if (push_p)
19870 strcpy (pattern, "vpush%?.64\t{%P1");
19871 else
19872 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
19874 p = strlen (pattern);
19876 gcc_assert (REG_P (operands[1]));
19878 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
19879 for (i = 1; i < XVECLEN (operands[2], 0); i++)
19881 p += sprintf (&pattern[p], ", d%d", base + i);
19883 strcpy (&pattern[p], "}");
19885 output_asm_insn (pattern, operands);
19886 return "";
19890 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
19891 number of bytes pushed. */
19893 static int
19894 vfp_emit_fstmd (int base_reg, int count)
19896 rtx par;
19897 rtx dwarf;
19898 rtx tmp, reg;
19899 int i;
19901 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
19902 register pairs are stored by a store multiple insn. We avoid this
19903 by pushing an extra pair. */
19904 if (count == 2 && !arm_arch6)
19906 if (base_reg == LAST_VFP_REGNUM - 3)
19907 base_reg -= 2;
19908 count++;
19911 /* FSTMD may not store more than 16 doubleword registers at once. Split
19912 larger stores into multiple parts (up to a maximum of two, in
19913 practice). */
19914 if (count > 16)
19916 int saved;
19917 /* NOTE: base_reg is an internal register number, so each D register
19918 counts as 2. */
19919 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
19920 saved += vfp_emit_fstmd (base_reg, 16);
19921 return saved;
19924 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
19925 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
19927 reg = gen_rtx_REG (DFmode, base_reg);
19928 base_reg += 2;
19930 XVECEXP (par, 0, 0)
19931 = gen_rtx_SET (gen_frame_mem
19932 (BLKmode,
19933 gen_rtx_PRE_MODIFY (Pmode,
19934 stack_pointer_rtx,
19935 plus_constant
19936 (Pmode, stack_pointer_rtx,
19937 - (count * 8)))
19939 gen_rtx_UNSPEC (BLKmode,
19940 gen_rtvec (1, reg),
19941 UNSPEC_PUSH_MULT));
19943 tmp = gen_rtx_SET (stack_pointer_rtx,
19944 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
19945 RTX_FRAME_RELATED_P (tmp) = 1;
19946 XVECEXP (dwarf, 0, 0) = tmp;
19948 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
19949 RTX_FRAME_RELATED_P (tmp) = 1;
19950 XVECEXP (dwarf, 0, 1) = tmp;
19952 for (i = 1; i < count; i++)
19954 reg = gen_rtx_REG (DFmode, base_reg);
19955 base_reg += 2;
19956 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
19958 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
19959 plus_constant (Pmode,
19960 stack_pointer_rtx,
19961 i * 8)),
19962 reg);
19963 RTX_FRAME_RELATED_P (tmp) = 1;
19964 XVECEXP (dwarf, 0, i + 1) = tmp;
19967 par = emit_insn (par);
19968 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19969 RTX_FRAME_RELATED_P (par) = 1;
19971 return count * 8;
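/* As an illustration of the size bookkeeping above: the number of bytes
   pushed is 8 per D register actually stored, after applying the ARM10
   VFPr1 workaround (an extra pair is pushed when exactly two would be
   stored on pre-v6 cores) and the 16-register split.  A standalone sketch;
   the function name and the ARCH6 parameter are invented for the
   example.  */

static int
example_fstmd_bytes (int count, bool arch6)
{
  if (count == 2 && !arch6)
    count++;				/* avoid the two-pair erratum */
  if (count > 16)
    return example_fstmd_bytes (count - 16, arch6) + 16 * 8;
  return count * 8;
}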
19974 /* Return true if -mcmse has been passed and the function pointed to by 'addr'
19975 has the cmse_nonsecure_call attribute, and false otherwise. */
19977 bool
19978 detect_cmse_nonsecure_call (tree addr)
19980 if (!addr)
19981 return FALSE;
19983 tree fntype = TREE_TYPE (addr);
19984 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
19985 TYPE_ATTRIBUTES (fntype)))
19986 return TRUE;
19987 return FALSE;
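/* As an illustration of what this predicate matches: user code of the
   following shape (compiled for the target with -mcmse, so shown under
   #if 0 rather than built into the compiler) declares a non-secure
   function type; calls through it are cmse_nonsecure_calls.  The type and
   function names are invented for the example.  */
#if 0
typedef void __attribute__ ((cmse_nonsecure_call)) nonsecure_fn (int);

void
secure_caller (nonsecure_fn *ns_func, int x)
{
  ns_func (x);	/* A cmse_nonsecure_call: the register clearing above is
		   applied before the call.  */
}
#endif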
19991 /* Emit a call instruction with pattern PAT. ADDR is the address of
19992 the call target. */
19994 void
19995 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
19997 rtx insn;
19999 insn = emit_call_insn (pat);
20001 /* The PIC register is live on entry to VxWorks PIC PLT entries.
20002 If the call might use such an entry, add a use of the PIC register
20003 to the instruction's CALL_INSN_FUNCTION_USAGE. */
20004 if (TARGET_VXWORKS_RTP
20005 && flag_pic
20006 && !sibcall
20007 && SYMBOL_REF_P (addr)
20008 && (SYMBOL_REF_DECL (addr)
20009 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
20010 : !SYMBOL_REF_LOCAL_P (addr)))
20012 require_pic_register (NULL_RTX, false /*compute_now*/);
20013 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
20016 if (TARGET_FDPIC)
20018 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
20019 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
20022 if (TARGET_AAPCS_BASED)
20024 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
20025 linker. We need to add an IP clobber to allow setting
20026 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
20027 is not needed since it's a fixed register. */
20028 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
20029 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
20033 /* Output a 'call' insn. */
20034 const char *
20035 output_call (rtx *operands)
20037 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
20039 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
20040 if (REGNO (operands[0]) == LR_REGNUM)
20042 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
20043 output_asm_insn ("mov%?\t%0, %|lr", operands);
20046 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
20048 if (TARGET_INTERWORK || arm_arch4t)
20049 output_asm_insn ("bx%?\t%0", operands);
20050 else
20051 output_asm_insn ("mov%?\t%|pc, %0", operands);
20053 return "";
20056 /* Output a move from arm registers to arm registers of a long double
20057 OPERANDS[0] is the destination.
20058 OPERANDS[1] is the source. */
20059 const char *
20060 output_mov_long_double_arm_from_arm (rtx *operands)
20062 /* We have to be careful here because the two might overlap. */
20063 int dest_start = REGNO (operands[0]);
20064 int src_start = REGNO (operands[1]);
20065 rtx ops[2];
20066 int i;
20068 if (dest_start < src_start)
20070 for (i = 0; i < 3; i++)
20072 ops[0] = gen_rtx_REG (SImode, dest_start + i);
20073 ops[1] = gen_rtx_REG (SImode, src_start + i);
20074 output_asm_insn ("mov%?\t%0, %1", ops);
20077 else
20079 for (i = 2; i >= 0; i--)
20081 ops[0] = gen_rtx_REG (SImode, dest_start + i);
20082 ops[1] = gen_rtx_REG (SImode, src_start + i);
20083 output_asm_insn ("mov%?\t%0, %1", ops);
20087 return "";
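/* As an illustration of the overlap rule above: copying an N-word value
   between possibly overlapping register ranges must proceed low-to-high
   when the destination starts below the source and high-to-low otherwise,
   just as memmove does for memory.  A standalone sketch over arrays
   (function name invented for the example):  */

static void
example_copy_overlapping_words (uint32_t *dest, const uint32_t *src, int n)
{
  int i;
  if (dest < src)
    for (i = 0; i < n; i++)	 /* forward copy: dest never overtakes src */
      dest[i] = src[i];
  else
    for (i = n - 1; i >= 0; i--) /* backward copy for the other direction */
      dest[i] = src[i];
}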
20090 void
20091 arm_emit_movpair (rtx dest, rtx src)
20093 /* If the src is an immediate, simplify it. */
20094 if (CONST_INT_P (src))
20096 HOST_WIDE_INT val = INTVAL (src);
20097 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
20098 if ((val >> 16) & 0x0000ffff)
20100 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
20101 GEN_INT (16)),
20102 GEN_INT ((val >> 16) & 0x0000ffff));
20103 rtx_insn *insn = get_last_insn ();
20104 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
20106 return;
20108 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
20109 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
20110 rtx_insn *insn = get_last_insn ();
20111 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
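/* As an illustration of the split above: a 32-bit constant is materialised
   as its low half (the movw), followed, only when the high half is
   non-zero, by an insertion of that half into bits [16,32) (the movt
   modelled by the ZERO_EXTRACT).  A standalone sketch (function name
   invented for the example):  */

static uint32_t
example_movw_movt (uint32_t val)
{
  uint32_t reg = val & 0xffff;			/* movw: low 16 bits */
  if ((val >> 16) & 0xffff)
    reg |= ((val >> 16) & 0xffff) << 16;	/* movt: high 16 bits */
  return reg;					/* always equals VAL */
}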
20114 /* Output a move between double words. It must be REG<-MEM
20115 or MEM<-REG. */
20116 const char *
20117 output_move_double (rtx *operands, bool emit, int *count)
20119 enum rtx_code code0 = GET_CODE (operands[0]);
20120 enum rtx_code code1 = GET_CODE (operands[1]);
20121 rtx otherops[3];
20122 if (count)
20123 *count = 1;
20125 /* The only case when this might happen is when
20126 you are looking at the length of a DImode instruction
20127 that has an invalid constant in it. */
20128 if (code0 == REG && code1 != MEM)
20130 gcc_assert (!emit);
20131 *count = 2;
20132 return "";
20135 if (code0 == REG)
20137 unsigned int reg0 = REGNO (operands[0]);
20138 const bool can_ldrd = TARGET_LDRD && (TARGET_THUMB2 || (reg0 % 2 == 0));
20140 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
20142 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
20144 switch (GET_CODE (XEXP (operands[1], 0)))
20146 case REG:
20148 if (emit)
20150 if (can_ldrd
20151 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
20152 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
20153 else
20154 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20156 break;
20158 case PRE_INC:
20159 gcc_assert (can_ldrd);
20160 if (emit)
20161 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
20162 break;
20164 case PRE_DEC:
20165 if (emit)
20167 if (can_ldrd)
20168 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
20169 else
20170 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
20172 break;
20174 case POST_INC:
20175 if (emit)
20177 if (can_ldrd)
20178 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
20179 else
20180 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
20182 break;
20184 case POST_DEC:
20185 gcc_assert (can_ldrd);
20186 if (emit)
20187 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
20188 break;
20190 case PRE_MODIFY:
20191 case POST_MODIFY:
20192 /* Autoincrement addressing modes should never have overlapping
20193 base and destination registers, and overlapping index registers
20194 are already prohibited, so this doesn't need to worry about
20195 fix_cm3_ldrd. */
20196 otherops[0] = operands[0];
20197 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
20198 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
20200 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
20202 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
20204 /* Registers overlap so split out the increment. */
20205 if (emit)
20207 gcc_assert (can_ldrd);
20208 output_asm_insn ("add%?\t%1, %1, %2", otherops);
20209 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
20211 if (count)
20212 *count = 2;
20214 else
20216 /* Use a single insn if we can.
20217 FIXME: IWMMXT allows offsets larger than ldrd can
20218 handle, fix these up with a pair of ldr. */
20219 if (can_ldrd
20220 && (TARGET_THUMB2
20221 || !CONST_INT_P (otherops[2])
20222 || (INTVAL (otherops[2]) > -256
20223 && INTVAL (otherops[2]) < 256)))
20225 if (emit)
20226 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
20228 else
20230 if (emit)
20232 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
20233 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20235 if (count)
20236 *count = 2;
20241 else
20243 /* Use a single insn if we can.
20244 FIXME: IWMMXT allows offsets larger than ldrd can handle,
20245 fix these up with a pair of ldr. */
20246 if (can_ldrd
20247 && (TARGET_THUMB2
20248 || !CONST_INT_P (otherops[2])
20249 || (INTVAL (otherops[2]) > -256
20250 && INTVAL (otherops[2]) < 256)))
20252 if (emit)
20253 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
20255 else
20257 if (emit)
20259 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20260 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
20262 if (count)
20263 *count = 2;
20266 break;
20268 case LABEL_REF:
20269 case CONST:
20270 /* We might be able to use ldrd %0, %1 here. However the range is
20271 different to ldr/adr, and it is broken on some ARMv7-M
20272 implementations. */
20273 /* Use the second register of the pair to avoid problematic
20274 overlap. */
20275 otherops[1] = operands[1];
20276 if (emit)
20277 output_asm_insn ("adr%?\t%0, %1", otherops);
20278 operands[1] = otherops[0];
20279 if (emit)
20281 if (can_ldrd)
20282 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20283 else
20284 output_asm_insn ("ldmia%?\t%1, %M0", operands);
20287 if (count)
20288 *count = 2;
20289 break;
20291 /* ??? This needs checking for thumb2. */
20292 default:
20293 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
20294 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
20296 otherops[0] = operands[0];
20297 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
20298 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
20300 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
20302 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20304 switch ((int) INTVAL (otherops[2]))
20306 case -8:
20307 if (emit)
20308 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
20309 return "";
20310 case -4:
20311 if (TARGET_THUMB2)
20312 break;
20313 if (emit)
20314 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
20315 return "";
20316 case 4:
20317 if (TARGET_THUMB2)
20318 break;
20319 if (emit)
20320 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
20321 return "";
20324 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
20325 operands[1] = otherops[0];
20326 if (can_ldrd
20327 && (REG_P (otherops[2])
20328 || TARGET_THUMB2
20329 || (CONST_INT_P (otherops[2])
20330 && INTVAL (otherops[2]) > -256
20331 && INTVAL (otherops[2]) < 256)))
20333 if (reg_overlap_mentioned_p (operands[0],
20334 otherops[2]))
20336 /* Swap base and index registers over to
20337 avoid a conflict. */
20338 std::swap (otherops[1], otherops[2]);
20340 /* If both registers conflict, it will usually
20341 have been fixed by a splitter. */
20342 if (reg_overlap_mentioned_p (operands[0], otherops[2])
20343 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
20345 if (emit)
20347 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20348 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20350 if (count)
20351 *count = 2;
20353 else
20355 otherops[0] = operands[0];
20356 if (emit)
20357 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
20359 return "";
20362 if (CONST_INT_P (otherops[2]))
20364 if (emit)
20366 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
20367 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
20368 else
20369 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20372 else
20374 if (emit)
20375 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20378 else
20380 if (emit)
20381 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
20384 if (count)
20385 *count = 2;
20387 if (can_ldrd)
20388 return "ldrd%?\t%0, [%1]";
20390 return "ldmia%?\t%1, %M0";
20392 else
20394 otherops[1] = adjust_address (operands[1], SImode, 4);
20395 /* Take care of overlapping base/data reg. */
20396 if (reg_mentioned_p (operands[0], operands[1]))
20398 if (emit)
20400 output_asm_insn ("ldr%?\t%0, %1", otherops);
20401 output_asm_insn ("ldr%?\t%0, %1", operands);
20403 if (count)
20404 *count = 2;
20407 else
20409 if (emit)
20411 output_asm_insn ("ldr%?\t%0, %1", operands);
20412 output_asm_insn ("ldr%?\t%0, %1", otherops);
20414 if (count)
20415 *count = 2;
20420 else
20422 /* Constraints should ensure this. */
20423 gcc_assert (code0 == MEM && code1 == REG);
20424 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
20425 || (TARGET_ARM && TARGET_LDRD));
20427 /* For TARGET_ARM the first source register of an STRD
20428 must be even. This is usually the case for double-word
20429 values but user assembly constraints can force an odd
20430 starting register. */
20431 bool allow_strd = TARGET_LDRD
20432 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
20433 switch (GET_CODE (XEXP (operands[0], 0)))
20435 case REG:
20436 if (emit)
20438 if (allow_strd)
20439 output_asm_insn ("strd%?\t%1, [%m0]", operands);
20440 else
20441 output_asm_insn ("stm%?\t%m0, %M1", operands);
20443 break;
20445 case PRE_INC:
20446 gcc_assert (allow_strd);
20447 if (emit)
20448 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
20449 break;
20451 case PRE_DEC:
20452 if (emit)
20454 if (allow_strd)
20455 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
20456 else
20457 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
20459 break;
20461 case POST_INC:
20462 if (emit)
20464 if (allow_strd)
20465 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
20466 else
20467 output_asm_insn ("stm%?\t%m0!, %M1", operands);
20469 break;
20471 case POST_DEC:
20472 gcc_assert (allow_strd);
20473 if (emit)
20474 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
20475 break;
20477 case PRE_MODIFY:
20478 case POST_MODIFY:
20479 otherops[0] = operands[1];
20480 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
20481 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
20483 /* IWMMXT allows offsets larger than strd can handle,
20484 fix these up with a pair of str. */
20485 if (!TARGET_THUMB2
20486 && CONST_INT_P (otherops[2])
20487 && (INTVAL(otherops[2]) <= -256
20488 || INTVAL(otherops[2]) >= 256))
20490 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20492 if (emit)
20494 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
20495 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20497 if (count)
20498 *count = 2;
20500 else
20502 if (emit)
20504 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20505 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
20507 if (count)
20508 *count = 2;
20511 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20513 if (emit)
20514 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
20516 else
20518 if (emit)
20519 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
20521 break;
20523 case PLUS:
20524 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
20525 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20527 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
20529 case -8:
20530 if (emit)
20531 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
20532 return "";
20534 case -4:
20535 if (TARGET_THUMB2)
20536 break;
20537 if (emit)
20538 output_asm_insn ("stmda%?\t%m0, %M1", operands);
20539 return "";
20541 case 4:
20542 if (TARGET_THUMB2)
20543 break;
20544 if (emit)
20545 output_asm_insn ("stmib%?\t%m0, %M1", operands);
20546 return "";
20549 if (allow_strd
20550 && (REG_P (otherops[2])
20551 || TARGET_THUMB2
20552 || (CONST_INT_P (otherops[2])
20553 && INTVAL (otherops[2]) > -256
20554 && INTVAL (otherops[2]) < 256)))
20556 otherops[0] = operands[1];
20557 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
20558 if (emit)
20559 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
20560 return "";
20562 /* Fall through */
20564 default:
20565 otherops[0] = adjust_address (operands[0], SImode, 4);
20566 otherops[1] = operands[1];
20567 if (emit)
20569 output_asm_insn ("str%?\t%1, %0", operands);
20570 output_asm_insn ("str%?\t%H1, %0", otherops);
20572 if (count)
20573 *count = 2;
20577 return "";
20580 /* Output a move, load or store for quad-word vectors in ARM registers. Only
20581 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
20583 const char *
20584 output_move_quad (rtx *operands)
20586 if (REG_P (operands[0]))
20588 /* Load, or reg->reg move. */
20590 if (MEM_P (operands[1]))
20592 switch (GET_CODE (XEXP (operands[1], 0)))
20594 case REG:
20595 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20596 break;
20598 case LABEL_REF:
20599 case CONST:
20600 output_asm_insn ("adr%?\t%0, %1", operands);
20601 output_asm_insn ("ldmia%?\t%0, %M0", operands);
20602 break;
20604 default:
20605 gcc_unreachable ();
20608 else
20610 rtx ops[2];
20611 int dest, src, i;
20613 gcc_assert (REG_P (operands[1]));
20615 dest = REGNO (operands[0]);
20616 src = REGNO (operands[1]);
20618 /* This seems pretty dumb, but hopefully GCC won't try to do it
20619 very often. */
20620 if (dest < src)
20621 for (i = 0; i < 4; i++)
20623 ops[0] = gen_rtx_REG (SImode, dest + i);
20624 ops[1] = gen_rtx_REG (SImode, src + i);
20625 output_asm_insn ("mov%?\t%0, %1", ops);
20627 else
20628 for (i = 3; i >= 0; i--)
20630 ops[0] = gen_rtx_REG (SImode, dest + i);
20631 ops[1] = gen_rtx_REG (SImode, src + i);
20632 output_asm_insn ("mov%?\t%0, %1", ops);
20636 else
20638 gcc_assert (MEM_P (operands[0]));
20639 gcc_assert (REG_P (operands[1]));
20640 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
20642 switch (GET_CODE (XEXP (operands[0], 0)))
20644 case REG:
20645 output_asm_insn ("stm%?\t%m0, %M1", operands);
20646 break;
20648 default:
20649 gcc_unreachable ();
20653 return "";
20656 /* Output a VFP load or store instruction. */
20658 const char *
20659 output_move_vfp (rtx *operands)
20661 rtx reg, mem, addr, ops[2];
20662 int load = REG_P (operands[0]);
20663 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
20664 int sp = (!TARGET_VFP_FP16INST
20665 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
20666 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
20667 const char *templ;
20668 char buff[50];
20669 machine_mode mode;
20671 reg = operands[!load];
20672 mem = operands[load];
20674 mode = GET_MODE (reg);
20676 gcc_assert (REG_P (reg));
20677 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
20678 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
20679 || mode == SFmode
20680 || mode == DFmode
20681 || mode == HImode
20682 || mode == SImode
20683 || mode == DImode
20684 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
20685 gcc_assert (MEM_P (mem));
20687 addr = XEXP (mem, 0);
20689 switch (GET_CODE (addr))
20691 case PRE_DEC:
20692 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20693 ops[0] = XEXP (addr, 0);
20694 ops[1] = reg;
20695 break;
20697 case POST_INC:
20698 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20699 ops[0] = XEXP (addr, 0);
20700 ops[1] = reg;
20701 break;
20703 default:
20704 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
20705 ops[0] = reg;
20706 ops[1] = mem;
20707 break;
20710 sprintf (buff, templ,
20711 load ? "ld" : "st",
20712 dp ? "64" : sp ? "32" : "16",
20713 dp ? "P" : "",
20714 integer_p ? "\t%@ int" : "");
20715 output_asm_insn (buff, ops);
20717 return "";
20720 /* Output a Neon double-word or quad-word load or store, or a load
20721 or store for larger structure modes.
20723 WARNING: The ordering of elements is weird in big-endian mode,
20724 because the EABI requires that vectors stored in memory appear
20725 as though they were stored by a VSTM.
20726 GCC RTL defines element ordering based on in-memory order.
20727 This can be different from the architectural ordering of elements
20728 within a NEON register. The intrinsics defined in arm_neon.h use the
20729 NEON register element ordering, not the GCC RTL element ordering.
20731 For example, the in-memory ordering of a big-endian quadword
20732 vector with 16-bit elements when stored from register pair {d0,d1}
20733 will be (lowest address first, d0[N] is NEON register element N):
20735 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20737 When necessary, quadword registers (dN, dN+1) are moved to ARM
20738 registers from rN in the order:
20740 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20742 So that STM/LDM can be used on vectors in ARM registers, and the
20743 same memory layout will result as if VSTM/VLDM were used.
20745 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20746 possible, which allows use of appropriate alignment tags.
20747 Note that the choice of "64" is independent of the actual vector
20748 element size; this size simply ensures that the behavior is
20749 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20751 Due to limitations of those instructions, use of VST1.64/VLD1.64
20752 is not possible if:
20753 - the address contains PRE_DEC, or
20754 - the mode refers to more than 4 double-word registers
20756 In those cases, it would be possible to replace VSTM/VLDM by a
20757 sequence of instructions; this is not currently implemented since
20758 this is not certain to actually improve performance. */
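/* As a hedged illustration of the cases below (register numbers are
   examples only): a two-D-register NEON store to a plain register
   address is emitted as roughly "vst1.64 {d16-d17}, [r0:64]", whereas a
   mode needing more than four D registers, or any multi-register MVE
   mode, falls back to "vstmia"/"vldmia", and a PRE_DEC address always
   uses the "vstmdb"/"vldmdb" forms.  */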
20760 const char *
20761 output_move_neon (rtx *operands)
20763 rtx reg, mem, addr, ops[2];
20764 int regno, nregs, load = REG_P (operands[0]);
20765 const char *templ;
20766 char buff[50];
20767 machine_mode mode;
20769 reg = operands[!load];
20770 mem = operands[load];
20772 mode = GET_MODE (reg);
20774 gcc_assert (REG_P (reg));
20775 regno = REGNO (reg);
20776 nregs = REG_NREGS (reg) / 2;
20777 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
20778 || NEON_REGNO_OK_FOR_QUAD (regno));
20779 gcc_assert ((TARGET_NEON
20780 && (VALID_NEON_DREG_MODE (mode)
20781 || VALID_NEON_QREG_MODE (mode)
20782 || VALID_NEON_STRUCT_MODE (mode)))
20783 || (TARGET_HAVE_MVE
20784 && (VALID_MVE_MODE (mode)
20785 || VALID_MVE_STRUCT_MODE (mode))));
20786 gcc_assert (MEM_P (mem));
20788 addr = XEXP (mem, 0);
20790 /* Strip off const from addresses like (const (plus (...))). */
20791 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20792 addr = XEXP (addr, 0);
20794 switch (GET_CODE (addr))
20796 case POST_INC:
20797 /* We have to use vldm / vstm for too-large modes. */
20798 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20800 templ = "v%smia%%?\t%%0!, %%h1";
20801 ops[0] = XEXP (addr, 0);
20803 else
20805 templ = "v%s1.64\t%%h1, %%A0";
20806 ops[0] = mem;
20808 ops[1] = reg;
20809 break;
20811 case PRE_DEC:
20812 /* We have to use vldm / vstm in this case, since there is no
20813 pre-decrement form of the vld1 / vst1 instructions. */
20814 templ = "v%smdb%%?\t%%0!, %%h1";
20815 ops[0] = XEXP (addr, 0);
20816 ops[1] = reg;
20817 break;
20819 case POST_MODIFY:
20820 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
20821 gcc_unreachable ();
20823 case REG:
20824 /* We have to use vldm / vstm for too-large modes. */
20825 if (nregs > 1)
20827 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20828 templ = "v%smia%%?\t%%m0, %%h1";
20829 else
20830 templ = "v%s1.64\t%%h1, %%A0";
20832 ops[0] = mem;
20833 ops[1] = reg;
20834 break;
20836 /* Fall through. */
20837 case PLUS:
20838 if (GET_CODE (addr) == PLUS)
20839 addr = XEXP (addr, 0);
20840 /* Fall through. */
20841 case LABEL_REF:
20843 int i;
20844 int overlap = -1;
20845 for (i = 0; i < nregs; i++)
20847 /* Use DFmode for vldr/vstr. */
20848 ops[0] = gen_rtx_REG (DFmode, REGNO (reg) + 2 * i);
20849 ops[1] = adjust_address_nv (mem, DFmode, 8 * i);
20850 if (reg_overlap_mentioned_p (ops[0], mem))
20852 gcc_assert (overlap == -1);
20853 overlap = i;
20855 else
20857 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20858 sprintf (buff, "v%sr.64\t%%P0, %%1", load ? "ld" : "st");
20859 else
20860 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20861 output_asm_insn (buff, ops);
20864 if (overlap != -1)
20866 ops[0] = gen_rtx_REG (DFmode, REGNO (reg) + 2 * overlap);
20867 ops[1] = adjust_address_nv (mem, DFmode, 8 * overlap);
20868 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20869 sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
20870 else
20871 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20872 output_asm_insn (buff, ops);
20875 return "";
20878 default:
20879 gcc_unreachable ();
20882 sprintf (buff, templ, load ? "ld" : "st");
20883 output_asm_insn (buff, ops);
20885 return "";
20888 /* Compute and return the length of neon_mov<mode>, where <mode> is one of
20889 VSTRUCT modes: EI, OI, CI or XI for Neon, and V2x16QI, V2x8HI, V2x4SI,
20890 V2x8HF, V2x4SF, V4x16QI, V4x8HI, V4x4SI, V4x8HF, V4x4SF for MVE. */
20891 int
20892 arm_attr_length_move_neon (rtx_insn *insn)
20894 rtx reg, mem, addr;
20895 int load;
20896 machine_mode mode;
20898 extract_insn_cached (insn);
20900 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
20902 mode = GET_MODE (recog_data.operand[0]);
20903 switch (mode)
20905 case E_EImode:
20906 case E_OImode:
20907 case E_V2x16QImode:
20908 case E_V2x8HImode:
20909 case E_V2x4SImode:
20910 case E_V2x8HFmode:
20911 case E_V2x4SFmode:
20912 return 8;
20913 case E_CImode:
20914 return 12;
20915 case E_XImode:
20916 case E_V4x16QImode:
20917 case E_V4x8HImode:
20918 case E_V4x4SImode:
20919 case E_V4x8HFmode:
20920 case E_V4x4SFmode:
20921 return 16;
20922 default:
20923 gcc_unreachable ();
20927 load = REG_P (recog_data.operand[0]);
20928 reg = recog_data.operand[!load];
20929 mem = recog_data.operand[load];
20931 gcc_assert (MEM_P (mem));
20933 addr = XEXP (mem, 0);
20935 /* Strip off const from addresses like (const (plus (...))). */
20936 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20937 addr = XEXP (addr, 0);
20939 if (LABEL_REF_P (addr) || GET_CODE (addr) == PLUS)
20941 int insns = REG_NREGS (reg) / 2;
20942 return insns * 4;
20944 else
20945 return 4;
20948 /* Return nonzero if the offset in the address is an immediate. Otherwise,
20949 return zero. */
20951 int
20952 arm_address_offset_is_imm (rtx_insn *insn)
20954 rtx mem, addr;
20956 extract_insn_cached (insn);
20958 if (REG_P (recog_data.operand[0]))
20959 return 0;
20961 mem = recog_data.operand[0];
20963 gcc_assert (MEM_P (mem));
20965 addr = XEXP (mem, 0);
20967 if (REG_P (addr)
20968 || (GET_CODE (addr) == PLUS
20969 && REG_P (XEXP (addr, 0))
20970 && CONST_INT_P (XEXP (addr, 1))))
20971 return 1;
20972 else
20973 return 0;
20976 /* Output an ADD r, s, #n where n may be too big for one instruction.
20977 If adding zero to one register, output nothing. */
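/* Hedged example of the expansion (register names are arbitrary):
   0x10004 is not a valid single ARM immediate, so adding it to r1 emits
   "add r0, r1, #4" followed by "add r0, r0, #65536"; for -0x10004 the
   same two steps are emitted with "sub".  Adding zero when source and
   destination are the same register emits nothing.  */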
20978 const char *
20979 output_add_immediate (rtx *operands)
20981 HOST_WIDE_INT n = INTVAL (operands[2]);
20983 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
20985 if (n < 0)
20986 output_multi_immediate (operands,
20987 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
20988 -n);
20989 else
20990 output_multi_immediate (operands,
20991 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
20995 return "";
20998 /* Output a multiple immediate operation.
20999 OPERANDS is the vector of operands referred to in the output patterns.
21000 INSTR1 is the output pattern to use for the first constant.
21001 INSTR2 is the output pattern to use for subsequent constants.
21002 IMMED_OP is the index of the constant slot in OPERANDS.
21003 N is the constant value. */
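/* Sketch of the splitting rule (the constant below is a worked example,
   not a special case): the loop scans the 32-bit constant two bits at a
   time and, whenever it finds a set bit, peels off the 8-bit field
   starting at that even position -- always a legal rotated immediate --
   then skips past it.  For instance 0x12345678 is emitted in four
   steps: #0x278, #0x5400, #0x2340000 and #0x10000000.  */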
21004 static const char *
21005 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
21006 int immed_op, HOST_WIDE_INT n)
21008 #if HOST_BITS_PER_WIDE_INT > 32
21009 n &= 0xffffffff;
21010 #endif
21012 if (n == 0)
21014 /* Quick and easy output. */
21015 operands[immed_op] = const0_rtx;
21016 output_asm_insn (instr1, operands);
21018 else
21020 int i;
21021 const char * instr = instr1;
21023 /* Note that n is never zero here (which would give no output). */
21024 for (i = 0; i < 32; i += 2)
21026 if (n & (3 << i))
21028 operands[immed_op] = GEN_INT (n & (255 << i));
21029 output_asm_insn (instr, operands);
21030 instr = instr2;
21031 i += 6;
21036 return "";
21039 /* Return the name of a shifter operation. */
21040 static const char *
21041 arm_shift_nmem(enum rtx_code code)
21043 switch (code)
21045 case ASHIFT:
21046 return ARM_LSL_NAME;
21048 case ASHIFTRT:
21049 return "asr";
21051 case LSHIFTRT:
21052 return "lsr";
21054 case ROTATERT:
21055 return "ror";
21057 default:
21058 abort();
21062 /* Return the appropriate ARM instruction for the operation code.
21063 The returned result should not be overwritten. OP is the rtx of the
21064 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
21065 was shifted. */
21066 const char *
21067 arithmetic_instr (rtx op, int shift_first_arg)
21069 switch (GET_CODE (op))
21071 case PLUS:
21072 return "add";
21074 case MINUS:
21075 return shift_first_arg ? "rsb" : "sub";
21077 case IOR:
21078 return "orr";
21080 case XOR:
21081 return "eor";
21083 case AND:
21084 return "and";
21086 case ASHIFT:
21087 case ASHIFTRT:
21088 case LSHIFTRT:
21089 case ROTATERT:
21090 return arm_shift_nmem(GET_CODE(op));
21092 default:
21093 gcc_unreachable ();
21097 /* Ensure valid constant shifts and return the appropriate shift mnemonic
21098 for the operation code. The returned result should not be overwritten.
21099 OP is the rtx code of the shift.
21100 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
21101 constant shift amount if the shift is by a constant. */
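/* Worked example (illustrative): (mult x 8) is printed as "lsl" with
   *AMOUNTP set to 3, since 8 is 2^3, while (rotate x 5) is rewritten as
   a rotate-right and printed as "ror" with *AMOUNTP set to 27.  */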
21102 static const char *
21103 shift_op (rtx op, HOST_WIDE_INT *amountp)
21105 const char * mnem;
21106 enum rtx_code code = GET_CODE (op);
21108 switch (code)
21110 case ROTATE:
21111 if (!CONST_INT_P (XEXP (op, 1)))
21113 output_operand_lossage ("invalid shift operand");
21114 return NULL;
21117 code = ROTATERT;
21118 *amountp = 32 - INTVAL (XEXP (op, 1));
21119 mnem = "ror";
21120 break;
21122 case ASHIFT:
21123 case ASHIFTRT:
21124 case LSHIFTRT:
21125 case ROTATERT:
21126 mnem = arm_shift_nmem(code);
21127 if (CONST_INT_P (XEXP (op, 1)))
21129 *amountp = INTVAL (XEXP (op, 1));
21131 else if (REG_P (XEXP (op, 1)))
21133 *amountp = -1;
21134 return mnem;
21136 else
21138 output_operand_lossage ("invalid shift operand");
21139 return NULL;
21141 break;
21143 case MULT:
21144 /* We never have to worry about the amount being other than a
21145 power of 2, since this case can never be reloaded from a reg. */
21146 if (!CONST_INT_P (XEXP (op, 1)))
21148 output_operand_lossage ("invalid shift operand");
21149 return NULL;
21152 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
21154 /* Amount must be a power of two. */
21155 if (*amountp & (*amountp - 1))
21157 output_operand_lossage ("invalid shift operand");
21158 return NULL;
21161 *amountp = exact_log2 (*amountp);
21162 gcc_assert (IN_RANGE (*amountp, 0, 31));
21163 return ARM_LSL_NAME;
21165 default:
21166 output_operand_lossage ("invalid shift operand");
21167 return NULL;
21170 /* This is not 100% correct, but follows from the desire to merge
21171 multiplication by a power of 2 with the recognizer for a
21172 shift. >=32 is not a valid shift for "lsl", so we must try and
21173 output a shift that produces the correct arithmetical result.
21174 Using lsr #32 is identical except for the fact that the carry bit
21175 is not set correctly if we set the flags; but we never use the
21176 carry bit from such an operation, so we can ignore that. */
21177 if (code == ROTATERT)
21178 /* Rotate is just modulo 32. */
21179 *amountp &= 31;
21180 else if (*amountp != (*amountp & 31))
21182 if (code == ASHIFT)
21183 mnem = "lsr";
21184 *amountp = 32;
21187 /* Shifts of 0 are no-ops. */
21188 if (*amountp == 0)
21189 return NULL;
21191 return mnem;
21194 /* Output a .ascii pseudo-op, keeping track of lengths. This is
21195 because /bin/as is horribly restrictive. The judgement about
21196 whether or not each character is 'printable' (and can be output as
21197 is) or not (and must be printed with an octal escape) must be made
21198 with reference to the *host* character set -- the situation is
21199 similar to that discussed in the comments above pp_c_char in
21200 c-pretty-print.cc. */
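/* For instance (hypothetical input): the bytes 'O', 'K', '\n' come out
   as   .ascii "OK\012"   -- printable characters are copied, with '\\'
   and '"' backslash-escaped, everything else becomes a three-digit
   octal escape, and a fresh .ascii directive is started once roughly
   MAX_ASCII_LEN characters have been emitted on the current one.  */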
21202 #define MAX_ASCII_LEN 51
21204 void
21205 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
21207 int i;
21208 int len_so_far = 0;
21210 fputs ("\t.ascii\t\"", stream);
21212 for (i = 0; i < len; i++)
21214 int c = p[i];
21216 if (len_so_far >= MAX_ASCII_LEN)
21218 fputs ("\"\n\t.ascii\t\"", stream);
21219 len_so_far = 0;
21222 if (ISPRINT (c))
21224 if (c == '\\' || c == '\"')
21226 putc ('\\', stream);
21227 len_so_far++;
21229 putc (c, stream);
21230 len_so_far++;
21232 else
21234 fprintf (stream, "\\%03o", c);
21235 len_so_far += 4;
21239 fputs ("\"\n", stream);
21243 /* Compute the register save mask for registers 0 through 12
21244 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
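/* Illustrative example (simplified; assumes the ARM-state hard frame
   pointer r11): for an ordinary function that uses r4 and r7 and needs
   a frame pointer, the normal path below yields
   (1 << 4) | (1 << 7) | (1 << 11).  FIQ handlers only scan r0-r7,
   because r8-r12 are banked.  */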
21246 static unsigned long
21247 arm_compute_save_reg0_reg12_mask (void)
21249 unsigned long func_type = arm_current_func_type ();
21250 unsigned long save_reg_mask = 0;
21251 unsigned int reg;
21253 if (IS_INTERRUPT (func_type))
21255 unsigned int max_reg;
21256 /* Interrupt functions must not corrupt any registers,
21257 even call clobbered ones. If this is a leaf function
21258 we can just examine the registers used by the RTL, but
21259 otherwise we have to assume that whatever function is
21260 called might clobber anything, and so we have to save
21261 all the call-clobbered registers as well. */
21262 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
21263 /* FIQ handlers have registers r8 - r12 banked, so
21264 we only need to check r0 - r7. Normal ISRs only
21265 bank r14 and r15, so we must check up to r12.
21266 r13 is the stack pointer which is always preserved,
21267 so we do not need to consider it here. */
21268 max_reg = 7;
21269 else
21270 max_reg = 12;
21272 for (reg = 0; reg <= max_reg; reg++)
21273 if (reg_needs_saving_p (reg))
21274 save_reg_mask |= (1 << reg);
21276 /* Also save the pic base register if necessary. */
21277 if (PIC_REGISTER_MAY_NEED_SAVING
21278 && crtl->uses_pic_offset_table)
21279 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21281 else if (IS_VOLATILE(func_type))
21283 /* For noreturn functions we historically omitted register saves
21284 altogether. However this really messes up debugging. As a
21285 compromise save just the frame pointers. Combined with the link
21286 register saved elsewhere this should be sufficient to get
21287 a backtrace. */
21288 if (frame_pointer_needed)
21289 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21290 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
21291 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21292 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
21293 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
21295 else
21297 /* In the normal case we only need to save those registers
21298 which are call saved and which are used by this function. */
21299 for (reg = 0; reg <= 11; reg++)
21300 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21301 save_reg_mask |= (1 << reg);
21303 /* Handle the frame pointer as a special case. */
21304 if (frame_pointer_needed)
21305 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21307 /* If we aren't loading the PIC register,
21308 don't stack it even though it may be live. */
21309 if (PIC_REGISTER_MAY_NEED_SAVING
21310 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
21311 || crtl->uses_pic_offset_table))
21312 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21314 /* The prologue will copy SP into R0, so save it. */
21315 if (IS_STACKALIGN (func_type))
21316 save_reg_mask |= 1;
21319 /* Save registers so the exception handler can modify them. */
21320 if (crtl->calls_eh_return)
21322 unsigned int i;
21324 for (i = 0; ; i++)
21326 reg = EH_RETURN_DATA_REGNO (i);
21327 if (reg == INVALID_REGNUM)
21328 break;
21329 save_reg_mask |= 1 << reg;
21333 return save_reg_mask;
21336 /* Return true if r3 is live at the start of the function. */
21338 static bool
21339 arm_r3_live_at_start_p (void)
21341 /* Just look at cfg info, which is still close enough to correct at this
21342 point. This gives false positives for broken functions that might use
21343 uninitialized data that happens to be allocated in r3, but who cares? */
21344 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
21347 /* Compute the number of bytes used to store the static chain register on the
21348 stack, above the stack frame. We need to know this accurately to get the
21349 alignment of the rest of the stack frame correct. */
21351 static int
21352 arm_compute_static_chain_stack_bytes (void)
21354 /* Once the value is updated from the init value of -1, do not
21355 re-compute. */
21356 if (cfun->machine->static_chain_stack_bytes != -1)
21357 return cfun->machine->static_chain_stack_bytes;
21359 /* See the defining assertion in arm_expand_prologue. */
21360 if (IS_NESTED (arm_current_func_type ())
21361 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21362 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21363 || flag_stack_clash_protection)
21364 && !df_regs_ever_live_p (LR_REGNUM)))
21365 && arm_r3_live_at_start_p ()
21366 && crtl->args.pretend_args_size == 0)
21367 return 4;
21369 return 0;
21372 /* Compute a bit mask of which core registers need to be
21373 saved on the stack for the current function.
21374 This is used by arm_compute_frame_layout, which may add extra registers. */
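/* Sketch of the common cases (simplified, not exhaustive): with
   TARGET_APCS_FRAME and a frame pointer the mask always contains fp,
   ip, lr and pc; otherwise it is the r0-r12 mask computed above, plus
   LR whenever LR is live, or when pushing it lets a size-optimized
   epilogue simply pop straight into the PC.  */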
21376 static unsigned long
21377 arm_compute_save_core_reg_mask (void)
21379 unsigned int save_reg_mask = 0;
21380 unsigned long func_type = arm_current_func_type ();
21381 unsigned int reg;
21383 if (IS_NAKED (func_type))
21384 /* This should never really happen. */
21385 return 0;
21387 /* If we are creating a stack frame, then we must save the frame pointer,
21388 IP (which will hold the old stack pointer), LR and the PC. */
21389 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21390 save_reg_mask |=
21391 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
21392 | (1 << IP_REGNUM)
21393 | (1 << LR_REGNUM)
21394 | (1 << PC_REGNUM);
21396 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
21398 if (arm_current_function_pac_enabled_p ())
21399 save_reg_mask |= 1 << IP_REGNUM;
21401 /* Decide if we need to save the link register.
21402 Interrupt routines have their own banked link register,
21403 so they never need to save it.
21404 Otherwise if we do not use the link register we do not need to save
21405 it. If we are pushing other registers onto the stack however, we
21406 can save an instruction in the epilogue by pushing the link register
21407 now and then popping it back into the PC. This incurs extra memory
21408 accesses though, so we only do it when optimizing for size, and only
21409 if we know that we will not need a fancy return sequence. */
21410 if (df_regs_ever_live_p (LR_REGNUM)
21411 || (save_reg_mask
21412 && optimize_size
21413 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
21414 && !crtl->tail_call_emit
21415 && !crtl->calls_eh_return))
21416 save_reg_mask |= 1 << LR_REGNUM;
21418 if (cfun->machine->lr_save_eliminated)
21419 save_reg_mask &= ~ (1 << LR_REGNUM);
21421 if (TARGET_REALLY_IWMMXT
21422 && ((bit_count (save_reg_mask)
21423 + ARM_NUM_INTS (crtl->args.pretend_args_size +
21424 arm_compute_static_chain_stack_bytes())
21425 ) % 2) != 0)
21427 /* The total number of registers that are going to be pushed
21428 onto the stack is odd. We need to ensure that the stack
21429 is 64-bit aligned before we start to save iWMMXt registers,
21430 and also before we start to create locals. (A local variable
21431 might be a double or long long which we will load/store using
21432 an iWMMXt instruction). Therefore we need to push another
21433 ARM register, so that the stack will be 64-bit aligned. We
21434 try to avoid using the arg registers (r0 - r3) as they might be
21435 used to pass values in a tail call. */
21436 for (reg = 4; reg <= 12; reg++)
21437 if ((save_reg_mask & (1 << reg)) == 0)
21438 break;
21440 if (reg <= 12)
21441 save_reg_mask |= (1 << reg);
21442 else
21444 cfun->machine->sibcall_blocked = 1;
21445 save_reg_mask |= (1 << 3);
21449 /* We may need to push an additional register for use initializing the
21450 PIC base register. */
21451 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
21452 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
21454 reg = thumb_find_work_register (1 << 4);
21455 if (!call_used_or_fixed_reg_p (reg))
21456 save_reg_mask |= (1 << reg);
21459 return save_reg_mask;
21462 /* Compute a bit mask of which core registers need to be
21463 saved on the stack for the current function. */
21464 static unsigned long
21465 thumb1_compute_save_core_reg_mask (void)
21467 unsigned long mask;
21468 unsigned reg;
21470 mask = 0;
21471 for (reg = 0; reg < 12; reg ++)
21472 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21473 mask |= 1 << reg;
21475 /* Handle the frame pointer as a special case. */
21476 if (frame_pointer_needed)
21477 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21479 if (flag_pic
21480 && !TARGET_SINGLE_PIC_BASE
21481 && arm_pic_register != INVALID_REGNUM
21482 && crtl->uses_pic_offset_table)
21483 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21485 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
21486 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
21487 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21489 /* LR will also be pushed if any lo regs are pushed. */
21490 if (mask & 0xff || thumb_force_lr_save ())
21491 mask |= (1 << LR_REGNUM);
21493 bool call_clobbered_scratch
21494 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
21495 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
21497 /* Make sure we have a low work register if we need one. We will
21498 need one if we are going to push a high register, but we are not
21499 currently intending to push a low register. However if both the
21500 prologue and epilogue have a spare call-clobbered low register,
21501 then we won't need to find an additional work register. It does
21502 not need to be the same register in the prologue and
21503 epilogue. */
21504 if ((mask & 0xff) == 0
21505 && !call_clobbered_scratch
21506 && ((mask & 0x0f00) || TARGET_BACKTRACE))
21508 /* Use thumb_find_work_register to choose which register
21509 we will use. If the register is live then we will
21510 have to push it. Use LAST_LO_REGNUM as our fallback
21511 choice for the register to select. */
21512 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
21513 /* Make sure the register returned by thumb_find_work_register is
21514 not part of the return value. */
21515 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
21516 reg = LAST_LO_REGNUM;
21518 if (callee_saved_reg_p (reg))
21519 mask |= 1 << reg;
21522 /* The 504 below is 8 bytes less than 512 because there are two possible
21523 alignment words. We can't tell here if they will be present or not so we
21524 have to play it safe and assume that they are. */
21525 if ((CALLER_INTERWORKING_SLOT_SIZE +
21526 ROUND_UP_WORD (get_frame_size ()) +
21527 crtl->outgoing_args_size) >= 504)
21529 /* This is the same as the code in thumb1_expand_prologue() which
21530 determines which register to use for stack decrement. */
21531 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
21532 if (mask & (1 << reg))
21533 break;
21535 if (reg > LAST_LO_REGNUM)
21537 /* Make sure we have a register available for stack decrement. */
21538 mask |= 1 << LAST_LO_REGNUM;
21542 return mask;
21545 /* Return the number of bytes required to save VFP registers. */
21546 static int
21547 arm_get_vfp_saved_size (void)
21549 unsigned int regno;
21550 int count;
21551 int saved;
21553 saved = 0;
21554 /* Space for saved VFP registers. */
21555 if (TARGET_VFP_BASE)
21557 count = 0;
21558 for (regno = FIRST_VFP_REGNUM;
21559 regno < LAST_VFP_REGNUM;
21560 regno += 2)
21562 if (!reg_needs_saving_p (regno) && !reg_needs_saving_p (regno + 1))
21564 if (count > 0)
21566 /* Workaround ARM10 VFPr1 bug. */
21567 if (count == 2 && !arm_arch6)
21568 count++;
21569 saved += count * 8;
21571 count = 0;
21573 else
21574 count++;
21576 if (count > 0)
21578 if (count == 2 && !arm_arch6)
21579 count++;
21580 saved += count * 8;
21583 return saved;
21587 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
21588 everything bar the final return instruction. If SIMPLE_RETURN is true,
21589 then do not output the epilogue, because it has already been emitted in RTL.
21591 Note: do not forget to update length attribute of corresponding insn pattern
21592 when changing assembly output (eg. length attribute of
21593 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
21594 register clearing sequences). */
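/* Typical results, for illustration only (register lists are examples):
   a normal function whose saved registers include LR returns with
   something like "pop {r4, r5, pc}"; an interrupt handler instead uses
   "ldmfd sp!, {r0, r1, pc}^" so that the CPSR is restored from the
   SPSR, and an ISR/FIQ return with no registers to pop is
   "subs pc, lr, #4".  */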
21595 const char *
21596 output_return_instruction (rtx operand, bool really_return, bool reverse,
21597 bool simple_return)
21599 char conditional[10];
21600 char instr[100];
21601 unsigned reg;
21602 unsigned long live_regs_mask;
21603 unsigned long func_type;
21604 arm_stack_offsets *offsets;
21606 func_type = arm_current_func_type ();
21608 if (IS_NAKED (func_type))
21609 return "";
21611 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
21613 /* If this function was declared non-returning, and we have
21614 found a tail call, then we have to trust that the called
21615 function won't return. */
21616 if (really_return)
21618 rtx ops[2];
21620 /* Otherwise, trap an attempted return by aborting. */
21621 ops[0] = operand;
21622 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
21623 : "abort");
21624 assemble_external_libcall (ops[1]);
21625 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
21628 return "";
21631 gcc_assert (!cfun->calls_alloca || really_return);
21633 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
21635 cfun->machine->return_used_this_function = 1;
21637 offsets = arm_get_frame_offsets ();
21638 live_regs_mask = offsets->saved_regs_mask;
21640 if (!simple_return && live_regs_mask)
21642 const char * return_reg;
21644 /* If we do not have any special requirements for function exit
21645 (e.g. interworking) then we can load the return address
21646 directly into the PC. Otherwise we must load it into LR. */
21647 if (really_return
21648 && !IS_CMSE_ENTRY (func_type)
21649 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
21650 return_reg = reg_names[PC_REGNUM];
21651 else
21652 return_reg = reg_names[LR_REGNUM];
21654 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
21656 /* There are three possible reasons for the IP register
21657 being saved: 1) a stack frame was created, in which case
21658 IP contains the old stack pointer, or 2) an ISR routine
21659 corrupted it, or 3) it was saved to align the stack on
21660 iWMMXt. In case 1, restore IP into SP, otherwise just
21661 restore IP. */
21662 if (frame_pointer_needed)
21664 live_regs_mask &= ~ (1 << IP_REGNUM);
21665 live_regs_mask |= (1 << SP_REGNUM);
21667 else
21668 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
21671 /* On some ARM architectures it is faster to use LDR rather than
21672 LDM to load a single register. On other architectures, the
21673 cost is the same. In 26 bit mode, or for exception handlers,
21674 we have to use LDM to load the PC so that the CPSR is also
21675 restored. */
21676 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
21677 if (live_regs_mask == (1U << reg))
21678 break;
21680 if (reg <= LAST_ARM_REGNUM
21681 && (reg != LR_REGNUM
21682 || ! really_return
21683 || ! IS_INTERRUPT (func_type)))
21685 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
21686 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
21688 else
21690 char *p;
21691 int first = 1;
21693 /* Generate the load multiple instruction to restore the
21694 registers. Note we can get here, even if
21695 frame_pointer_needed is true, but only if sp already
21696 points to the base of the saved core registers. */
21697 if (live_regs_mask & (1 << SP_REGNUM))
21699 unsigned HOST_WIDE_INT stack_adjust;
21701 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
21702 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
21704 if (stack_adjust && arm_arch5t && TARGET_ARM)
21705 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
21706 else
21708 /* If we can't use ldmib (SA110 bug),
21709 then try to pop r3 instead. */
21710 if (stack_adjust)
21711 live_regs_mask |= 1 << 3;
21713 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
21716 /* For interrupt returns we have to use an LDM rather than
21717 a POP so that we can use the exception return variant. */
21718 else if (IS_INTERRUPT (func_type))
21719 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
21720 else
21721 sprintf (instr, "pop%s\t{", conditional);
21723 p = instr + strlen (instr);
21725 for (reg = 0; reg <= SP_REGNUM; reg++)
21726 if (live_regs_mask & (1 << reg))
21728 int l = strlen (reg_names[reg]);
21730 if (first)
21731 first = 0;
21732 else
21734 memcpy (p, ", ", 2);
21735 p += 2;
21738 memcpy (p, "%|", 2);
21739 memcpy (p + 2, reg_names[reg], l);
21740 p += l + 2;
21743 if (live_regs_mask & (1 << LR_REGNUM))
21745 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
21746 /* If returning from an interrupt, restore the CPSR. */
21747 if (IS_INTERRUPT (func_type))
21748 strcat (p, "^");
21750 else
21751 strcpy (p, "}");
21754 output_asm_insn (instr, & operand);
21756 /* See if we need to generate an extra instruction to
21757 perform the actual function return. */
21758 if (really_return
21759 && func_type != ARM_FT_INTERWORKED
21760 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
21762 /* The return has already been handled
21763 by loading the LR into the PC. */
21764 return "";
21768 if (really_return)
21770 switch ((int) ARM_FUNC_TYPE (func_type))
21772 case ARM_FT_ISR:
21773 case ARM_FT_FIQ:
21774 /* ??? This is wrong for unified assembly syntax. */
21775 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
21776 break;
21778 case ARM_FT_INTERWORKED:
21779 gcc_assert (arm_arch5t || arm_arch4t);
21780 sprintf (instr, "bx%s\t%%|lr", conditional);
21781 break;
21783 case ARM_FT_EXCEPTION:
21784 /* ??? This is wrong for unified assembly syntax. */
21785 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
21786 break;
21788 default:
21789 if (IS_CMSE_ENTRY (func_type))
21791 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
21792 emitted by cmse_nonsecure_entry_clear_before_return () and the
21793 VSTR/VLDR instructions in the prologue and epilogue. */
21794 if (!TARGET_HAVE_FPCXT_CMSE)
21796 /* Check if we have to clear the 'GE bits' which is only used if
21797 parallel add and subtraction instructions are available. */
21798 if (TARGET_INT_SIMD)
21799 snprintf (instr, sizeof (instr),
21800 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
21801 else
21802 snprintf (instr, sizeof (instr),
21803 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
21805 output_asm_insn (instr, & operand);
21806 /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
21807 care of it. */
21808 if (TARGET_HARD_FLOAT)
21810 /* Clear the cumulative exception-status bits (0-4,7) and
21811 the condition code bits (28-31) of the FPSCR. We need
21812 to remember to clear the first scratch register used
21813 (IP) and save and restore the second (r4).
21815 Important note: the length of the
21816 thumb2_cmse_entry_return insn pattern must account for
21817 the size of the below instructions. */
21818 output_asm_insn ("push\t{%|r4}", & operand);
21819 output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
21820 output_asm_insn ("movw\t%|r4, #65376", & operand);
21821 output_asm_insn ("movt\t%|r4, #4095", & operand);
21822 output_asm_insn ("and\t%|ip, %|r4", & operand);
21823 output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
21824 output_asm_insn ("pop\t{%|r4}", & operand);
21825 output_asm_insn ("mov\t%|ip, %|lr", & operand);
21828 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
21830 /* Use bx if it's available. */
21831 else if (arm_arch5t || arm_arch4t)
21832 sprintf (instr, "bx%s\t%%|lr", conditional);
21833 else
21834 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
21835 break;
21838 output_asm_insn (instr, & operand);
21841 return "";
21844 /* Output in FILE asm statements needed to declare the NAME of the function
21845 defined by its DECL node. */
21847 void
21848 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
21850 size_t cmse_name_len;
21851 char *cmse_name = 0;
21852 char cmse_prefix[] = "__acle_se_";
21854 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
21855 extra function label for each function with the 'cmse_nonsecure_entry'
21856 attribute. This extra function label should be prepended with
21857 '__acle_se_', telling the linker that it needs to create secure gateway
21858 veneers for this function. */
21859 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
21860 DECL_ATTRIBUTES (decl)))
21862 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
21863 cmse_name = XALLOCAVEC (char, cmse_name_len);
21864 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
21865 targetm.asm_out.globalize_label (file, cmse_name);
21867 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
21868 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
21871 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
21872 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21873 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21874 ASM_OUTPUT_FUNCTION_LABEL (file, name, decl);
21876 if (cmse_name)
21877 ASM_OUTPUT_LABEL (file, cmse_name);
21879 ARM_OUTPUT_FN_UNWIND (file, TRUE);
21882 /* Write the function name into the code section, directly preceding
21883 the function prologue.
21885 Code will be output similar to this:
21887 .ascii "arm_poke_function_name", 0
21888 .align
21890 .word 0xff000000 + (t1 - t0)
21891 arm_poke_function_name
21892 mov ip, sp
21893 stmfd sp!, {fp, ip, lr, pc}
21894 sub fp, ip, #4
21896 When performing a stack backtrace, code can inspect the value
21897 of 'pc' stored at 'fp' + 0. If the trace function then looks
21898 at location pc - 12 and the top 8 bits are set, then we know
21899 that there is a function name embedded immediately preceding this
21900 location, whose length is ((pc[-3]) & ~0xff000000).
21902 We assume that pc is declared as a pointer to an unsigned long.
21904 It is of no benefit to output the function name if we are assembling
21905 a leaf function. These function types will not contain a stack
21906 backtrace structure, therefore it is not possible to determine the
21907 function name. */
21908 void
21909 arm_poke_function_name (FILE *stream, const char *name)
21911 unsigned long alignlength;
21912 unsigned long length;
21913 rtx x;
21915 length = strlen (name) + 1;
21916 alignlength = ROUND_UP_WORD (length);
21918 ASM_OUTPUT_ASCII (stream, name, length);
21919 ASM_OUTPUT_ALIGN (stream, 2);
21920 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
21921 assemble_aligned_integer (UNITS_PER_WORD, x);
21924 /* Place some comments into the assembler stream
21925 describing the current function. */
21926 static void
21927 arm_output_function_prologue (FILE *f)
21929 unsigned long func_type;
21931 /* Sanity check. */
21932 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
21934 func_type = arm_current_func_type ();
21936 switch ((int) ARM_FUNC_TYPE (func_type))
21938 default:
21939 case ARM_FT_NORMAL:
21940 break;
21941 case ARM_FT_INTERWORKED:
21942 asm_fprintf (f, "\t%@ Function supports interworking.\n");
21943 break;
21944 case ARM_FT_ISR:
21945 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
21946 break;
21947 case ARM_FT_FIQ:
21948 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
21949 break;
21950 case ARM_FT_EXCEPTION:
21951 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
21952 break;
21955 if (IS_NAKED (func_type))
21956 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
21958 if (IS_VOLATILE (func_type))
21959 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
21961 if (IS_NESTED (func_type))
21962 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
21963 if (IS_STACKALIGN (func_type))
21964 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
21965 if (IS_CMSE_ENTRY (func_type))
21966 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
21968 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
21969 (HOST_WIDE_INT) crtl->args.size,
21970 crtl->args.pretend_args_size,
21971 (HOST_WIDE_INT) get_frame_size ());
21973 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
21974 frame_pointer_needed,
21975 cfun->machine->uses_anonymous_args);
21977 if (cfun->machine->lr_save_eliminated)
21978 asm_fprintf (f, "\t%@ link register save eliminated.\n");
21980 if (crtl->calls_eh_return)
21981 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
21985 static void
21986 arm_output_function_epilogue (FILE *)
21988 arm_stack_offsets *offsets;
21990 if (TARGET_THUMB1)
21992 int regno;
21994 /* Emit any call-via-reg trampolines that are needed for v4t support
21995 of call_reg and call_value_reg type insns. */
21996 for (regno = 0; regno < LR_REGNUM; regno++)
21998 rtx label = cfun->machine->call_via[regno];
22000 if (label != NULL)
22002 switch_to_section (function_section (current_function_decl));
22003 targetm.asm_out.internal_label (asm_out_file, "L",
22004 CODE_LABEL_NUMBER (label));
22005 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
22009 /* ??? Probably not safe to set this here, since it assumes that a
22010 function will be emitted as assembly immediately after we generate
22011 RTL for it. This does not happen for inline functions. */
22012 cfun->machine->return_used_this_function = 0;
22014 else /* TARGET_32BIT */
22016 /* We need to take into account any stack-frame rounding. */
22017 offsets = arm_get_frame_offsets ();
22019 gcc_assert (!use_return_insn (FALSE, NULL)
22020 || (cfun->machine->return_used_this_function != 0)
22021 || offsets->saved_regs == offsets->outgoing_args
22022 || frame_pointer_needed);
22026 /* Generate and emit a sequence of insns equivalent to PUSH, but using
22027 STR and STRD. If an even number of registers is being pushed, an STRD
22028 pattern is created for each register pair. If an odd number of
22029 registers is pushed, an initial STR is emitted, followed by
22030 as many STRD instructions as are needed. This works best when the
22031 stack is initially 64-bit aligned (the normal case), since it
22032 ensures that each STRD is also 64-bit aligned. */
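/* Example of the intended shape (register numbers are arbitrary):
   pushing {r4, r5, r6} first emits "str r4, [sp, #-12]!", which keeps
   the following store dword-aligned and allocates all 12 bytes at once,
   and then "strd r5, r6, [sp, #4]" for the remaining pair.  */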
22033 static void
22034 thumb2_emit_strd_push (unsigned long saved_regs_mask)
22036 int num_regs = 0;
22037 int i;
22038 int regno;
22039 rtx par = NULL_RTX;
22040 rtx dwarf = NULL_RTX;
22041 rtx tmp;
22042 bool first = true;
22044 num_regs = bit_count (saved_regs_mask);
22046 /* Must be at least one register to save, and can't save SP or PC. */
22047 gcc_assert (num_regs > 0 && num_regs <= 14);
22048 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22049 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
22051 /* Create sequence for DWARF info. All the frame-related data for
22052 debugging is held in this wrapper. */
22053 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
22055 /* Describe the stack adjustment. */
22056 tmp = gen_rtx_SET (stack_pointer_rtx,
22057 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22058 RTX_FRAME_RELATED_P (tmp) = 1;
22059 XVECEXP (dwarf, 0, 0) = tmp;
22061 /* Find the first register. */
22062 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
22065 i = 0;
22067 /* If there's an odd number of registers to push, start off by
22068 pushing a single register. This ensures that subsequent strd
22069 operations are dword aligned (assuming that SP was originally
22070 64-bit aligned). */
22071 if ((num_regs & 1) != 0)
22073 rtx reg, mem, insn;
22075 reg = gen_rtx_REG (SImode, regno);
22076 if (num_regs == 1)
22077 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
22078 stack_pointer_rtx));
22079 else
22080 mem = gen_frame_mem (Pmode,
22081 gen_rtx_PRE_MODIFY
22082 (Pmode, stack_pointer_rtx,
22083 plus_constant (Pmode, stack_pointer_rtx,
22084 -4 * num_regs)));
22086 tmp = gen_rtx_SET (mem, reg);
22087 RTX_FRAME_RELATED_P (tmp) = 1;
22088 insn = emit_insn (tmp);
22089 RTX_FRAME_RELATED_P (insn) = 1;
22090 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22091 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
22092 RTX_FRAME_RELATED_P (tmp) = 1;
22093 i++;
22094 regno++;
22095 XVECEXP (dwarf, 0, i) = tmp;
22096 first = false;
22099 while (i < num_regs)
22100 if (saved_regs_mask & (1 << regno))
22102 rtx reg1, reg2, mem1, mem2;
22103 rtx tmp0, tmp1, tmp2;
22104 int regno2;
22106 /* Find the register to pair with this one. */
22107 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
22108 regno2++)
22111 reg1 = gen_rtx_REG (SImode, regno);
22112 reg2 = gen_rtx_REG (SImode, regno2);
22114 if (first)
22116 rtx insn;
22118 first = false;
22119 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
22120 stack_pointer_rtx,
22121 -4 * num_regs));
22122 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
22123 stack_pointer_rtx,
22124 -4 * (num_regs - 1)));
22125 tmp0 = gen_rtx_SET (stack_pointer_rtx,
22126 plus_constant (Pmode, stack_pointer_rtx,
22127 -4 * (num_regs)));
22128 tmp1 = gen_rtx_SET (mem1, reg1);
22129 tmp2 = gen_rtx_SET (mem2, reg2);
22130 RTX_FRAME_RELATED_P (tmp0) = 1;
22131 RTX_FRAME_RELATED_P (tmp1) = 1;
22132 RTX_FRAME_RELATED_P (tmp2) = 1;
22133 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
22134 XVECEXP (par, 0, 0) = tmp0;
22135 XVECEXP (par, 0, 1) = tmp1;
22136 XVECEXP (par, 0, 2) = tmp2;
22137 insn = emit_insn (par);
22138 RTX_FRAME_RELATED_P (insn) = 1;
22139 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22141 else
22143 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
22144 stack_pointer_rtx,
22145 4 * i));
22146 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
22147 stack_pointer_rtx,
22148 4 * (i + 1)));
22149 tmp1 = gen_rtx_SET (mem1, reg1);
22150 tmp2 = gen_rtx_SET (mem2, reg2);
22151 RTX_FRAME_RELATED_P (tmp1) = 1;
22152 RTX_FRAME_RELATED_P (tmp2) = 1;
22153 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22154 XVECEXP (par, 0, 0) = tmp1;
22155 XVECEXP (par, 0, 1) = tmp2;
22156 emit_insn (par);
22159 /* Create unwind information. This is an approximation. */
22160 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
22161 plus_constant (Pmode,
22162 stack_pointer_rtx,
22163 4 * i)),
22164 reg1);
22165 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
22166 plus_constant (Pmode,
22167 stack_pointer_rtx,
22168 4 * (i + 1))),
22169 reg2);
22171 RTX_FRAME_RELATED_P (tmp1) = 1;
22172 RTX_FRAME_RELATED_P (tmp2) = 1;
22173 XVECEXP (dwarf, 0, i + 1) = tmp1;
22174 XVECEXP (dwarf, 0, i + 2) = tmp2;
22175 i += 2;
22176 regno = regno2 + 1;
22178 else
22179 regno++;
22181 return;
22184 /* STRD in ARM mode requires consecutive registers. This function emits STRD
22185 whenever possible, otherwise it emits single-word stores. The first store
22186 also allocates stack space for all saved registers, using pre-indexed
22187 addressing with writeback. All other stores use offset addressing. If no STRD
22188 can be emitted, this function emits a sequence of single-word stores rather
22189 than an STM, because single-word stores give more freedom for
22190 scheduling and can be turned into an STM by peephole optimizations. */
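/* Illustrative expansion (hypothetical register set): for {r4, r5, r7}
   the first pair is stored with writeback, "strd r4, r5, [sp, #-12]!",
   which also reserves the whole 12-byte area, and the leftover register
   is then stored at its offset, "str r7, [sp, #8]".  */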
22191 static void
22192 arm_emit_strd_push (unsigned long saved_regs_mask)
22194 int num_regs = 0;
22195 int i, j, dwarf_index = 0;
22196 int offset = 0;
22197 rtx dwarf = NULL_RTX;
22198 rtx insn = NULL_RTX;
22199 rtx tmp, mem;
22201 /* TODO: More efficient code could be emitted by changing the
22202 layout, e.g., first push all pairs that can use STRD to keep the
22203 stack aligned, and then push all other registers. */
22204 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22205 if (saved_regs_mask & (1 << i))
22206 num_regs++;
22208 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22209 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
22210 gcc_assert (num_regs > 0);
22212 /* Create sequence for DWARF info. */
22213 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
22215 /* For dwarf info, we generate explicit stack update. */
22216 tmp = gen_rtx_SET (stack_pointer_rtx,
22217 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22218 RTX_FRAME_RELATED_P (tmp) = 1;
22219 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22221 /* Save registers. */
22222 offset = - 4 * num_regs;
22223 j = 0;
22224 while (j <= LAST_ARM_REGNUM)
22225 if (saved_regs_mask & (1 << j))
22227 if ((j % 2 == 0)
22228 && (saved_regs_mask & (1 << (j + 1))))
22230 /* Current register and previous register form register pair for
22231 which STRD can be generated. */
22232 if (offset < 0)
22234 /* Allocate stack space for all saved registers. */
22235 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22236 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22237 mem = gen_frame_mem (DImode, tmp);
22238 offset = 0;
22240 else if (offset > 0)
22241 mem = gen_frame_mem (DImode,
22242 plus_constant (Pmode,
22243 stack_pointer_rtx,
22244 offset));
22245 else
22246 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22248 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
22249 RTX_FRAME_RELATED_P (tmp) = 1;
22250 tmp = emit_insn (tmp);
22252 /* Record the first store insn. */
22253 if (dwarf_index == 1)
22254 insn = tmp;
22256 /* Generate dwarf info. */
22257 mem = gen_frame_mem (SImode,
22258 plus_constant (Pmode,
22259 stack_pointer_rtx,
22260 offset));
22261 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22262 RTX_FRAME_RELATED_P (tmp) = 1;
22263 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22265 mem = gen_frame_mem (SImode,
22266 plus_constant (Pmode,
22267 stack_pointer_rtx,
22268 offset + 4));
22269 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
22270 RTX_FRAME_RELATED_P (tmp) = 1;
22271 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22273 offset += 8;
22274 j += 2;
22276 else
22278 /* Emit a single word store. */
22279 if (offset < 0)
22281 /* Allocate stack space for all saved registers. */
22282 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22283 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22284 mem = gen_frame_mem (SImode, tmp);
22285 offset = 0;
22287 else if (offset > 0)
22288 mem = gen_frame_mem (SImode,
22289 plus_constant (Pmode,
22290 stack_pointer_rtx,
22291 offset));
22292 else
22293 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22295 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22296 RTX_FRAME_RELATED_P (tmp) = 1;
22297 tmp = emit_insn (tmp);
22299 /* Record the first store insn. */
22300 if (dwarf_index == 1)
22301 insn = tmp;
22303 /* Generate dwarf info. */
22304 mem = gen_frame_mem (SImode,
22305 plus_constant(Pmode,
22306 stack_pointer_rtx,
22307 offset));
22308 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22309 RTX_FRAME_RELATED_P (tmp) = 1;
22310 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22312 offset += 4;
22313 j += 1;
22316 else
22317 j++;
22319 /* Attach dwarf info to the first insn we generate. */
22320 gcc_assert (insn != NULL_RTX);
22321 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22322 RTX_FRAME_RELATED_P (insn) = 1;
22325 /* Generate and emit an insn that we will recognize as a push_multi.
22326 Unfortunately, since this insn does not reflect very well the actual
22327 semantics of the operation, we need to annotate the insn for the benefit
22328 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
22329 MASK for registers that should be annotated for DWARF2 frame unwind
22330 information. */
22331 static rtx
22332 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
22334 int num_regs = 0;
22335 int num_dwarf_regs = 0;
22336 int i, j;
22337 rtx par;
22338 rtx dwarf;
22339 int dwarf_par_index;
22340 rtx tmp, reg;
22342 /* We don't record the PC in the dwarf frame information. */
22343 dwarf_regs_mask &= ~(1 << PC_REGNUM);
22345 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22347 if (mask & (1 << i))
22348 num_regs++;
22349 if (dwarf_regs_mask & (1 << i))
22350 num_dwarf_regs++;
22353 gcc_assert (num_regs && num_regs <= 16);
22354 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
22356 /* For the body of the insn we are going to generate an UNSPEC in
22357 parallel with several USEs. This allows the insn to be recognized
22358 by the push_multi pattern in the arm.md file.
22360 The body of the insn looks something like this:
22362 (parallel [
22363 (set (mem:BLK (pre_modify:SI (reg:SI sp)
22364 (const_int:SI <num>)))
22365 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
22366 (use (reg:SI XX))
22367 (use (reg:SI YY))
22371 For the frame note however, we try to be more explicit and actually
22372 show each register being stored into the stack frame, plus a (single)
22373 decrement of the stack pointer. We do it this way in order to be
22374 friendly to the stack unwinding code, which only wants to see a single
22375 stack decrement per instruction. The RTL we generate for the note looks
22376 something like this:
22378 (sequence [
22379 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
22380 (set (mem:SI (reg:SI sp)) (reg:SI r4))
22381 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
22382 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
22386 FIXME:: In an ideal world the PRE_MODIFY would not exist and
22387 instead we'd have a parallel expression detailing all
22388 the stores to the various memory addresses so that debug
22389 information is more up-to-date. Remember however while writing
22390 this to take care of the constraints with the push instruction.
22392 Note also that this has to be taken care of for the VFP registers.
22394 For more see PR43399. */
22396 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
22397 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
22398 dwarf_par_index = 1;
22400 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22402 if (mask & (1 << i))
22404 /* NOTE: The Dwarf code emitter handles reg-reg copies correctly; in the
22405 following example the reg-reg copy of SP to the IP register is handled
22406 through the .cfi_def_cfa_register directive, and the .cfi_offset
22407 directive for the IP register is skipped by the dwarf code emitter.
22408 Example:
22409 mov ip, sp
22410 .cfi_def_cfa_register 12
22411 push {fp, ip, lr, pc}
22412 .cfi_offset 11, -16
22413 .cfi_offset 13, -12
22414 .cfi_offset 14, -8
22416 The Arm-specific .save directive handling, by contrast, differs from
22417 that of the dwarf code emitter and does not consider reg-reg copies
22418 while updating the register list. When PACBTI is enabled we manually
22419 update the .save directive register list to use "ra_auth_code"
22420 (pseudo register 143) instead of the IP register, as shown in the
22421 following pseudo code.
22422 Example:
22423 pacbti ip, lr, sp
22424 .cfi_register 143, 12
22425 push {r3, r7, ip, lr}
22426 .save {r3, r7, ra_auth_code, lr}
22428 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22429 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22430 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22432 XVECEXP (par, 0, 0)
22433 = gen_rtx_SET (gen_frame_mem
22434 (BLKmode,
22435 gen_rtx_PRE_MODIFY (Pmode,
22436 stack_pointer_rtx,
22437 plus_constant
22438 (Pmode, stack_pointer_rtx,
22439 -4 * num_regs))
22441 gen_rtx_UNSPEC (BLKmode,
22442 gen_rtvec (1, reg),
22443 UNSPEC_PUSH_MULT));
22445 if (dwarf_regs_mask & (1 << i))
22447 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
22448 dwarf_reg);
22449 RTX_FRAME_RELATED_P (tmp) = 1;
22450 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22453 break;
22457 for (j = 1, i++; j < num_regs; i++)
22459 if (mask & (1 << i))
22461 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22462 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22463 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22465 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
22467 if (dwarf_regs_mask & (1 << i))
22470 = gen_rtx_SET (gen_frame_mem
22471 (SImode,
22472 plus_constant (Pmode, stack_pointer_rtx,
22473 4 * j)),
22474 dwarf_reg);
22475 RTX_FRAME_RELATED_P (tmp) = 1;
22476 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22479 j++;
22483 par = emit_insn (par);
22485 tmp = gen_rtx_SET (stack_pointer_rtx,
22486 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22487 RTX_FRAME_RELATED_P (tmp) = 1;
22488 XVECEXP (dwarf, 0, 0) = tmp;
22490 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
22492 return par;
22495 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
22496 SIZE is the offset to be adjusted.
22497 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
22498 static void
22499 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
22501 rtx dwarf;
22503 RTX_FRAME_RELATED_P (insn) = 1;
22504 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
22505 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
22508 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
22509 SAVED_REGS_MASK shows which registers need to be restored.
22511 Unfortunately, since this insn does not reflect very well the actual
22512 semantics of the operation, we need to annotate the insn for the benefit
22513 of DWARF2 frame unwind information. */
22514 static void
22515 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
22517 int num_regs = 0;
22518 int i, j;
22519 rtx par;
22520 rtx dwarf = NULL_RTX;
22521 rtx tmp, reg;
22522 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22523 int offset_adj;
22524 int emit_update;
22526 offset_adj = return_in_pc ? 1 : 0;
22527 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22528 if (saved_regs_mask & (1 << i))
22529 num_regs++;
22531 gcc_assert (num_regs && num_regs <= 16);
22533 /* If SP is in reglist, then we don't emit SP update insn. */
22534 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
22536 /* The parallel needs to hold num_regs SETs
22537 and one SET for the stack update. */
22538 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
22540 if (return_in_pc)
22541 XVECEXP (par, 0, 0) = ret_rtx;
22543 if (emit_update)
22545 /* Increment the stack pointer, based on there being
22546 num_regs 4-byte registers to restore. */
22547 tmp = gen_rtx_SET (stack_pointer_rtx,
22548 plus_constant (Pmode,
22549 stack_pointer_rtx,
22550 4 * num_regs));
22551 RTX_FRAME_RELATED_P (tmp) = 1;
22552 XVECEXP (par, 0, offset_adj) = tmp;
22555 /* Now restore every reg, which may include PC. */
22556 for (j = 0, i = 0; j < num_regs; i++)
22557 if (saved_regs_mask & (1 << i))
22559 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22560 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22561 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22562 if ((num_regs == 1) && emit_update && !return_in_pc)
22564 /* Emit single load with writeback. */
22565 tmp = gen_frame_mem (SImode,
22566 gen_rtx_POST_INC (Pmode,
22567 stack_pointer_rtx));
22568 tmp = emit_insn (gen_rtx_SET (reg, tmp));
22569 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg,
22570 dwarf);
22571 return;
22574 tmp = gen_rtx_SET (reg,
22575 gen_frame_mem
22576 (SImode,
22577 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
22578 RTX_FRAME_RELATED_P (tmp) = 1;
22579 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
22581 /* We need to maintain a sequence for DWARF info too. As dwarf info
22582 should not have PC, skip PC. */
22583 if (i != PC_REGNUM)
22584 dwarf = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg, dwarf);
22586 j++;
22589 if (return_in_pc)
22590 par = emit_jump_insn (par);
22591 else
22592 par = emit_insn (par);
22594 REG_NOTES (par) = dwarf;
22595 if (!return_in_pc)
22596 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
22597 stack_pointer_rtx, stack_pointer_rtx);
22600 /* Generate and emit an insn pattern that we will recognize as a pop_multi
22601 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
22603 Unfortunately, since this insn does not reflect the actual semantics of
22604 the operation very well, we need to annotate the insn for the benefit
22605 of DWARF2 frame unwind information. */
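   /* Illustrative sketch only (not lifted from the sources): restoring
      d8-d11 from BASE_REG generates a PARALLEL of roughly the form

	 (parallel [(set (reg base) (plus (reg base) (const_int 32)))
		    (set (reg:DF d8)  (mem (reg base)))
		    (set (reg:DF d9)  (mem (plus (reg base) (const_int 8))))
		    (set (reg:DF d10) (mem (plus (reg base) (const_int 16))))
		    (set (reg:DF d11) (mem (plus (reg base) (const_int 24))))])

      with a REG_CFA_RESTORE note for each D register, matching the
      VLDM-style pop_multi insn.  */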
22606 static void
22607 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
22609 int i, j;
22610 rtx par;
22611 rtx dwarf = NULL_RTX;
22612 rtx tmp, reg;
22614 gcc_assert (num_regs && num_regs <= 32);
22616 /* Workaround ARM10 VFPr1 bug. */
22617 if (num_regs == 2 && !arm_arch6)
22619 if (first_reg == 15)
22620 first_reg--;
22622 num_regs++;
22625 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
22626 there could be up to 32 D-registers to restore.
22627 If there are more than 16 D-registers, make two recursive calls,
22628 each of which emits one pop_multi instruction. */
22629 if (num_regs > 16)
22631 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
22632 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
22633 return;
22636 /* The parallel needs to hold num_regs SETs
22637 and one SET for the stack update. */
22638 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
22640 /* Increment the stack pointer, based on there being
22641 num_regs 8-byte registers to restore. */
22642 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
22643 RTX_FRAME_RELATED_P (tmp) = 1;
22644 XVECEXP (par, 0, 0) = tmp;
22646 /* Now show every reg that will be restored, using a SET for each. */
22647 for (j = 0, i=first_reg; j < num_regs; i += 2)
22649 reg = gen_rtx_REG (DFmode, i);
22651 tmp = gen_rtx_SET (reg,
22652 gen_frame_mem
22653 (DFmode,
22654 plus_constant (Pmode, base_reg, 8 * j)));
22655 RTX_FRAME_RELATED_P (tmp) = 1;
22656 XVECEXP (par, 0, j + 1) = tmp;
22658 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22660 j++;
22663 par = emit_insn (par);
22664 REG_NOTES (par) = dwarf;
22666 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
22667 if (REGNO (base_reg) == IP_REGNUM)
22669 RTX_FRAME_RELATED_P (par) = 1;
22670 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
22672 else
22673 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
22674 base_reg, base_reg);
22677 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If
22678 an even number of registers is being popped, multiple LDRD patterns are
22679 created for all register pairs. If an odd number of registers is popped,
22680 the last register is loaded using an LDR pattern. */
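   /* A rough sketch (assumed, not taken from a dump): for SAVED_REGS_MASK
      covering {r4, r5, r6, r7, pc} this emits two LDRD parallels,

	 ldrd	r4, r5, [sp]
	 ldrd	r6, r7, [sp, #8]

      followed by a 16-byte SP adjustment and finally an LDR of PC with
      post-increment that also returns.  */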
22681 static void
22682 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
22684 int num_regs = 0;
22685 int i, j;
22686 rtx par = NULL_RTX;
22687 rtx dwarf = NULL_RTX;
22688 rtx tmp, reg, tmp1;
22689 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22691 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22692 if (saved_regs_mask & (1 << i))
22693 num_regs++;
22695 gcc_assert (num_regs && num_regs <= 16);
22697 /* We cannot generate LDRD for PC, so reduce the count if PC is to be
22698 popped. If num_regs was even it is now odd, and a pop including PC can
22699 be generated; if it was odd it is now even, and an LDR with return can
22700 be generated for PC. */
22701 if (return_in_pc)
22702 num_regs--;
22704 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22706 /* Var j iterates over the register numbers to find the registers in
22707 saved_regs_mask, while var i indexes the saved registers in the stack
22708 frame. A PARALLEL RTX is created here for each register pair so that
22709 the LDRD pattern can be matched. As PC is always the last register to
22710 be popped, and we have already decremented num_regs if PC is present,
22711 we don't have to worry about PC in this loop. */
22712 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
22713 if (saved_regs_mask & (1 << j))
22715 /* Create RTX for memory load. */
22716 reg = gen_rtx_REG (SImode, j);
22717 tmp = gen_rtx_SET (reg,
22718 gen_frame_mem (SImode,
22719 plus_constant (Pmode,
22720 stack_pointer_rtx, 4 * i)));
22721 RTX_FRAME_RELATED_P (tmp) = 1;
22723 if (i % 2 == 0)
22725 /* When saved-register index (i) is even, the RTX to be emitted is
22726 yet to be created. Hence create it first. The LDRD pattern we
22727 are generating is :
22728 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
22729 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
22730 where target registers need not be consecutive. */
22731 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22732 dwarf = NULL_RTX;
22735 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
22736 added as the 0th element; if i is odd, reg_i is added as the 1st element
22737 of the LDRD pattern shown above. */
22738 XVECEXP (par, 0, (i % 2)) = tmp;
22739 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22741 if ((i % 2) == 1)
22743 /* When the saved-register index (i) is odd, the SETs for both registers
22744 of the LDRD pattern above have been generated, so the pattern can be
22745 emitted now. */
22746 par = emit_insn (par);
22747 REG_NOTES (par) = dwarf;
22748 RTX_FRAME_RELATED_P (par) = 1;
22751 i++;
22754 /* If the number of registers pushed is odd and return_in_pc is false, or
22755 the number of registers is even and return_in_pc is true, the last
22756 register is popped using LDR (it may be PC). Hence, adjust the stack
22757 first and then use an LDR with post-increment. */
22759 /* Increment the stack pointer, based on there being
22760 num_regs 4-byte registers to restore. */
22761 tmp = gen_rtx_SET (stack_pointer_rtx,
22762 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
22763 RTX_FRAME_RELATED_P (tmp) = 1;
22764 tmp = emit_insn (tmp);
22765 if (!return_in_pc)
22767 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
22768 stack_pointer_rtx, stack_pointer_rtx);
22771 dwarf = NULL_RTX;
22773 if (((num_regs % 2) == 1 && !return_in_pc)
22774 || ((num_regs % 2) == 0 && return_in_pc))
22776 /* Scan for the single register to be popped. Skip until the saved
22777 register is found. */
22778 for (; (saved_regs_mask & (1 << j)) == 0; j++);
22780 /* Gen LDR with post increment here. */
22781 tmp1 = gen_rtx_MEM (SImode,
22782 gen_rtx_POST_INC (SImode,
22783 stack_pointer_rtx));
22784 set_mem_alias_set (tmp1, get_frame_alias_set ());
22786 reg = gen_rtx_REG (SImode, j);
22787 tmp = gen_rtx_SET (reg, tmp1);
22788 RTX_FRAME_RELATED_P (tmp) = 1;
22789 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22791 if (return_in_pc)
22793 /* If return_in_pc, j must be PC_REGNUM. */
22794 gcc_assert (j == PC_REGNUM);
22795 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22796 XVECEXP (par, 0, 0) = ret_rtx;
22797 XVECEXP (par, 0, 1) = tmp;
22798 par = emit_jump_insn (par);
22800 else
22802 par = emit_insn (tmp);
22803 REG_NOTES (par) = dwarf;
22804 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22805 stack_pointer_rtx, stack_pointer_rtx);
22809 else if ((num_regs % 2) == 1 && return_in_pc)
22811 /* There are two registers left to be popped, so generate the pattern
22812 pop_multiple_with_stack_update_and_return to pop into PC. */
22813 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
22816 return;
22819 /* LDRD in ARM mode needs consecutive registers as operands. This function
22820 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
22821 offset addressing and then generates one separate stack update. This provides
22822 more scheduling freedom, compared to writeback on every load. However,
22823 if the function returns using load into PC directly
22824 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
22825 before the last load. TODO: Add a peephole optimization to recognize
22826 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
22827 peephole optimization to merge the load at stack-offset zero
22828 with the stack update instruction using load with writeback
22829 in post-index addressing mode. */
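   /* Sketch of the intended output (assumed, not taken from a dump): for
      SAVED_REGS_MASK covering {r4, r5, r6} this emits

	 ldrd	r4, r5, [sp]
	 ldr	r6, [sp, #8]
	 add	sp, sp, #12

      i.e. consecutive even/odd register pairs become LDRD, leftover
      registers use LDR, and a single SP update follows at the end.  */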
22830 static void
22831 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
22833 int j = 0;
22834 int offset = 0;
22835 rtx par = NULL_RTX;
22836 rtx dwarf = NULL_RTX;
22837 rtx tmp, mem;
22839 /* Restore saved registers. */
22840 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
22841 j = 0;
22842 while (j <= LAST_ARM_REGNUM)
22843 if (saved_regs_mask & (1 << j))
22845 if ((j % 2) == 0
22846 && (saved_regs_mask & (1 << (j + 1)))
22847 && (j + 1) != PC_REGNUM)
22849 /* Current register and next register form register pair for which
22850 LDRD can be generated. PC is always the last register popped, and
22851 we handle it separately. */
22852 if (offset > 0)
22853 mem = gen_frame_mem (DImode,
22854 plus_constant (Pmode,
22855 stack_pointer_rtx,
22856 offset));
22857 else
22858 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22860 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
22861 tmp = emit_insn (tmp);
22862 RTX_FRAME_RELATED_P (tmp) = 1;
22864 /* Generate dwarf info. */
22866 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22867 gen_rtx_REG (SImode, j),
22868 NULL_RTX);
22869 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22870 gen_rtx_REG (SImode, j + 1),
22871 dwarf);
22873 REG_NOTES (tmp) = dwarf;
22875 offset += 8;
22876 j += 2;
22878 else if (j != PC_REGNUM)
22880 /* Emit a single word load. */
22881 if (offset > 0)
22882 mem = gen_frame_mem (SImode,
22883 plus_constant (Pmode,
22884 stack_pointer_rtx,
22885 offset));
22886 else
22887 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22889 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
22890 tmp = emit_insn (tmp);
22891 RTX_FRAME_RELATED_P (tmp) = 1;
22893 /* Generate dwarf info. */
22894 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
22895 gen_rtx_REG (SImode, j),
22896 NULL_RTX);
22898 offset += 4;
22899 j += 1;
22901 else /* j == PC_REGNUM */
22902 j++;
22904 else
22905 j++;
22907 /* Update the stack. */
22908 if (offset > 0)
22910 tmp = gen_rtx_SET (stack_pointer_rtx,
22911 plus_constant (Pmode,
22912 stack_pointer_rtx,
22913 offset));
22914 tmp = emit_insn (tmp);
22915 arm_add_cfa_adjust_cfa_note (tmp, offset,
22916 stack_pointer_rtx, stack_pointer_rtx);
22917 offset = 0;
22920 if (saved_regs_mask & (1 << PC_REGNUM))
22922 /* Only PC is to be popped. */
22923 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22924 XVECEXP (par, 0, 0) = ret_rtx;
22925 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
22926 gen_frame_mem (SImode,
22927 gen_rtx_POST_INC (SImode,
22928 stack_pointer_rtx)));
22929 RTX_FRAME_RELATED_P (tmp) = 1;
22930 XVECEXP (par, 0, 1) = tmp;
22931 par = emit_jump_insn (par);
22933 /* Generate dwarf info. */
22934 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22935 gen_rtx_REG (SImode, PC_REGNUM),
22936 NULL_RTX);
22937 REG_NOTES (par) = dwarf;
22938 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22939 stack_pointer_rtx, stack_pointer_rtx);
22943 /* Calculate the size of the return value that is passed in registers. */
22944 static unsigned
22945 arm_size_return_regs (void)
22947 machine_mode mode;
22949 if (crtl->return_rtx != 0)
22950 mode = GET_MODE (crtl->return_rtx);
22951 else
22952 mode = DECL_MODE (DECL_RESULT (current_function_decl));
22954 return GET_MODE_SIZE (mode);
22957 /* Return true if the current function needs to save/restore LR. */
22958 static bool
22959 thumb_force_lr_save (void)
22961 return !cfun->machine->lr_save_eliminated
22962 && (!crtl->is_leaf
22963 || thumb_far_jump_used_p ()
22964 || df_regs_ever_live_p (LR_REGNUM));
22967 /* We do not know whether r3 will be available, because an
22968 indirect tail call is happening in this
22969 particular case. */
22970 static bool
22971 is_indirect_tailcall_p (rtx call)
22973 rtx pat = PATTERN (call);
22975 /* Indirect tail call. */
22976 pat = XVECEXP (pat, 0, 0);
22977 if (GET_CODE (pat) == SET)
22978 pat = SET_SRC (pat);
22980 pat = XEXP (XEXP (pat, 0), 0);
22981 return REG_P (pat);
22984 /* Return true if r3 is used by any of the tail call insns in the
22985 current function. */
22986 static bool
22987 any_sibcall_could_use_r3 (void)
22989 edge_iterator ei;
22990 edge e;
22992 if (!crtl->tail_call_emit)
22993 return false;
22994 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22995 if (e->flags & EDGE_SIBCALL)
22997 rtx_insn *call = BB_END (e->src);
22998 if (!CALL_P (call))
22999 call = prev_nonnote_nondebug_insn (call);
23000 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
23001 if (find_regno_fusage (call, USE, 3)
23002 || is_indirect_tailcall_p (call))
23003 return true;
23005 return false;
23009 /* Compute the distance from register FROM to register TO.
23010 These can be the arg pointer (26), the soft frame pointer (25),
23011 the stack pointer (13) or the hard frame pointer (11).
23012 In thumb mode r7 is used as the soft frame pointer, if needed.
23013 Typical stack layout looks like this:
23015 old stack pointer -> | |
23016 ----
23017 | | \
23018 | | saved arguments for
23019 | | vararg functions
23020 | | /
23022 hard FP & arg pointer -> | | \
23023 | | stack
23024 | | frame
23025 | | /
23027 | | \
23028 | | call saved
23029 | | registers
23030 soft frame pointer -> | | /
23032 | | \
23033 | | local
23034 | | variables
23035 locals base pointer -> | | /
23037 | | \
23038 | | outgoing
23039 | | arguments
23040 current stack pointer -> | | /
23043 For a given function some or all of these stack components
23044 may not be needed, giving rise to the possibility of
23045 eliminating some of the registers.
23047 The values returned by this function must reflect the behavior
23048 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
23050 The sign of the number returned reflects the direction of stack
23051 growth, so the values are positive for all eliminations except
23052 from the soft frame pointer to the hard frame pointer.
23054 SFP may point just inside the local variables block to ensure correct
23055 alignment. */
23058 /* Return cached stack offsets. */
23060 static arm_stack_offsets *
23061 arm_get_frame_offsets (void)
23063 struct arm_stack_offsets *offsets;
23065 offsets = &cfun->machine->stack_offsets;
23067 return offsets;
23071 /* Calculate stack offsets. These are used to calculate register elimination
23072 offsets and in prologue/epilogue code. Also calculates which registers
23073 should be saved. */
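/* As a purely illustrative example (assuming an ARM-state function with no
   pretend args, no static chain, no interworking slot, no frame pointer and
   no VFP saves): 16 bytes of core registers saved (say r4-r6 and lr),
   8 bytes of locals and no outgoing arguments give roughly

     saved_args = 0, frame = 0, saved_regs = 16,
     soft_frame = 16, locals_base = 24, outgoing_args = 24

   with no extra doubleword-alignment padding, since soft_frame and
   outgoing_args are already 8-byte aligned.  */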
23075 static void
23076 arm_compute_frame_layout (void)
23078 struct arm_stack_offsets *offsets;
23079 unsigned long func_type;
23080 int saved;
23081 int core_saved;
23082 HOST_WIDE_INT frame_size;
23083 int i;
23085 offsets = &cfun->machine->stack_offsets;
23087 /* Initially this is the size of the local variables. It will be translated
23088 into an offset once we have determined the size of preceding data. */
23089 frame_size = ROUND_UP_WORD (get_frame_size ());
23091 /* Space for variadic functions. */
23092 offsets->saved_args = crtl->args.pretend_args_size;
23094 /* In Thumb mode this is incorrect, but never used. */
23095 offsets->frame
23096 = (offsets->saved_args
23097 + arm_compute_static_chain_stack_bytes ()
23098 + (frame_pointer_needed ? 4 : 0));
23100 if (TARGET_32BIT)
23102 unsigned int regno;
23104 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
23105 core_saved = bit_count (offsets->saved_regs_mask) * 4;
23106 saved = core_saved;
23108 /* We know that SP will be doubleword aligned on entry, and we must
23109 preserve that condition at any subroutine call. We also require the
23110 soft frame pointer to be doubleword aligned. */
23112 if (TARGET_REALLY_IWMMXT)
23114 /* Check for the call-saved iWMMXt registers. */
23115 for (regno = FIRST_IWMMXT_REGNUM;
23116 regno <= LAST_IWMMXT_REGNUM;
23117 regno++)
23118 if (reg_needs_saving_p (regno))
23119 saved += 8;
23122 func_type = arm_current_func_type ();
23123 /* Space for saved VFP registers. */
23124 if (! IS_VOLATILE (func_type)
23125 && TARGET_VFP_BASE)
23126 saved += arm_get_vfp_saved_size ();
23128 /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
23129 nonsecure entry functions with VSTR/VLDR. */
23130 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23131 saved += 4;
23133 else /* TARGET_THUMB1 */
23135 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
23136 core_saved = bit_count (offsets->saved_regs_mask) * 4;
23137 saved = core_saved;
23138 if (TARGET_BACKTRACE)
23139 saved += 16;
23142 /* Saved registers include the stack frame. */
23143 offsets->saved_regs
23144 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
23145 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
23147 /* A leaf function does not need any stack alignment if it has nothing
23148 on the stack. */
23149 if (crtl->is_leaf && frame_size == 0
23150 /* However if it calls alloca(), we have a dynamically allocated
23151 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
23152 && ! cfun->calls_alloca)
23154 offsets->outgoing_args = offsets->soft_frame;
23155 offsets->locals_base = offsets->soft_frame;
23156 return;
23159 /* Ensure SFP has the correct alignment. */
23160 if (ARM_DOUBLEWORD_ALIGN
23161 && (offsets->soft_frame & 7))
23163 offsets->soft_frame += 4;
23164 /* Try to align stack by pushing an extra reg. Don't bother doing this
23165 when there is a stack frame as the alignment will be rolled into
23166 the normal stack adjustment. */
23167 if (frame_size + crtl->outgoing_args_size == 0)
23169 int reg = -1;
23171 /* Register r3 is caller-saved. Normally it does not need to be
23172 saved on entry by the prologue. However if we choose to save
23173 it for padding then we may confuse the compiler into thinking
23174 a prologue sequence is required when in fact it is not. This
23175 will occur when shrink-wrapping if r3 is used as a scratch
23176 register and there are no other callee-saved writes.
23178 This situation can be avoided by choosing a callee-saved register
23179 for the padding when one is available, since using r3 is not
23180 mandatory. */
23181 bool prefer_callee_reg_p = false;
23183 /* If it is safe to use r3, then do so. This sometimes
23184 generates better code on Thumb-2 by avoiding the need to
23185 use 32-bit push/pop instructions. */
23186 if (! any_sibcall_could_use_r3 ()
23187 && arm_size_return_regs () <= 12
23188 && (offsets->saved_regs_mask & (1 << 3)) == 0
23189 && (TARGET_THUMB2
23190 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
23192 reg = 3;
23193 if (!TARGET_THUMB2)
23194 prefer_callee_reg_p = true;
23196 if (reg == -1
23197 || prefer_callee_reg_p)
23199 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
23201 /* Avoid fixed registers; they may be changed at
23202 arbitrary times so it's unsafe to restore them
23203 during the epilogue. */
23204 if (!fixed_regs[i]
23205 && (offsets->saved_regs_mask & (1 << i)) == 0)
23207 reg = i;
23208 break;
23213 if (reg != -1)
23215 offsets->saved_regs += 4;
23216 offsets->saved_regs_mask |= (1 << reg);
23221 offsets->locals_base = offsets->soft_frame + frame_size;
23222 offsets->outgoing_args = (offsets->locals_base
23223 + crtl->outgoing_args_size);
23225 if (ARM_DOUBLEWORD_ALIGN)
23227 /* Ensure SP remains doubleword aligned. */
23228 if (offsets->outgoing_args & 7)
23229 offsets->outgoing_args += 4;
23230 gcc_assert (!(offsets->outgoing_args & 7));
23235 /* Calculate the relative offsets for the different stack pointers. Positive
23236 offsets are in the direction of stack growth. */
23238 HOST_WIDE_INT
23239 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
23241 arm_stack_offsets *offsets;
23243 offsets = arm_get_frame_offsets ();
23245 /* OK, now we have enough information to compute the distances.
23246 There must be an entry in these switch tables for each pair
23247 of registers in ELIMINABLE_REGS, even if some of the entries
23248 seem to be redundant or useless. */
23249 switch (from)
23251 case ARG_POINTER_REGNUM:
23252 switch (to)
23254 case THUMB_HARD_FRAME_POINTER_REGNUM:
23255 return 0;
23257 case FRAME_POINTER_REGNUM:
23258 /* This is the reverse of the soft frame pointer
23259 to hard frame pointer elimination below. */
23260 return offsets->soft_frame - offsets->saved_args;
23262 case ARM_HARD_FRAME_POINTER_REGNUM:
23263 /* This is only non-zero in the case where the static chain register
23264 is stored above the frame. */
23265 return offsets->frame - offsets->saved_args - 4;
23267 case STACK_POINTER_REGNUM:
23268 /* If nothing has been pushed on the stack at all
23269 then this will return -4. This *is* correct! */
23270 return offsets->outgoing_args - (offsets->saved_args + 4);
23272 default:
23273 gcc_unreachable ();
23275 gcc_unreachable ();
23277 case FRAME_POINTER_REGNUM:
23278 switch (to)
23280 case THUMB_HARD_FRAME_POINTER_REGNUM:
23281 return 0;
23283 case ARM_HARD_FRAME_POINTER_REGNUM:
23284 /* The hard frame pointer points to the top entry in the
23285 stack frame. The soft frame pointer to the bottom entry
23286 in the stack frame. If there is no stack frame at all,
23287 then they are identical. */
23289 return offsets->frame - offsets->soft_frame;
23291 case STACK_POINTER_REGNUM:
23292 return offsets->outgoing_args - offsets->soft_frame;
23294 default:
23295 gcc_unreachable ();
23297 gcc_unreachable ();
23299 default:
23300 /* You cannot eliminate from the stack pointer.
23301 In theory you could eliminate from the hard frame
23302 pointer to the stack pointer, but this will never
23303 happen, since if a stack frame is not needed the
23304 hard frame pointer will never be used. */
23305 gcc_unreachable ();
23309 /* Given FROM and TO register numbers, say whether this elimination is
23310 allowed. Frame pointer elimination is automatically handled.
23312 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
23313 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
23314 pointer, we must eliminate FRAME_POINTER_REGNUM into
23315 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
23316 ARG_POINTER_REGNUM. */
23318 bool
23319 arm_can_eliminate (const int from, const int to)
23321 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
23322 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
23323 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
23324 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
23325 true);
23328 /* Emit RTL to save coprocessor registers on function entry. Returns the
23329 number of bytes pushed. */
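/* Roughly speaking (an illustration, not a dump): each maximal run of
   consecutive D registers that needs saving is stored with one call to
   vfp_emit_fstmd, so if d8-d11 and d14 are live across the function the
   prologue gets one store-multiple for d8-d11 and a separate one for d14,
   while iWMMXt registers are pushed one at a time with pre-decrement
   stores.  */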
23331 static int
23332 arm_save_coproc_regs(void)
23334 int saved_size = 0;
23335 unsigned reg;
23336 unsigned start_reg;
23337 rtx insn;
23339 if (TARGET_REALLY_IWMMXT)
23340 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
23341 if (reg_needs_saving_p (reg))
23343 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23344 insn = gen_rtx_MEM (V2SImode, insn);
23345 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
23346 RTX_FRAME_RELATED_P (insn) = 1;
23347 saved_size += 8;
23350 if (TARGET_VFP_BASE)
23352 start_reg = FIRST_VFP_REGNUM;
23354 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
23356 if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
23358 if (start_reg != reg)
23359 saved_size += vfp_emit_fstmd (start_reg,
23360 (reg - start_reg) / 2);
23361 start_reg = reg + 2;
23364 if (start_reg != reg)
23365 saved_size += vfp_emit_fstmd (start_reg,
23366 (reg - start_reg) / 2);
23368 return saved_size;
23372 /* Set the Thumb frame pointer from the stack pointer. */
23374 static void
23375 thumb_set_frame_pointer (arm_stack_offsets *offsets)
23377 HOST_WIDE_INT amount;
23378 rtx insn, dwarf;
23380 amount = offsets->outgoing_args - offsets->locals_base;
23381 if (amount < 1024)
23382 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23383 stack_pointer_rtx, GEN_INT (amount)));
23384 else
23386 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
23387 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
23388 expects the first two operands to be the same. */
23389 if (TARGET_THUMB2)
23391 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23392 stack_pointer_rtx,
23393 hard_frame_pointer_rtx));
23395 else
23397 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23398 hard_frame_pointer_rtx,
23399 stack_pointer_rtx));
23401 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
23402 plus_constant (Pmode, stack_pointer_rtx, amount));
23403 RTX_FRAME_RELATED_P (dwarf) = 1;
23404 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23407 RTX_FRAME_RELATED_P (insn) = 1;
23410 struct scratch_reg {
23411 rtx reg;
23412 bool saved;
23415 /* Return a short-lived scratch register for use as a 2nd scratch register on
23416 function entry after the registers are saved in the prologue. This register
23417 must be released by means of release_scratch_register_on_entry. IP is not
23418 considered since it is always used as the 1st scratch register if available.
23420 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
23421 mask of live registers. */
23423 static void
23424 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
23425 unsigned long live_regs)
23427 int regno = -1;
23429 sr->saved = false;
23431 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
23432 regno = LR_REGNUM;
23433 else
23435 unsigned int i;
23437 for (i = 4; i < 11; i++)
23438 if (regno1 != i && (live_regs & (1 << i)) != 0)
23440 regno = i;
23441 break;
23444 if (regno < 0)
23446 /* If IP is used as the 1st scratch register for a nested function,
23447 then either r3 wasn't available or is used to preserve IP. */
23448 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
23449 regno1 = 3;
23450 regno = (regno1 == 3 ? 2 : 3);
23451 sr->saved
23452 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
23453 regno);
23457 sr->reg = gen_rtx_REG (SImode, regno);
23458 if (sr->saved)
23460 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23461 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
23462 rtx x = gen_rtx_SET (stack_pointer_rtx,
23463 plus_constant (Pmode, stack_pointer_rtx, -4));
23464 RTX_FRAME_RELATED_P (insn) = 1;
23465 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23469 /* Release a scratch register obtained from the preceding function. */
23471 static void
23472 release_scratch_register_on_entry (struct scratch_reg *sr)
23474 if (sr->saved)
23476 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
23477 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
23478 rtx x = gen_rtx_SET (stack_pointer_rtx,
23479 plus_constant (Pmode, stack_pointer_rtx, 4));
23480 RTX_FRAME_RELATED_P (insn) = 1;
23481 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23485 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
23487 #if PROBE_INTERVAL > 4096
23488 #error Cannot use indexed addressing mode for stack probing
23489 #endif
23491 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
23492 inclusive. These are offsets from the current stack pointer. REGNO1
23493 is the index number of the 1st scratch register and LIVE_REGS is the
23494 mask of live registers. */
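/* A worked example under the default 4 KiB PROBE_INTERVAL (a sketch of the
   code below, not a dump): FIRST = 4096, SIZE = 12288 falls into the
   "<= 5 * PROBE_INTERVAL" case and probes the words at SP - 8192,
   SP - 12288 and SP - 16384, i.e. at FIRST + N * PROBE_INTERVAL for
   N = 1, 2, 3, which covers the whole [FIRST, FIRST + SIZE] range.  */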
23496 static void
23497 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
23498 unsigned int regno1, unsigned long live_regs)
23500 rtx reg1 = gen_rtx_REG (Pmode, regno1);
23502 /* See if we have a constant small number of probes to generate. If so,
23503 that's the easy case. */
23504 if (size <= PROBE_INTERVAL)
23506 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23507 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23508 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
23511 /* The run-time loop is made up of 10 insns in the generic case while the
23512 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
23513 else if (size <= 5 * PROBE_INTERVAL)
23515 HOST_WIDE_INT i, rem;
23517 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23518 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23519 emit_stack_probe (reg1);
23521 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
23522 it exceeds SIZE. If only two probes are needed, this will not
23523 generate any code. Then probe at FIRST + SIZE. */
23524 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
23526 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23527 emit_stack_probe (reg1);
23530 rem = size - (i - PROBE_INTERVAL);
23531 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23533 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23534 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
23536 else
23537 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
23540 /* Otherwise, do the same as above, but in a loop. Note that we must be
23541 extra careful with variables wrapping around because we might be at
23542 the very top (or the very bottom) of the address space and we have
23543 to be able to handle this case properly; in particular, we use an
23544 equality test for the loop condition. */
23545 else
23547 HOST_WIDE_INT rounded_size;
23548 struct scratch_reg sr;
23550 get_scratch_register_on_entry (&sr, regno1, live_regs);
23552 emit_move_insn (reg1, GEN_INT (first));
23555 /* Step 1: round SIZE to the previous multiple of the interval. */
23557 rounded_size = size & -PROBE_INTERVAL;
23558 emit_move_insn (sr.reg, GEN_INT (rounded_size));
23561 /* Step 2: compute initial and final value of the loop counter. */
23563 /* TEST_ADDR = SP + FIRST. */
23564 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23566 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
23567 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
23570 /* Step 3: the loop
23574 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
23575 probe at TEST_ADDR
23577 while (TEST_ADDR != LAST_ADDR)
23579 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
23580 until it is equal to ROUNDED_SIZE. */
23582 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
23585 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
23586 that SIZE is equal to ROUNDED_SIZE. */
23588 if (size != rounded_size)
23590 HOST_WIDE_INT rem = size - rounded_size;
23592 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23594 emit_set_insn (sr.reg,
23595 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
23596 emit_stack_probe (plus_constant (Pmode, sr.reg,
23597 PROBE_INTERVAL - rem));
23599 else
23600 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
23603 release_scratch_register_on_entry (&sr);
23606 /* Make sure nothing is scheduled before we are done. */
23607 emit_insn (gen_blockage ());
23610 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
23611 absolute addresses. */
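/* With the default 4096-byte interval the emitted loop looks roughly like
   this (register names are placeholders for REG1 and REG2):

	.LPSRL0:
		sub	r4, r4, #4096
		str	r0, [r4, #0]
		cmp	r4, r5
		bne	.LPSRL0
 */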
23613 const char *
23614 output_probe_stack_range (rtx reg1, rtx reg2)
23616 static int labelno = 0;
23617 char loop_lab[32];
23618 rtx xops[2];
23620 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
23622 /* Loop. */
23623 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
23625 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
23626 xops[0] = reg1;
23627 xops[1] = GEN_INT (PROBE_INTERVAL);
23628 output_asm_insn ("sub\t%0, %0, %1", xops);
23630 /* Probe at TEST_ADDR. */
23631 output_asm_insn ("str\tr0, [%0, #0]", xops);
23633 /* Test if TEST_ADDR == LAST_ADDR. */
23634 xops[1] = reg2;
23635 output_asm_insn ("cmp\t%0, %1", xops);
23637 /* Branch. */
23638 fputs ("\tbne\t", asm_out_file);
23639 assemble_name_raw (asm_out_file, loop_lab);
23640 fputc ('\n', asm_out_file);
23642 return "";
23645 /* Generate the prologue instructions for entry into an ARM or Thumb-2
23646 function. */
23647 void
23648 arm_expand_prologue (void)
23650 rtx amount;
23651 rtx insn;
23652 rtx ip_rtx;
23653 unsigned long live_regs_mask;
23654 unsigned long func_type;
23655 int fp_offset = 0;
23656 int saved_pretend_args = 0;
23657 int saved_regs = 0;
23658 unsigned HOST_WIDE_INT args_to_push;
23659 HOST_WIDE_INT size;
23660 arm_stack_offsets *offsets;
23661 bool clobber_ip;
23663 func_type = arm_current_func_type ();
23665 /* Naked functions don't have prologues. */
23666 if (IS_NAKED (func_type))
23668 if (flag_stack_usage_info)
23669 current_function_static_stack_size = 0;
23670 return;
23673 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
23674 args_to_push = crtl->args.pretend_args_size;
23676 /* Compute which register we will have to save onto the stack. */
23677 offsets = arm_get_frame_offsets ();
23678 live_regs_mask = offsets->saved_regs_mask;
23680 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
23682 /* The AAPCS requires the callee to widen integral types narrower
23683 than 32 bits to the full width of the register; but when handling
23684 calls to non-secure space, we cannot trust the callee to have
23685 correctly done so. So forcibly re-widen the result here. */
23686 if (IS_CMSE_ENTRY (func_type))
23688 function_args_iterator args_iter;
23689 CUMULATIVE_ARGS args_so_far_v;
23690 cumulative_args_t args_so_far;
23691 bool first_param = true;
23692 tree arg_type;
23693 tree fndecl = current_function_decl;
23694 tree fntype = TREE_TYPE (fndecl);
23695 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
23696 args_so_far = pack_cumulative_args (&args_so_far_v);
23697 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
23699 rtx arg_rtx;
23701 if (VOID_TYPE_P (arg_type))
23702 break;
23704 function_arg_info arg (arg_type, /*named=*/true);
23705 if (!first_param)
23706 /* We should advance after processing the argument and pass
23707 the argument we're advancing past. */
23708 arm_function_arg_advance (args_so_far, arg);
23709 first_param = false;
23710 arg_rtx = arm_function_arg (args_so_far, arg);
23711 gcc_assert (REG_P (arg_rtx));
23712 if ((TREE_CODE (arg_type) == INTEGER_TYPE
23713 || TREE_CODE (arg_type) == ENUMERAL_TYPE
23714 || TREE_CODE (arg_type) == BOOLEAN_TYPE)
23715 && known_lt (GET_MODE_SIZE (GET_MODE (arg_rtx)), 4))
23717 if (TYPE_UNSIGNED (arg_type))
23718 emit_set_insn (gen_rtx_REG (SImode, REGNO (arg_rtx)),
23719 gen_rtx_ZERO_EXTEND (SImode, arg_rtx));
23720 else
23721 emit_set_insn (gen_rtx_REG (SImode, REGNO (arg_rtx)),
23722 gen_rtx_SIGN_EXTEND (SImode, arg_rtx));
23727 if (IS_STACKALIGN (func_type))
23729 rtx r0, r1;
23731 /* Handle a word-aligned stack pointer. We generate the following:
23733 mov r0, sp
23734 bic r1, r0, #7
23735 mov sp, r1
23736 <save and restore r0 in normal prologue/epilogue>
23737 mov sp, r0
23738 bx lr
23740 The unwinder doesn't need to know about the stack realignment.
23741 Just tell it we saved SP in r0. */
23742 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
23744 r0 = gen_rtx_REG (SImode, R0_REGNUM);
23745 r1 = gen_rtx_REG (SImode, R1_REGNUM);
23747 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
23748 RTX_FRAME_RELATED_P (insn) = 1;
23749 add_reg_note (insn, REG_CFA_REGISTER, NULL);
23751 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
23753 /* ??? The CFA changes here, which may cause GDB to conclude that it
23754 has entered a different function. That said, the unwind info is
23755 correct, individually, before and after this instruction because
23756 we've described the save of SP, which will override the default
23757 handling of SP as restoring from the CFA. */
23758 emit_insn (gen_movsi (stack_pointer_rtx, r1));
23761 /* Let's compute the static_chain_stack_bytes required and store it. Right
23762 now the value must be -1 as stored by arm_init_machine_status (). */
23763 cfun->machine->static_chain_stack_bytes
23764 = arm_compute_static_chain_stack_bytes ();
23766 /* The static chain register is the same as the IP register. If it is
23767 clobbered when creating the frame, we need to save and restore it. */
23768 clobber_ip = (IS_NESTED (func_type)
23769 && (((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23770 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23771 || flag_stack_clash_protection)
23772 && !df_regs_ever_live_p (LR_REGNUM)
23773 && arm_r3_live_at_start_p ()))
23774 || arm_current_function_pac_enabled_p ()));
23776 /* Find somewhere to store IP whilst the frame is being created.
23777 We try the following places in order:
23779 1. The last argument register r3 if it is available.
23780 2. A slot on the stack above the frame if there are no
23781 arguments to push onto the stack.
23782 3. Register r3 again, after pushing the argument registers
23783 onto the stack, if this is a varargs function.
23784 4. The last slot on the stack created for the arguments to
23785 push, if this isn't a varargs function.
23787 Note - we only need to tell the dwarf2 backend about the SP
23788 adjustment in the second variant; the static chain register
23789 doesn't need to be unwound, as it doesn't contain a value
23790 inherited from the caller. */
23791 if (clobber_ip)
23793 if (!arm_r3_live_at_start_p ())
23794 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23795 else if (args_to_push == 0)
23797 rtx addr, dwarf;
23799 saved_regs += 4;
23801 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23802 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23803 fp_offset = 4;
23805 /* Just tell the dwarf backend that we adjusted SP. */
23806 dwarf = gen_rtx_SET (stack_pointer_rtx,
23807 plus_constant (Pmode, stack_pointer_rtx,
23808 -fp_offset));
23809 RTX_FRAME_RELATED_P (insn) = 1;
23810 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23811 if (arm_current_function_pac_enabled_p ())
23812 cfun->machine->pacspval_needed = 1;
23814 else
23816 /* Store the args on the stack. */
23817 if (cfun->machine->uses_anonymous_args)
23819 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
23820 (0xf0 >> (args_to_push / 4)) & 0xf);
23821 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23822 saved_pretend_args = 1;
23824 else
23826 rtx addr, dwarf;
23828 if (args_to_push == 4)
23829 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23830 else
23831 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
23832 plus_constant (Pmode,
23833 stack_pointer_rtx,
23834 -args_to_push));
23836 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23838 /* Just tell the dwarf backend that we adjusted SP. */
23839 dwarf = gen_rtx_SET (stack_pointer_rtx,
23840 plus_constant (Pmode, stack_pointer_rtx,
23841 -args_to_push));
23842 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23845 RTX_FRAME_RELATED_P (insn) = 1;
23846 fp_offset = args_to_push;
23847 args_to_push = 0;
23848 if (arm_current_function_pac_enabled_p ())
23849 cfun->machine->pacspval_needed = 1;
23853 if (arm_current_function_pac_enabled_p ())
23855 /* If IP was clobbered we only emit a PAC instruction as the BTI
23856 one will be added before the push of the clobbered IP (if
23857 necessary) by the bti pass. */
23858 if (aarch_bti_enabled () && !clobber_ip)
23859 insn = emit_insn (gen_pacbti_nop ());
23860 else
23861 insn = emit_insn (gen_pac_nop ());
23863 rtx dwarf = gen_rtx_SET (ip_rtx, gen_rtx_REG (SImode, RA_AUTH_CODE));
23864 RTX_FRAME_RELATED_P (insn) = 1;
23865 add_reg_note (insn, REG_CFA_REGISTER, dwarf);
23868 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23870 if (IS_INTERRUPT (func_type))
23872 /* Interrupt functions must not corrupt any registers.
23873 Creating a frame pointer however, corrupts the IP
23874 register, so we must push it first. */
23875 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
23877 /* Do not set RTX_FRAME_RELATED_P on this insn.
23878 The dwarf stack unwinding code only wants to see one
23879 stack decrement per function, and this is not it. If
23880 this instruction is labeled as being part of the frame
23881 creation sequence then dwarf2out_frame_debug_expr will
23882 die when it encounters the assignment of IP to FP
23883 later on, since the use of SP here establishes SP as
23884 the CFA register and not IP.
23886 Anyway this instruction is not really part of the stack
23887 frame creation although it is part of the prologue. */
23890 insn = emit_set_insn (ip_rtx,
23891 plus_constant (Pmode, stack_pointer_rtx,
23892 fp_offset));
23893 RTX_FRAME_RELATED_P (insn) = 1;
23896 /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR. */
23897 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23899 saved_regs += 4;
23900 insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
23901 GEN_INT (FPCXTNS_ENUM)));
23902 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
23903 plus_constant (Pmode, stack_pointer_rtx, -4));
23904 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23905 RTX_FRAME_RELATED_P (insn) = 1;
23908 if (args_to_push)
23910 /* Push the argument registers, or reserve space for them. */
23911 if (cfun->machine->uses_anonymous_args)
23912 insn = emit_multi_reg_push
23913 ((0xf0 >> (args_to_push / 4)) & 0xf,
23914 (0xf0 >> (args_to_push / 4)) & 0xf);
23915 else
23916 insn = emit_insn
23917 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23918 GEN_INT (- args_to_push)));
23919 RTX_FRAME_RELATED_P (insn) = 1;
23922 /* If this is an interrupt service routine, and the link register
23923 is going to be pushed, and we're not generating an extra
23924 push of IP (needed when a frame is required and the frame layout is APCS),
23925 then subtracting four from LR now will mean that the function return
23926 can be done with a single instruction. */
23927 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
23928 && (live_regs_mask & (1 << LR_REGNUM)) != 0
23929 && !(frame_pointer_needed && TARGET_APCS_FRAME)
23930 && TARGET_ARM)
23932 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
23934 emit_set_insn (lr, plus_constant (SImode, lr, -4));
23937 if (live_regs_mask)
23939 unsigned long dwarf_regs_mask = live_regs_mask;
23941 saved_regs += bit_count (live_regs_mask) * 4;
23942 if (optimize_size && !frame_pointer_needed
23943 && saved_regs == offsets->saved_regs - offsets->saved_args)
23945 /* If no coprocessor registers are being pushed and we don't have
23946 to worry about a frame pointer then push extra registers to
23947 create the stack frame. This is done in a way that does not
23948 alter the frame layout, so is independent of the epilogue. */
23949 int n;
23950 int frame;
23951 n = 0;
23952 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
23953 n++;
23954 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
23955 if (frame && n * 4 >= frame)
23957 n = frame / 4;
23958 live_regs_mask |= (1 << n) - 1;
23959 saved_regs += frame;
23963 if (TARGET_LDRD
23964 && current_tune->prefer_ldrd_strd
23965 && !optimize_function_for_size_p (cfun))
23967 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
23968 if (TARGET_THUMB2)
23969 thumb2_emit_strd_push (live_regs_mask);
23970 else if (TARGET_ARM
23971 && !TARGET_APCS_FRAME
23972 && !IS_INTERRUPT (func_type))
23973 arm_emit_strd_push (live_regs_mask);
23974 else
23976 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
23977 RTX_FRAME_RELATED_P (insn) = 1;
23980 else
23982 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
23983 RTX_FRAME_RELATED_P (insn) = 1;
23987 if (! IS_VOLATILE (func_type))
23988 saved_regs += arm_save_coproc_regs ();
23990 if (frame_pointer_needed && TARGET_ARM)
23992 /* Create the new frame pointer. */
23993 if (TARGET_APCS_FRAME)
23995 insn = GEN_INT (-(4 + args_to_push + fp_offset));
23996 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
23997 RTX_FRAME_RELATED_P (insn) = 1;
23999 else
24001 insn = GEN_INT (saved_regs - (4 + fp_offset));
24002 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
24003 stack_pointer_rtx, insn));
24004 RTX_FRAME_RELATED_P (insn) = 1;
24008 size = offsets->outgoing_args - offsets->saved_args;
24009 if (flag_stack_usage_info)
24010 current_function_static_stack_size = size;
24012 /* If this isn't an interrupt service routine and we have a frame, then do
24013 stack checking. We use IP as the first scratch register, except for the
24014 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
24015 if (!IS_INTERRUPT (func_type)
24016 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
24017 || flag_stack_clash_protection))
24019 unsigned int regno;
24021 if (!IS_NESTED (func_type) || clobber_ip)
24022 regno = IP_REGNUM;
24023 else if (df_regs_ever_live_p (LR_REGNUM))
24024 regno = LR_REGNUM;
24025 else
24026 regno = 3;
24028 if (crtl->is_leaf && !cfun->calls_alloca)
24030 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
24031 arm_emit_probe_stack_range (get_stack_check_protect (),
24032 size - get_stack_check_protect (),
24033 regno, live_regs_mask);
24035 else if (size > 0)
24036 arm_emit_probe_stack_range (get_stack_check_protect (), size,
24037 regno, live_regs_mask);
24040 /* Recover the static chain register. */
24041 if (clobber_ip)
24043 if (!arm_r3_live_at_start_p () || saved_pretend_args)
24044 insn = gen_rtx_REG (SImode, 3);
24045 else
24047 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
24048 insn = gen_frame_mem (SImode, insn);
24050 emit_set_insn (ip_rtx, insn);
24051 emit_insn (gen_force_register_use (ip_rtx));
24054 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
24056 /* This add can produce multiple insns for a large constant, so we
24057 need to get tricky. */
24058 rtx_insn *last = get_last_insn ();
24060 amount = GEN_INT (offsets->saved_args + saved_regs
24061 - offsets->outgoing_args);
24063 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24064 amount));
24067 last = last ? NEXT_INSN (last) : get_insns ();
24068 RTX_FRAME_RELATED_P (last) = 1;
24070 while (last != insn);
24072 /* If the frame pointer is needed, emit a special barrier that
24073 will prevent the scheduler from moving stores to the frame
24074 before the stack adjustment. */
24075 if (frame_pointer_needed)
24076 emit_insn (gen_stack_tie (stack_pointer_rtx,
24077 hard_frame_pointer_rtx));
24081 if (frame_pointer_needed && TARGET_THUMB2)
24082 thumb_set_frame_pointer (offsets);
24084 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24086 unsigned long mask;
24088 mask = live_regs_mask;
24089 mask &= THUMB2_WORK_REGS;
24090 if (!IS_NESTED (func_type))
24091 mask |= (1 << IP_REGNUM);
24092 arm_load_pic_register (mask, NULL_RTX);
24095 /* If we are profiling, make sure no instructions are scheduled before
24096 the call to mcount. Similarly if the user has requested no
24097 scheduling in the prologue. Similarly if we want non-call exceptions
24098 using the EABI unwinder, to prevent faulting instructions from being
24099 swapped with a stack adjustment. */
24100 if (crtl->profile || !TARGET_SCHED_PROLOG
24101 || (arm_except_unwind_info (&global_options) == UI_TARGET
24102 && cfun->can_throw_non_call_exceptions))
24103 emit_insn (gen_blockage ());
24105 /* If the link register is being kept alive, with the return address in it,
24106 then make sure that it does not get reused by the ce2 pass. */
24107 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
24108 cfun->machine->lr_save_eliminated = 1;
24111 /* Print condition code to STREAM. Helper function for arm_print_operand. */
24112 static void
24113 arm_print_condition (FILE *stream)
24115 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
24117 /* Branch conversion is not implemented for Thumb-2. */
24118 if (TARGET_THUMB)
24120 output_operand_lossage ("predicated Thumb instruction");
24121 return;
24123 if (current_insn_predicate != NULL)
24125 output_operand_lossage
24126 ("predicated instruction in conditional sequence");
24127 return;
24130 fputs (arm_condition_codes[arm_current_cc], stream);
24132 else if (current_insn_predicate)
24134 enum arm_cond_code code;
24136 if (TARGET_THUMB1)
24138 output_operand_lossage ("predicated Thumb instruction");
24139 return;
24142 code = get_arm_condition_code (current_insn_predicate);
24143 fputs (arm_condition_codes[code], stream);
24148 /* Globally reserved letters: acln
24149 Punctuation letters currently used: @_|?().!#
24150 Lower case letters currently used: bcdefhimpqtvwxyz
24151 Upper case letters currently used: ABCDEFGHIJKLMOPQRSTUV
24152 Letters previously used, but now deprecated/obsolete: sNWXYZ.
24154 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
24156 If CODE is 'd', then X is a condition operand and the instruction
24157 should only be executed if the condition is true.
24158 If CODE is 'D', then X is a condition operand and the instruction
24159 should only be executed if the condition is false: however, if the mode
24160 of the comparison is CCFPEmode, then always execute the instruction -- we
24161 do this because in these circumstances !GE does not necessarily imply LT;
24162 in these cases the instruction pattern will take care to make sure that
24163 an instruction containing %d will follow, thereby undoing the effects of
24164 doing this instruction unconditionally.
24165 If CODE is 'B' then output a bitwise inverted value of X (a const int).
24166 If X is a REG and CODE is `M', output a ldm/stm style multi-reg.
24167 If CODE is 'V', then the operand must be a CONST_INT representing
24168 the bits to preserve in the modified register (Rd) of a BFI or BFC
24169 instruction: print out both the width and lsb (shift) fields. */
24170 static void
24171 arm_print_operand (FILE *stream, rtx x, int code)
24173 switch (code)
24175 case '@':
24176 fputs (ASM_COMMENT_START, stream);
24177 return;
24179 case '_':
24180 fputs (user_label_prefix, stream);
24181 return;
24183 case '|':
24184 fputs (REGISTER_PREFIX, stream);
24185 return;
24187 case '?':
24188 arm_print_condition (stream);
24189 return;
24191 case '.':
24192 /* The current condition code for a condition code setting instruction.
24193 Preceded by 's' in unified syntax, otherwise followed by 's'. */
24194 fputc('s', stream);
24195 arm_print_condition (stream);
24196 return;
24198 case '!':
24199 /* If the instruction is conditionally executed then print
24200 the current condition code, otherwise print 's'. */
24201 gcc_assert (TARGET_THUMB2);
24202 if (current_insn_predicate)
24203 arm_print_condition (stream);
24204 else
24205 fputc('s', stream);
24206 break;
24208 /* %# is a "break" sequence. It doesn't output anything, but is used to
24209 separate e.g. operand numbers from following text, if that text consists
24210 of further digits which we don't want to be part of the operand
24211 number. */
24212 case '#':
24213 return;
24215 /* An integer or symbol address without a preceding # sign. */
24216 case 'c':
24217 switch (GET_CODE (x))
24219 case CONST_INT:
24220 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
24221 break;
24223 case SYMBOL_REF:
24224 output_addr_const (stream, x);
24225 break;
24227 case CONST:
24228 if (GET_CODE (XEXP (x, 0)) == PLUS
24229 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
24231 output_addr_const (stream, x);
24232 break;
24234 /* Fall through. */
24236 default:
24237 output_operand_lossage ("Unsupported operand for code '%c'", code);
24239 return;
24241 /* An integer that we want to print in HEX. */
24242 case 'x':
24243 switch (GET_CODE (x))
24245 case CONST_INT:
24246 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
24247 break;
24249 default:
24250 output_operand_lossage ("Unsupported operand for code '%c'", code);
24252 return;
24254 case 'B':
24255 if (CONST_INT_P (x))
24257 HOST_WIDE_INT val;
24258 val = ARM_SIGN_EXTEND (~INTVAL (x));
24259 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
24261 else
24263 putc ('~', stream);
24264 output_addr_const (stream, x);
24266 return;
24268 case 'b':
24269 /* Print the log2 of a CONST_INT. */
24271 HOST_WIDE_INT val;
24273 if (!CONST_INT_P (x)
24274 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
24275 output_operand_lossage ("Unsupported operand for code '%c'", code);
24276 else
24277 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24279 return;
24281 case 'L':
24282 /* The low 16 bits of an immediate constant. */
24283 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
24284 return;
24286 case 'i':
24287 fprintf (stream, "%s", arithmetic_instr (x, 1));
24288 return;
24290 case 'I':
24291 fprintf (stream, "%s", arithmetic_instr (x, 0));
24292 return;
24294 case 'S':
24296 HOST_WIDE_INT val;
24297 const char *shift;
24299 shift = shift_op (x, &val);
24301 if (shift)
24303 fprintf (stream, ", %s ", shift);
24304 if (val == -1)
24305 arm_print_operand (stream, XEXP (x, 1), 0);
24306 else
24307 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24310 return;
24312 /* An explanation of the 'Q', 'R' and 'H' register operands:
24314 In a pair of registers containing a DI or DF value the 'Q'
24315 operand returns the register number of the register containing
24316 the least significant part of the value. The 'R' operand returns
24317 the register number of the register containing the most
24318 significant part of the value.
24320 The 'H' operand returns the higher of the two register numbers.
24321 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
24322 same as the 'Q' operand, since the most significant part of the
24323 value is held in the lower number register. The reverse is true
24324 on systems where WORDS_BIG_ENDIAN is false.
24326 The purpose of these operands is to distinguish between cases
24327 where the endian-ness of the values is important (for example
24328 when they are added together), and cases where the endian-ness
24329 is irrelevant, but the order of register operations is important.
24330 For example when loading a value from memory into a register
24331 pair, the endian-ness does not matter. Provided that the value
24332 from the lower memory address is put into the lower numbered
24333 register, and the value from the higher address is put into the
24334 higher numbered register, the load will work regardless of whether
24335 the value being loaded is big-wordian or little-wordian. The
24336 order of the two register loads can matter however, if the address
24337 of the memory location is actually held in one of the registers
24338 being overwritten by the load.
24340 The 'Q' and 'R' constraints are also available for 64-bit
24341 constants. */
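    /* For instance (little-endian, a DImode value living in {r0, r1}):
       %Q prints r0 (the least significant half), %R prints r1 (the most
       significant half) and %H prints r1, the higher-numbered register
       of the pair.  */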
24342 case 'Q':
24343 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24345 rtx part = gen_lowpart (SImode, x);
24346 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24347 return;
24350 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24352 output_operand_lossage ("invalid operand for code '%c'", code);
24353 return;
24356 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
24357 return;
24359 case 'R':
24360 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24362 machine_mode mode = GET_MODE (x);
24363 rtx part;
24365 if (mode == VOIDmode)
24366 mode = DImode;
24367 part = gen_highpart_mode (SImode, mode, x);
24368 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24369 return;
24372 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24374 output_operand_lossage ("invalid operand for code '%c'", code);
24375 return;
24378 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
24379 return;
24381 case 'H':
24382 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24384 output_operand_lossage ("invalid operand for code '%c'", code);
24385 return;
24388 asm_fprintf (stream, "%r", REGNO (x) + 1);
24389 return;
24391 case 'J':
24392 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24394 output_operand_lossage ("invalid operand for code '%c'", code);
24395 return;
24398 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
24399 return;
24401 case 'K':
24402 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24404 output_operand_lossage ("invalid operand for code '%c'", code);
24405 return;
24408 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
24409 return;
24411 case 'm':
24412 asm_fprintf (stream, "%r",
24413 REG_P (XEXP (x, 0))
24414 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
24415 return;
24417 case 'M':
24418 asm_fprintf (stream, "{%r-%r}",
24419 REGNO (x),
24420 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
24421 return;
24423 /* Like 'M', but writing doubleword vector registers, for use by Neon
24424 insns. */
24425 case 'h':
24427 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
24428 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
24429 if (numregs == 1)
24430 asm_fprintf (stream, "{d%d}", regno);
24431 else
24432 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
24434 return;
24436 case 'd':
24437 /* CONST_TRUE_RTX means always -- that's the default. */
24438 if (x == const_true_rtx)
24439 return;
24441 if (!COMPARISON_P (x))
24443 output_operand_lossage ("invalid operand for code '%c'", code);
24444 return;
24447 fputs (arm_condition_codes[get_arm_condition_code (x)],
24448 stream);
24449 return;
24451 case 'D':
24452 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
24453 want to do that. */
24454 if (x == const_true_rtx)
24456 output_operand_lossage ("instruction never executed");
24457 return;
24459 if (!COMPARISON_P (x))
24461 output_operand_lossage ("invalid operand for code '%c'", code);
24462 return;
24465 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
24466 (get_arm_condition_code (x))],
24467 stream);
24468 return;
24470 case 'V':
24472 /* Output the LSB (shift) and width for a bitmask instruction
24473 based on a literal mask. The LSB is printed first,
24474 followed by the width.
24476 Eg. For 0b1...1110001, the result is #1, #3. */
24477 if (!CONST_INT_P (x))
24479 output_operand_lossage ("invalid operand for code '%c'", code);
24480 return;
24483 unsigned HOST_WIDE_INT val
24484 = ~UINTVAL (x) & HOST_WIDE_INT_UC (0xffffffff);
24485 int lsb = exact_log2 (val & -val);
24486 asm_fprintf (stream, "#%d, #%d", lsb,
24487 (exact_log2 (val + (val & -val)) - lsb));
24489 return;
24491 case 'N':
24492 /* Former FPA support, effectively unused after GCC-4.7, but not
24493 removed until GCC-15. */
24494 output_operand_lossage ("obsolete FPA format code '%c'", code);
24495 return;
24497 case 's':
24498 case 'W':
24499 case 'X':
24500 case 'Y':
24501 case 'Z':
24502 /* Former Maverick support, removed after GCC-4.7. */
24503 output_operand_lossage ("obsolete Maverick format code '%c'", code);
24504 return;
24506 case 'U':
24507 if (!REG_P (x)
24508 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
24509 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
24510 /* Bad value for wCG register number. */
24512 output_operand_lossage ("invalid operand for code '%c'", code);
24513 return;
24516 else
24517 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
24518 return;
24520 /* Print an iWMMXt control register name. */
24521 case 'w':
24522 if (!CONST_INT_P (x)
24523 || INTVAL (x) < 0
24524 || INTVAL (x) >= 16)
24525 /* Bad value for wC register number. */
24527 output_operand_lossage ("invalid operand for code '%c'", code);
24528 return;
24531 else
24533 static const char * wc_reg_names [16] =
24535 "wCID", "wCon", "wCSSF", "wCASF",
24536 "wC4", "wC5", "wC6", "wC7",
24537 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
24538 "wC12", "wC13", "wC14", "wC15"
24541 fputs (wc_reg_names [INTVAL (x)], stream);
24543 return;
24545 /* Print the high single-precision register of a VFP double-precision
24546 register. */
24547 case 'p':
24549 machine_mode mode = GET_MODE (x);
24550 int regno;
24552 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
24554 output_operand_lossage ("invalid operand for code '%c'", code);
24555 return;
24558 regno = REGNO (x);
24559 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
24561 output_operand_lossage ("invalid operand for code '%c'", code);
24562 return;
24565 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
24567 return;
24569 /* Print a VFP/Neon double precision or quad precision register name. */
24570 case 'P':
24571 case 'q':
24573 machine_mode mode = GET_MODE (x);
24574 int is_quad = (code == 'q');
24575 int regno;
24577 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
24579 output_operand_lossage ("invalid operand for code '%c'", code);
24580 return;
24583 if (!REG_P (x)
24584 || !IS_VFP_REGNUM (REGNO (x)))
24586 output_operand_lossage ("invalid operand for code '%c'", code);
24587 return;
24590 regno = REGNO (x);
24591 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
24592 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
24594 output_operand_lossage ("invalid operand for code '%c'", code);
24595 return;
24598 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
24599 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
24601 return;
24603 /* These two codes print the low/high doubleword register of a Neon quad
24604 register, respectively. For pair-structure types, can also print
24605 low/high quadword registers. */
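/* For example (illustrative): for a quad-register value in q1 (d2/d3),
   'e' prints d2 and 'f' prints d3; for a 32-byte pair-structure value
   starting at q2, 'e' prints q2 and 'f' prints q3. */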
24606 case 'e':
24607 case 'f':
24609 machine_mode mode = GET_MODE (x);
24610 int regno;
24612 if ((GET_MODE_SIZE (mode) != 16
24613 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
24615 output_operand_lossage ("invalid operand for code '%c'", code);
24616 return;
24619 regno = REGNO (x);
24620 if (!NEON_REGNO_OK_FOR_QUAD (regno))
24622 output_operand_lossage ("invalid operand for code '%c'", code);
24623 return;
24626 if (GET_MODE_SIZE (mode) == 16)
24627 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
24628 + (code == 'f' ? 1 : 0));
24629 else
24630 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
24631 + (code == 'f' ? 1 : 0));
24633 return;
24635 /* Print a VFPv3 floating-point constant, represented as an integer
24636 index. */
24637 case 'G':
24639 int index = vfp3_const_double_index (x);
24640 gcc_assert (index != -1);
24641 fprintf (stream, "%d", index);
24643 return;
24645 /* Print bits representing opcode features for Neon.
24647 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
24648 and polynomials as unsigned.
24650 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
24652 Bit 2 is 1 for rounding functions, 0 otherwise. */
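/* For example (illustrative): an operand value of 5 (signed, rounding)
   makes 'T' and 't' print 's', 'F' print 'i' and 'O' print 'r'; a value
   of 3 (float) makes 'T', 't' and 'F' all print 'f' while 'O' prints
   nothing. */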
24654 /* Identify the type as 's', 'u', 'p' or 'f'. */
24655 case 'T':
24657 HOST_WIDE_INT bits = INTVAL (x);
24658 fputc ("uspf"[bits & 3], stream);
24660 return;
24662 /* Likewise, but signed and unsigned integers are both 'i'. */
24663 case 'F':
24665 HOST_WIDE_INT bits = INTVAL (x);
24666 fputc ("iipf"[bits & 3], stream);
24668 return;
24670 /* As for 'T', but emit 'u' instead of 'p'. */
24671 case 't':
24673 HOST_WIDE_INT bits = INTVAL (x);
24674 fputc ("usuf"[bits & 3], stream);
24676 return;
24678 /* Bit 2: rounding (vs none). */
24679 case 'O':
24681 HOST_WIDE_INT bits = INTVAL (x);
24682 fputs ((bits & 4) != 0 ? "r" : "", stream);
24684 return;
24686 /* Memory operand for vld1/vst1 instruction. */
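/* For example (illustrative): a 16-byte access through r0 that is known
   to be 16-byte aligned is printed as "[r0:128]"; with post-increment
   addressing a "!" is appended, giving "[r0:128]!". */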
24687 case 'A':
24689 rtx addr;
24690 bool postinc = FALSE;
24691 rtx postinc_reg = NULL;
24692 unsigned align, memsize, align_bits;
24694 gcc_assert (MEM_P (x));
24695 addr = XEXP (x, 0);
24696 if (GET_CODE (addr) == POST_INC)
24698 postinc = 1;
24699 addr = XEXP (addr, 0);
24701 if (GET_CODE (addr) == POST_MODIFY)
24703 postinc_reg = XEXP (XEXP (addr, 1), 1);
24704 addr = XEXP (addr, 0);
24706 asm_fprintf (stream, "[%r", REGNO (addr));
24708 /* We know the alignment of this access, so we can emit a hint in the
24709 instruction (for some alignments) as an aid to the memory subsystem
24710 of the target. */
24711 align = MEM_ALIGN (x) >> 3;
24712 memsize = MEM_SIZE (x);
24714 /* Only certain alignment specifiers are supported by the hardware. */
24715 if (memsize == 32 && (align % 32) == 0)
24716 align_bits = 256;
24717 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
24718 align_bits = 128;
24719 else if (memsize >= 8 && (align % 8) == 0)
24720 align_bits = 64;
24721 else
24722 align_bits = 0;
24724 if (align_bits != 0)
24725 asm_fprintf (stream, ":%d", align_bits);
24727 asm_fprintf (stream, "]");
24729 if (postinc)
24730 fputs("!", stream);
24731 if (postinc_reg)
24732 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
24734 return;
24736 /* Print a memory operand matched by the "Ux" or "Uj" constraint. Based on
24737 the rtx_code, the output looks like one of the following:
24738 1. [Rn], #+/-<imm>
24739 2. [Rn, #+/-<imm>]!
24740 3. [Rn, #+/-<imm>]
24741 4. [Rn]. */
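/* For example (illustrative, for an SImode access): an address of
   (post_inc r3) prints as "[r3], #4", (pre_dec r3) prints as
   "[r3, #-4]!" and (plus r3 (const_int 8)) prints as "[r3, #8]". */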
24742 case 'E':
24744 rtx addr;
24745 rtx postinc_reg = NULL;
24746 unsigned inc_val = 0;
24747 enum rtx_code code;
24749 gcc_assert (MEM_P (x));
24750 addr = XEXP (x, 0);
24751 code = GET_CODE (addr);
24752 if (code == POST_INC || code == POST_DEC || code == PRE_INC
24753 || code == PRE_DEC)
24755 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24756 inc_val = GET_MODE_SIZE (GET_MODE (x));
24757 if (code == POST_INC || code == POST_DEC)
24758 asm_fprintf (stream, "], #%s%d", (code == POST_INC)
24759 ? "" : "-", inc_val);
24760 else
24761 asm_fprintf (stream, ", #%s%d]!", (code == PRE_INC)
24762 ? "" : "-", inc_val);
24764 else if (code == POST_MODIFY || code == PRE_MODIFY)
24766 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24767 postinc_reg = XEXP (XEXP (addr, 1), 1);
24768 if (postinc_reg && CONST_INT_P (postinc_reg))
24770 if (code == POST_MODIFY)
24771 asm_fprintf (stream, "], #%wd", INTVAL (postinc_reg));
24772 else
24773 asm_fprintf (stream, ", #%wd]!", INTVAL (postinc_reg));
24776 else if (code == PLUS)
24778 rtx base = XEXP (addr, 0);
24779 rtx index = XEXP (addr, 1);
24781 gcc_assert (REG_P (base) && CONST_INT_P (index));
24783 HOST_WIDE_INT offset = INTVAL (index);
24784 asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
24786 else
24788 gcc_assert (REG_P (addr));
24789 asm_fprintf (stream, "[%r]", REGNO (addr));
24792 return;
24794 case 'C':
24796 rtx addr;
24798 gcc_assert (MEM_P (x));
24799 addr = XEXP (x, 0);
24800 gcc_assert (REG_P (addr));
24801 asm_fprintf (stream, "[%r]", REGNO (addr));
24803 return;
24805 /* Translate an S register number into a D register number and element index. */
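/* For example (illustrative): an SFmode value in s5 prints as "d2[1]",
   since s5 is the odd (upper) half of d2. */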
24806 case 'y':
24808 machine_mode mode = GET_MODE (x);
24809 int regno;
24811 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
24813 output_operand_lossage ("invalid operand for code '%c'", code);
24814 return;
24817 regno = REGNO (x);
24818 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24820 output_operand_lossage ("invalid operand for code '%c'", code);
24821 return;
24824 regno = regno - FIRST_VFP_REGNUM;
24825 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
24827 return;
24829 case 'v':
24830 gcc_assert (CONST_DOUBLE_P (x));
24831 int result;
24832 result = vfp3_const_double_for_fract_bits (x);
24833 if (result == 0)
24834 result = vfp3_const_double_for_bits (x);
24835 fprintf (stream, "#%d", result);
24836 return;
24838 /* Register specifier for vld1.16/vst1.16. Translate the S register
24839 number into a D register number and element index. */
24840 case 'z':
24842 machine_mode mode = GET_MODE (x);
24843 int regno;
24845 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
24847 output_operand_lossage ("invalid operand for code '%c'", code);
24848 return;
24851 regno = REGNO (x);
24852 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24854 output_operand_lossage ("invalid operand for code '%c'", code);
24855 return;
24858 regno = regno - FIRST_VFP_REGNUM;
24859 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
24861 return;
24863 default:
24864 if (x == 0)
24866 output_operand_lossage ("missing operand");
24867 return;
24870 switch (GET_CODE (x))
24872 case REG:
24873 asm_fprintf (stream, "%r", REGNO (x));
24874 break;
24876 case MEM:
24877 output_address (GET_MODE (x), XEXP (x, 0));
24878 break;
24880 case CONST_DOUBLE:
24882 char fpstr[20];
24883 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
24884 sizeof (fpstr), 0, 1);
24885 fprintf (stream, "#%s", fpstr);
24887 break;
24889 default:
24890 gcc_assert (GET_CODE (x) != NEG);
24891 fputc ('#', stream);
24892 if (GET_CODE (x) == HIGH)
24894 fputs (":lower16:", stream);
24895 x = XEXP (x, 0);
24898 output_addr_const (stream, x);
24899 break;
24904 /* Target hook for printing a memory address. */
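/* For example (illustrative, 32-bit code): (reg r1) prints as "[r1]",
   (plus (reg r1) (const_int 8)) as "[r1, #8]", and for an SImode access
   (pre_dec (reg r2)) prints as "[r2, #-4]!" while (post_inc (reg r2))
   prints as "[r2], #4". */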
24905 static void
24906 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
24908 if (TARGET_32BIT)
24910 int is_minus = GET_CODE (x) == MINUS;
24912 if (REG_P (x))
24913 asm_fprintf (stream, "[%r]", REGNO (x));
24914 else if (GET_CODE (x) == PLUS || is_minus)
24916 rtx base = XEXP (x, 0);
24917 rtx index = XEXP (x, 1);
24918 HOST_WIDE_INT offset = 0;
24919 if (!REG_P (base)
24920 || (REG_P (index) && REGNO (index) == SP_REGNUM))
24922 /* Ensure that BASE is a register
24923 (one of them must be).  Also ensure
24924 that SP is not used as an index register. */
24925 std::swap (base, index);
24927 switch (GET_CODE (index))
24929 case CONST_INT:
24930 offset = INTVAL (index);
24931 if (is_minus)
24932 offset = -offset;
24933 asm_fprintf (stream, "[%r, #%wd]",
24934 REGNO (base), offset);
24935 break;
24937 case REG:
24938 asm_fprintf (stream, "[%r, %s%r]",
24939 REGNO (base), is_minus ? "-" : "",
24940 REGNO (index));
24941 break;
24943 case MULT:
24944 case ASHIFTRT:
24945 case LSHIFTRT:
24946 case ASHIFT:
24947 case ROTATERT:
24949 asm_fprintf (stream, "[%r, %s%r",
24950 REGNO (base), is_minus ? "-" : "",
24951 REGNO (XEXP (index, 0)));
24952 arm_print_operand (stream, index, 'S');
24953 fputs ("]", stream);
24954 break;
24957 default:
24958 gcc_unreachable ();
24961 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
24962 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
24964 gcc_assert (REG_P (XEXP (x, 0)));
24966 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
24967 asm_fprintf (stream, "[%r, #%s%d]!",
24968 REGNO (XEXP (x, 0)),
24969 GET_CODE (x) == PRE_DEC ? "-" : "",
24970 GET_MODE_SIZE (mode));
24971 else if (TARGET_HAVE_MVE
24972 && VALID_MVE_STRUCT_MODE (mode))
24973 asm_fprintf (stream, "[%r]!", REGNO (XEXP (x,0)));
24974 else
24975 asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
24976 GET_CODE (x) == POST_DEC ? "-" : "",
24977 GET_MODE_SIZE (mode));
24979 else if (GET_CODE (x) == PRE_MODIFY)
24981 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
24982 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24983 asm_fprintf (stream, "#%wd]!",
24984 INTVAL (XEXP (XEXP (x, 1), 1)));
24985 else
24986 asm_fprintf (stream, "%r]!",
24987 REGNO (XEXP (XEXP (x, 1), 1)));
24989 else if (GET_CODE (x) == POST_MODIFY)
24991 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
24992 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24993 asm_fprintf (stream, "#%wd",
24994 INTVAL (XEXP (XEXP (x, 1), 1)));
24995 else
24996 asm_fprintf (stream, "%r",
24997 REGNO (XEXP (XEXP (x, 1), 1)));
24999 else output_addr_const (stream, x);
25001 else
25003 if (REG_P (x))
25004 asm_fprintf (stream, "[%r]", REGNO (x));
25005 else if (GET_CODE (x) == POST_INC)
25006 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
25007 else if (GET_CODE (x) == PLUS)
25009 gcc_assert (REG_P (XEXP (x, 0)));
25010 if (CONST_INT_P (XEXP (x, 1)))
25011 asm_fprintf (stream, "[%r, #%wd]",
25012 REGNO (XEXP (x, 0)),
25013 INTVAL (XEXP (x, 1)));
25014 else
25015 asm_fprintf (stream, "[%r, %r]",
25016 REGNO (XEXP (x, 0)),
25017 REGNO (XEXP (x, 1)));
25019 else
25020 output_addr_const (stream, x);
25024 /* Target hook for indicating whether a punctuation character for
25025 TARGET_PRINT_OPERAND is valid. */
25026 static bool
25027 arm_print_operand_punct_valid_p (unsigned char code)
25029 return (code == '@' || code == '|' || code == '.'
25030 || code == '(' || code == ')' || code == '#'
25031 || (TARGET_32BIT && (code == '?'))
25032 || (TARGET_THUMB2 && (code == '!'))
25033 || (TARGET_THUMB && (code == '_')));
25036 /* Target hook for assembling integer objects. The ARM version needs to
25037 handle word-sized values specially. */
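/* For example (illustrative, non-FDPIC PIC code emitting its constant pool
   when GOT relocations are needed, assuming the default text-relative PIC
   data): a word-sized reference to a preemptible global symbol "foo" is
   emitted as "\t.word\tfoo(GOT)", while a reference to a local symbol is
   emitted as "\t.word\tfoo(GOTOFF)". */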
25038 static bool
25039 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
25041 machine_mode mode;
25043 if (size == UNITS_PER_WORD && aligned_p)
25045 fputs ("\t.word\t", asm_out_file);
25046 output_addr_const (asm_out_file, x);
25048 /* Mark symbols as position independent. We only do this in the
25049 .text segment, not in the .data segment. */
25050 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
25051 (SYMBOL_REF_P (x) || LABEL_REF_P (x)))
25053 /* See legitimize_pic_address for an explanation of the
25054 TARGET_VXWORKS_RTP check. */
25055 /* References to weak symbols cannot be resolved locally:
25056 they may be overridden by a non-weak definition at link
25057 time. */
25058 if (!arm_pic_data_is_text_relative
25059 || (SYMBOL_REF_P (x)
25060 && (!SYMBOL_REF_LOCAL_P (x)
25061 || (SYMBOL_REF_DECL (x)
25062 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
25063 || (SYMBOL_REF_FUNCTION_P (x)
25064 && !arm_fdpic_local_funcdesc_p (x)))))
25066 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
25067 fputs ("(GOTFUNCDESC)", asm_out_file);
25068 else
25069 fputs ("(GOT)", asm_out_file);
25071 else
25073 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
25074 fputs ("(GOTOFFFUNCDESC)", asm_out_file);
25075 else
25077 bool is_readonly;
25079 if (!TARGET_FDPIC
25080 || arm_is_segment_info_known (x, &is_readonly))
25081 fputs ("(GOTOFF)", asm_out_file);
25082 else
25083 fputs ("(GOT)", asm_out_file);
25088 /* For FDPIC we also have to mark symbol for .data section. */
25089 if (TARGET_FDPIC
25090 && !making_const_table
25091 && SYMBOL_REF_P (x)
25092 && SYMBOL_REF_FUNCTION_P (x))
25093 fputs ("(FUNCDESC)", asm_out_file);
25095 fputc ('\n', asm_out_file);
25096 return true;
25099 mode = GET_MODE (x);
25101 if (arm_vector_mode_supported_p (mode))
25103 int i, units;
25105 gcc_assert (GET_CODE (x) == CONST_VECTOR);
25107 units = CONST_VECTOR_NUNITS (x);
25108 size = GET_MODE_UNIT_SIZE (mode);
25110 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
25111 for (i = 0; i < units; i++)
25113 rtx elt = CONST_VECTOR_ELT (x, i);
25114 assemble_integer
25115 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
25117 else
25118 for (i = 0; i < units; i++)
25120 rtx elt = CONST_VECTOR_ELT (x, i);
25121 assemble_real
25122 (*CONST_DOUBLE_REAL_VALUE (elt),
25123 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
25124 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
25127 return true;
25130 return default_assemble_integer (x, size, aligned_p);
25133 static void
25134 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
25136 section *s;
25138 if (!TARGET_AAPCS_BASED)
25140 (is_ctor ?
25141 default_named_section_asm_out_constructor
25142 : default_named_section_asm_out_destructor) (symbol, priority);
25143 return;
25146 /* Put these in the .init_array section, using a special relocation. */
25147 if (priority != DEFAULT_INIT_PRIORITY)
25149 char buf[18];
25150 sprintf (buf, "%s.%.5u",
25151 is_ctor ? ".init_array" : ".fini_array",
25152 priority);
25153 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
25155 else if (is_ctor)
25156 s = ctors_section;
25157 else
25158 s = dtors_section;
25160 switch_to_section (s);
25161 assemble_align (POINTER_SIZE);
25162 fputs ("\t.word\t", asm_out_file);
25163 output_addr_const (asm_out_file, symbol);
25164 fputs ("(target1)\n", asm_out_file);
25167 /* Add a function to the list of static constructors. */
25169 static void
25170 arm_elf_asm_constructor (rtx symbol, int priority)
25172 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
25175 /* Add a function to the list of static destructors. */
25177 static void
25178 arm_elf_asm_destructor (rtx symbol, int priority)
25180 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
25183 /* A finite state machine takes care of noticing whether or not instructions
25184 can be conditionally executed, and thus decrease execution time and code
25185 size by deleting branch instructions. The fsm is controlled by
25186 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
25188 /* The states of the fsm controlling condition codes are:
25189 0: normal, do nothing special
25190 1: make ASM_OUTPUT_OPCODE not output this instruction
25191 2: make ASM_OUTPUT_OPCODE not output this instruction
25192 3: make instructions conditional
25193 4: make instructions conditional
25195 State transitions (state->state by whom under condition):
25196 0 -> 1 final_prescan_insn if the `target' is a label
25197 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
25198 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
25199 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
25200 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
25201 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
25202 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
25203 (the target insn is arm_target_insn).
25205 If the jump clobbers the conditions then we use states 2 and 4.
25207 A similar thing can be done with conditional return insns.
25209 XXX In case the `target' is an unconditional branch, this conditionalising
25210 of the instructions always reduces code size, but not always execution
25211 time. But then, I want to reduce the code size to somewhere near what
25212 /bin/cc produces. */
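/* For example (illustrative), a conditional branch that skips a single
   instruction:

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   is output with the branch suppressed and the skipped instruction
   conditionalised on the inverse condition:

	cmp	r0, #0
	addne	r1, r1, #1  */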
25214 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
25215 instructions. When a COND_EXEC instruction is seen the subsequent
25216 instructions are scanned so that multiple conditional instructions can be
25217 combined into a single IT block. arm_condexec_count and arm_condexec_mask
25218 specify the length and true/false mask for the IT block. These will be
25219 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
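/* For example (illustrative): with arm_condexec_masklen == 3,
   arm_condexec_mask == 0b101 and arm_current_cc == ARM_EQ, the IT
   instruction "itet eq" is emitted before the first instruction of the
   block, so the three instructions execute under EQ, NE and EQ
   respectively. */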
25221 /* Returns the index of the ARM condition code string in
25222 `arm_condition_codes', or ARM_NV if the comparison is invalid.
25223 COMPARISON should be an rtx like `(eq (...) (...))'. */
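/* For example (illustrative): (eq (reg:CC CC_REGNUM) (const_int 0))
   yields ARM_EQ, and (ltu (reg:CC_C CC_REGNUM) (const_int 0)) yields
   ARM_CS (the carry-set condition). */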
25225 enum arm_cond_code
25226 maybe_get_arm_condition_code (rtx comparison)
25228 machine_mode mode = GET_MODE (XEXP (comparison, 0));
25229 enum arm_cond_code code;
25230 enum rtx_code comp_code = GET_CODE (comparison);
25232 if (GET_MODE_CLASS (mode) != MODE_CC)
25233 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
25234 XEXP (comparison, 1));
25236 switch (mode)
25238 case E_CC_DNEmode: code = ARM_NE; goto dominance;
25239 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
25240 case E_CC_DGEmode: code = ARM_GE; goto dominance;
25241 case E_CC_DGTmode: code = ARM_GT; goto dominance;
25242 case E_CC_DLEmode: code = ARM_LE; goto dominance;
25243 case E_CC_DLTmode: code = ARM_LT; goto dominance;
25244 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
25245 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
25246 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
25247 case E_CC_DLTUmode: code = ARM_CC;
25249 dominance:
25250 if (comp_code == EQ)
25251 return ARM_INVERSE_CONDITION_CODE (code);
25252 if (comp_code == NE)
25253 return code;
25254 return ARM_NV;
25256 case E_CC_NZmode:
25257 switch (comp_code)
25259 case NE: return ARM_NE;
25260 case EQ: return ARM_EQ;
25261 case GE: return ARM_PL;
25262 case LT: return ARM_MI;
25263 default: return ARM_NV;
25266 case E_CC_Zmode:
25267 switch (comp_code)
25269 case NE: return ARM_NE;
25270 case EQ: return ARM_EQ;
25271 default: return ARM_NV;
25274 case E_CC_Nmode:
25275 switch (comp_code)
25277 case NE: return ARM_MI;
25278 case EQ: return ARM_PL;
25279 default: return ARM_NV;
25282 case E_CCFPEmode:
25283 case E_CCFPmode:
25284 /* We can handle all cases except UNEQ and LTGT. */
25285 switch (comp_code)
25287 case GE: return ARM_GE;
25288 case GT: return ARM_GT;
25289 case LE: return ARM_LS;
25290 case LT: return ARM_MI;
25291 case NE: return ARM_NE;
25292 case EQ: return ARM_EQ;
25293 case ORDERED: return ARM_VC;
25294 case UNORDERED: return ARM_VS;
25295 case UNLT: return ARM_LT;
25296 case UNLE: return ARM_LE;
25297 case UNGT: return ARM_HI;
25298 case UNGE: return ARM_PL;
25299 /* UNEQ and LTGT do not have a representation. */
25300 case UNEQ: /* Fall through. */
25301 case LTGT: /* Fall through. */
25302 default: return ARM_NV;
25305 case E_CC_SWPmode:
25306 switch (comp_code)
25308 case NE: return ARM_NE;
25309 case EQ: return ARM_EQ;
25310 case GE: return ARM_LE;
25311 case GT: return ARM_LT;
25312 case LE: return ARM_GE;
25313 case LT: return ARM_GT;
25314 case GEU: return ARM_LS;
25315 case GTU: return ARM_CC;
25316 case LEU: return ARM_CS;
25317 case LTU: return ARM_HI;
25318 default: return ARM_NV;
25321 case E_CC_Cmode:
25322 switch (comp_code)
25324 case LTU: return ARM_CS;
25325 case GEU: return ARM_CC;
25326 default: return ARM_NV;
25329 case E_CC_NVmode:
25330 switch (comp_code)
25332 case GE: return ARM_GE;
25333 case LT: return ARM_LT;
25334 default: return ARM_NV;
25337 case E_CC_Bmode:
25338 switch (comp_code)
25340 case GEU: return ARM_CS;
25341 case LTU: return ARM_CC;
25342 default: return ARM_NV;
25345 case E_CC_Vmode:
25346 switch (comp_code)
25348 case NE: return ARM_VS;
25349 case EQ: return ARM_VC;
25350 default: return ARM_NV;
25353 case E_CC_ADCmode:
25354 switch (comp_code)
25356 case GEU: return ARM_CS;
25357 case LTU: return ARM_CC;
25358 default: return ARM_NV;
25361 case E_CCmode:
25362 case E_CC_RSBmode:
25363 switch (comp_code)
25365 case NE: return ARM_NE;
25366 case EQ: return ARM_EQ;
25367 case GE: return ARM_GE;
25368 case GT: return ARM_GT;
25369 case LE: return ARM_LE;
25370 case LT: return ARM_LT;
25371 case GEU: return ARM_CS;
25372 case GTU: return ARM_HI;
25373 case LEU: return ARM_LS;
25374 case LTU: return ARM_CC;
25375 default: return ARM_NV;
25378 default: gcc_unreachable ();
25382 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
25383 static enum arm_cond_code
25384 get_arm_condition_code (rtx comparison)
25386 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
25387 gcc_assert (code != ARM_NV);
25388 return code;
25391 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
25392 code registers when not targeting Thumb1. The VFP condition register
25393 only exists when generating hard-float code. */
25394 static bool
25395 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
25397 if (!TARGET_32BIT)
25398 return false;
25400 *p1 = CC_REGNUM;
25401 *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
25402 return true;
25405 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
25406 instructions. */
25407 void
25408 thumb2_final_prescan_insn (rtx_insn *insn)
25410 rtx_insn *first_insn = insn;
25411 rtx body = PATTERN (insn);
25412 rtx predicate;
25413 enum arm_cond_code code;
25414 int n;
25415 int mask;
25416 int max;
25418 /* max_insns_skipped in the tune was already taken into account in the
25419 cost model of the ifcvt pass when generating COND_EXEC insns. At this
25420 stage just emit the IT blocks as best we can. It does not make sense
25421 to split the IT blocks. */
25422 max = MAX_INSN_PER_IT_BLOCK;
25424 /* Remove the previous insn from the count of insns to be output. */
25425 if (arm_condexec_count)
25426 arm_condexec_count--;
25428 /* Nothing to do if we are already inside a conditional block. */
25429 if (arm_condexec_count)
25430 return;
25432 if (GET_CODE (body) != COND_EXEC)
25433 return;
25435 /* Conditional jumps are implemented directly. */
25436 if (JUMP_P (insn))
25437 return;
25439 predicate = COND_EXEC_TEST (body);
25440 arm_current_cc = get_arm_condition_code (predicate);
25442 n = get_attr_ce_count (insn);
25443 arm_condexec_count = 1;
25444 arm_condexec_mask = (1 << n) - 1;
25445 arm_condexec_masklen = n;
25446 /* See if subsequent instructions can be combined into the same block. */
25447 for (;;)
25449 insn = next_nonnote_insn (insn);
25451 /* Jumping into the middle of an IT block is illegal, so a label or
25452 barrier terminates the block. */
25453 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
25454 break;
25456 body = PATTERN (insn);
25457 /* USE and CLOBBER aren't really insns, so just skip them. */
25458 if (GET_CODE (body) == USE
25459 || GET_CODE (body) == CLOBBER)
25460 continue;
25462 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
25463 if (GET_CODE (body) != COND_EXEC)
25464 break;
25465 /* Maximum number of conditionally executed instructions in a block. */
25466 n = get_attr_ce_count (insn);
25467 if (arm_condexec_masklen + n > max)
25468 break;
25470 predicate = COND_EXEC_TEST (body);
25471 code = get_arm_condition_code (predicate);
25472 mask = (1 << n) - 1;
25473 if (arm_current_cc == code)
25474 arm_condexec_mask |= (mask << arm_condexec_masklen);
25475 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
25476 break;
25478 arm_condexec_count++;
25479 arm_condexec_masklen += n;
25481 /* A jump must be the last instruction in a conditional block. */
25482 if (JUMP_P (insn))
25483 break;
25485 /* Restore recog_data (getting the attributes of other insns can
25486 destroy this array, but final.cc assumes that it remains intact
25487 across this call). */
25488 extract_constrain_insn_cached (first_insn);
25491 void
25492 arm_final_prescan_insn (rtx_insn *insn)
25494 /* BODY will hold the body of INSN. */
25495 rtx body = PATTERN (insn);
25497 /* This will be 1 if trying to repeat the trick, and things need to be
25498 reversed if it appears to fail. */
25499 int reverse = 0;
25501 /* If we start with a return insn, we only succeed if we find another one. */
25502 int seeking_return = 0;
25503 enum rtx_code return_code = UNKNOWN;
25505 /* START_INSN will hold the insn from where we start looking. This is the
25506 first insn after the following code_label if REVERSE is true. */
25507 rtx_insn *start_insn = insn;
25509 /* If in state 4, check if the target branch is reached, in order to
25510 change back to state 0. */
25511 if (arm_ccfsm_state == 4)
25513 if (insn == arm_target_insn)
25515 arm_target_insn = NULL;
25516 arm_ccfsm_state = 0;
25518 return;
25521 /* If in state 3, it is possible to repeat the trick, if this insn is an
25522 unconditional branch to a label, and immediately following this branch
25523 is the previous target label which is only used once, and the label this
25524 branch jumps to is not too far off. */
25525 if (arm_ccfsm_state == 3)
25527 if (simplejump_p (insn))
25529 start_insn = next_nonnote_insn (start_insn);
25530 if (BARRIER_P (start_insn))
25532 /* XXX Isn't this always a barrier? */
25533 start_insn = next_nonnote_insn (start_insn);
25535 if (LABEL_P (start_insn)
25536 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25537 && LABEL_NUSES (start_insn) == 1)
25538 reverse = TRUE;
25539 else
25540 return;
25542 else if (ANY_RETURN_P (body))
25544 start_insn = next_nonnote_insn (start_insn);
25545 if (BARRIER_P (start_insn))
25546 start_insn = next_nonnote_insn (start_insn);
25547 if (LABEL_P (start_insn)
25548 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25549 && LABEL_NUSES (start_insn) == 1)
25551 reverse = TRUE;
25552 seeking_return = 1;
25553 return_code = GET_CODE (body);
25555 else
25556 return;
25558 else
25559 return;
25562 gcc_assert (!arm_ccfsm_state || reverse);
25563 if (!JUMP_P (insn))
25564 return;
25566 /* This jump might be paralleled with a clobber of the condition codes;
25567 the jump should always come first. */
25568 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
25569 body = XVECEXP (body, 0, 0);
25571 if (reverse
25572 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
25573 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
25575 int insns_skipped;
25576 int fail = FALSE, succeed = FALSE;
25577 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
25578 int then_not_else = TRUE;
25579 rtx_insn *this_insn = start_insn;
25580 rtx label = 0;
25582 /* Register the insn jumped to. */
25583 if (reverse)
25585 if (!seeking_return)
25586 label = XEXP (SET_SRC (body), 0);
25588 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
25589 label = XEXP (XEXP (SET_SRC (body), 1), 0);
25590 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
25592 label = XEXP (XEXP (SET_SRC (body), 2), 0);
25593 then_not_else = FALSE;
25595 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
25597 seeking_return = 1;
25598 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
25600 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
25602 seeking_return = 1;
25603 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
25604 then_not_else = FALSE;
25606 else
25607 gcc_unreachable ();
25609 /* See how many insns this branch skips, and what kind of insns. If all
25610 insns are okay, and the label or unconditional branch to the same
25611 label is not too far away, succeed. */
25612 for (insns_skipped = 0;
25613 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
25615 rtx scanbody;
25617 this_insn = next_nonnote_insn (this_insn);
25618 if (!this_insn)
25619 break;
25621 switch (GET_CODE (this_insn))
25623 case CODE_LABEL:
25624 /* Succeed if it is the target label, otherwise fail since
25625 control falls in from somewhere else. */
25626 if (this_insn == label)
25628 arm_ccfsm_state = 1;
25629 succeed = TRUE;
25631 else
25632 fail = TRUE;
25633 break;
25635 case BARRIER:
25636 /* Succeed if the following insn is the target label.
25637 Otherwise fail.
25638 If return insns are used then the last insn in a function
25639 will be a barrier. */
25640 this_insn = next_nonnote_insn (this_insn);
25641 if (this_insn && this_insn == label)
25643 arm_ccfsm_state = 1;
25644 succeed = TRUE;
25646 else
25647 fail = TRUE;
25648 break;
25650 case CALL_INSN:
25651 /* The AAPCS says that conditional calls should not be
25652 used since they make interworking inefficient (the
25653 linker can't transform BL<cond> into BLX). That's
25654 only a problem if the machine has BLX. */
25655 if (arm_arch5t)
25657 fail = TRUE;
25658 break;
25661 /* Succeed if the following insn is the target label, or
25662 if the following two insns are a barrier and the
25663 target label. */
25664 this_insn = next_nonnote_insn (this_insn);
25665 if (this_insn && BARRIER_P (this_insn))
25666 this_insn = next_nonnote_insn (this_insn);
25668 if (this_insn && this_insn == label
25669 && insns_skipped < max_insns_skipped)
25671 arm_ccfsm_state = 1;
25672 succeed = TRUE;
25674 else
25675 fail = TRUE;
25676 break;
25678 case JUMP_INSN:
25679 /* If this is an unconditional branch to the same label, succeed.
25680 If it is to another label, do nothing. If it is conditional,
25681 fail. */
25682 /* XXX Probably, the tests for SET and the PC are
25683 unnecessary. */
25685 scanbody = PATTERN (this_insn);
25686 if (GET_CODE (scanbody) == SET
25687 && GET_CODE (SET_DEST (scanbody)) == PC)
25689 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
25690 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
25692 arm_ccfsm_state = 2;
25693 succeed = TRUE;
25695 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
25696 fail = TRUE;
25698 /* Fail if a conditional return is undesirable (e.g. on a
25699 StrongARM), but still allow this if optimizing for size. */
25700 else if (GET_CODE (scanbody) == return_code
25701 && !use_return_insn (TRUE, NULL)
25702 && !optimize_size)
25703 fail = TRUE;
25704 else if (GET_CODE (scanbody) == return_code)
25706 arm_ccfsm_state = 2;
25707 succeed = TRUE;
25709 else if (GET_CODE (scanbody) == PARALLEL)
25711 switch (get_attr_conds (this_insn))
25713 case CONDS_NOCOND:
25714 break;
25715 default:
25716 fail = TRUE;
25717 break;
25720 else
25721 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
25723 break;
25725 case INSN:
25726 /* Check the instruction is explicitly marked as predicable.
25727 Instructions using or affecting the condition codes are not. */
25728 scanbody = PATTERN (this_insn);
25729 if (!(GET_CODE (scanbody) == SET
25730 || GET_CODE (scanbody) == PARALLEL)
25731 || get_attr_predicable (this_insn) != PREDICABLE_YES
25732 || get_attr_conds (this_insn) != CONDS_NOCOND)
25733 fail = TRUE;
25734 break;
25736 default:
25737 break;
25740 if (succeed)
25742 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
25743 arm_target_label = CODE_LABEL_NUMBER (label);
25744 else
25746 gcc_assert (seeking_return || arm_ccfsm_state == 2);
25748 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
25750 this_insn = next_nonnote_insn (this_insn);
25751 gcc_assert (!this_insn
25752 || (!BARRIER_P (this_insn)
25753 && !LABEL_P (this_insn)));
25755 if (!this_insn)
25757 /* Oh, dear! We ran off the end; give up. */
25758 extract_constrain_insn_cached (insn);
25759 arm_ccfsm_state = 0;
25760 arm_target_insn = NULL;
25761 return;
25763 arm_target_insn = this_insn;
25766 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
25767 what it was. */
25768 if (!reverse)
25769 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
25771 if (reverse || then_not_else)
25772 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
25775 /* Restore recog_data (getting the attributes of other insns can
25776 destroy this array, but final.cc assumes that it remains intact
25777 across this call). */
25778 extract_constrain_insn_cached (insn);
25782 /* Output IT instructions. */
25783 void
25784 thumb2_asm_output_opcode (FILE * stream)
25786 char buff[5];
25787 int n;
25789 if (arm_condexec_mask)
25791 for (n = 0; n < arm_condexec_masklen; n++)
25792 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
25793 buff[n] = 0;
25794 asm_fprintf(stream, "i%s\t%s\n\t", buff,
25795 arm_condition_codes[arm_current_cc]);
25796 arm_condexec_mask = 0;
25800 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
25801 UNITS_PER_WORD bytes wide. */
25802 static unsigned int
25803 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
25805 if (IS_VPR_REGNUM (regno))
25806 return CEIL (GET_MODE_SIZE (mode), 2);
25808 if (TARGET_32BIT
25809 && regno > PC_REGNUM
25810 && regno != FRAME_POINTER_REGNUM
25811 && regno != ARG_POINTER_REGNUM
25812 && !IS_VFP_REGNUM (regno))
25813 return 1;
25815 return ARM_NUM_REGS (mode);
25818 /* Implement TARGET_HARD_REGNO_MODE_OK. */
25819 static bool
25820 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
25822 if (GET_MODE_CLASS (mode) == MODE_CC)
25823 return (regno == CC_REGNUM
25824 || (TARGET_VFP_BASE
25825 && regno == VFPCC_REGNUM));
25827 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
25828 return false;
25830 if (IS_VPR_REGNUM (regno))
25831 return VALID_MVE_PRED_MODE (mode);
25833 if (TARGET_THUMB1)
25834 /* For the Thumb we only allow values bigger than SImode in
25835 registers 0 - 6, so that there is always a second low
25836 register available to hold the upper part of the value.
25837 We probably ought to ensure that the register is the
25838 start of an even numbered register pair. */
25839 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
25841 if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
25843 if (mode == DFmode || mode == DImode)
25844 return VFP_REGNO_OK_FOR_DOUBLE (regno);
25846 if (mode == HFmode || mode == BFmode || mode == HImode
25847 || mode == SFmode || mode == SImode)
25848 return VFP_REGNO_OK_FOR_SINGLE (regno);
25850 if (TARGET_NEON)
25851 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
25852 || (VALID_NEON_QREG_MODE (mode)
25853 && NEON_REGNO_OK_FOR_QUAD (regno))
25854 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
25855 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
25856 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25857 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
25858 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
25859 if (TARGET_HAVE_MVE)
25860 return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
25861 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25862 || (mode == V2x16QImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25863 || (mode == V2x8HImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25864 || (mode == V2x4SImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25865 || (mode == V2x8HFmode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25866 || (mode == V2x4SFmode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25867 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8))
25868 || (mode == V4x16QImode && NEON_REGNO_OK_FOR_NREGS (regno, 8))
25869 || (mode == V4x8HImode && NEON_REGNO_OK_FOR_NREGS (regno, 8))
25870 || (mode == V4x4SImode && NEON_REGNO_OK_FOR_NREGS (regno, 8))
25871 || (mode == V4x8HFmode && NEON_REGNO_OK_FOR_NREGS (regno, 8))
25872 || (mode == V4x4SFmode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));
25874 return false;
25877 if (TARGET_REALLY_IWMMXT)
25879 if (IS_IWMMXT_GR_REGNUM (regno))
25880 return mode == SImode;
25882 if (IS_IWMMXT_REGNUM (regno))
25883 return VALID_IWMMXT_REG_MODE (mode);
25886 /* We allow almost any value to be stored in the general registers.
25887 Restrict doubleword quantities to even register pairs in ARM state
25888 so that we can use ldrd. The same restriction applies for MVE
25889 in order to support Armv8.1-M Mainline instructions.
25890 Do not allow very large Neon structure opaque modes in general
25891 registers; they would use too many. */
25892 if (regno <= LAST_ARM_REGNUM)
25894 if (ARM_NUM_REGS (mode) > 4)
25895 return false;
25897 if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
25898 return true;
25900 return !((TARGET_LDRD || TARGET_CDE)
25901 && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
25904 if (regno == FRAME_POINTER_REGNUM
25905 || regno == ARG_POINTER_REGNUM)
25906 /* We only allow integers in the fake hard registers. */
25907 return GET_MODE_CLASS (mode) == MODE_INT;
25909 return false;
25912 /* Implement TARGET_MODES_TIEABLE_P. */
25914 static bool
25915 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
25917 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
25918 return true;
25920 if (TARGET_HAVE_MVE
25921 && (VALID_MVE_PRED_MODE (mode1) && VALID_MVE_PRED_MODE (mode2)))
25922 return true;
25924 /* We specifically want to allow elements of "structure" modes to
25925 be tieable to the structure. This more general condition allows
25926 other rarer situations too. */
25927 if ((TARGET_NEON
25928 && (VALID_NEON_DREG_MODE (mode1)
25929 || VALID_NEON_QREG_MODE (mode1)
25930 || VALID_NEON_STRUCT_MODE (mode1))
25931 && (VALID_NEON_DREG_MODE (mode2)
25932 || VALID_NEON_QREG_MODE (mode2)
25933 || VALID_NEON_STRUCT_MODE (mode2)))
25934 || (TARGET_HAVE_MVE
25935 && (VALID_MVE_MODE (mode1)
25936 || VALID_MVE_STRUCT_MODE (mode1))
25937 && (VALID_MVE_MODE (mode2)
25938 || VALID_MVE_STRUCT_MODE (mode2))))
25939 return true;
25941 return false;
25944 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
25945 not used in arm mode. */
25947 enum reg_class
25948 arm_regno_class (int regno)
25950 if (regno == PC_REGNUM)
25951 return NO_REGS;
25953 if (IS_VPR_REGNUM (regno))
25954 return VPR_REG;
25956 if (IS_PAC_REGNUM (regno))
25957 return PAC_REG;
25959 if (TARGET_THUMB1)
25961 if (regno == STACK_POINTER_REGNUM)
25962 return STACK_REG;
25963 if (regno == CC_REGNUM)
25964 return CC_REG;
25965 if (regno < 8)
25966 return LO_REGS;
25967 return HI_REGS;
25970 if (TARGET_THUMB2 && regno < 8)
25971 return LO_REGS;
25973 if ( regno <= LAST_ARM_REGNUM
25974 || regno == FRAME_POINTER_REGNUM
25975 || regno == ARG_POINTER_REGNUM)
25976 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
25978 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
25979 return TARGET_THUMB2 ? CC_REG : NO_REGS;
25981 if (IS_VFP_REGNUM (regno))
25983 if (regno <= D7_VFP_REGNUM)
25984 return VFP_D0_D7_REGS;
25985 else if (regno <= LAST_LO_VFP_REGNUM)
25986 return VFP_LO_REGS;
25987 else
25988 return VFP_HI_REGS;
25991 if (IS_IWMMXT_REGNUM (regno))
25992 return IWMMXT_REGS;
25994 if (IS_IWMMXT_GR_REGNUM (regno))
25995 return IWMMXT_GR_REGS;
25997 return NO_REGS;
26000 /* Handle a special case when computing the offset
26001 of an argument from the frame pointer. */
26002 int
26003 arm_debugger_arg_offset (int value, rtx addr)
26005 rtx_insn *insn;
26007 /* We are only interested if dbxout_parms() failed to compute the offset. */
26008 if (value != 0)
26009 return 0;
26011 /* We can only cope with the case where the address is held in a register. */
26012 if (!REG_P (addr))
26013 return 0;
26015 /* If we are using the frame pointer to point at the argument, then
26016 an offset of 0 is correct. */
26017 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
26018 return 0;
26020 /* If we are using the stack pointer to point at the
26021 argument, then an offset of 0 is correct. */
26022 /* ??? Check this is consistent with thumb2 frame layout. */
26023 if ((TARGET_THUMB || !frame_pointer_needed)
26024 && REGNO (addr) == SP_REGNUM)
26025 return 0;
26027 /* Oh dear. The argument is pointed to by a register rather
26028 than being held in a register, or being stored at a known
26029 offset from the frame pointer. Since GDB only understands
26030 those two kinds of argument we must translate the address
26031 held in the register into an offset from the frame pointer.
26032 We do this by searching through the insns for the function
26033 looking to see where this register gets its value. If the
26034 register is initialized from the frame pointer plus an offset
26035 then we are in luck and we can continue, otherwise we give up.
26037 This code is exercised by producing debugging information
26038 for a function with arguments like this:
26040 double func (double a, double b, int c, double d) {return d;}
26042 Without this code the stab for parameter 'd' will be set to
26043 an offset of 0 from the frame pointer, rather than 8. */
26045 /* The if() statement says:
26047 If the insn is a normal instruction
26048 and if the insn is setting the value in a register
26049 and if the register being set is the register holding the address of the argument
26050 and if the address is computed by an addition
26051 that involves adding to a register
26052 which is the frame pointer
26053 a constant integer
26055 then... */
26057 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26059 if ( NONJUMP_INSN_P (insn)
26060 && GET_CODE (PATTERN (insn)) == SET
26061 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
26062 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
26063 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
26064 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
26065 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
26068 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
26070 break;
26074 if (value == 0)
26076 debug_rtx (addr);
26077 warning (0, "unable to compute real location of stacked parameter");
26078 value = 8; /* XXX magic hack */
26081 return value;
26084 /* Implement TARGET_PROMOTED_TYPE. */
26086 static tree
26087 arm_promoted_type (const_tree t)
26089 if (SCALAR_FLOAT_TYPE_P (t)
26090 && TYPE_PRECISION (t) == 16
26091 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
26092 return float_type_node;
26093 return NULL_TREE;
26096 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
26097 This simply adds HFmode as a supported mode; even though we don't
26098 implement arithmetic on this type directly, it's supported by
26099 optabs conversions, much the way the double-word arithmetic is
26100 special-cased in the default hook. */
26102 static bool
26103 arm_scalar_mode_supported_p (scalar_mode mode)
26105 if (mode == HFmode)
26106 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
26107 else if (ALL_FIXED_POINT_MODE_P (mode))
26108 return true;
26109 else
26110 return default_scalar_mode_supported_p (mode);
26113 /* Set the value of FLT_EVAL_METHOD.
26114 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
26116 0: evaluate all operations and constants, whose semantic type has at
26117 most the range and precision of type float, to the range and
26118 precision of float; evaluate all other operations and constants to
26119 the range and precision of the semantic type;
26121 N, where _FloatN is a supported interchange floating type
26122 evaluate all operations and constants, whose semantic type has at
26123 most the range and precision of _FloatN type, to the range and
26124 precision of the _FloatN type; evaluate all other operations and
26125 constants to the range and precision of the semantic type;
26127 If we have the ARMv8.2-A extensions then we support _Float16 in native
26128 precision, so we should set this to 16. Otherwise, we support the type,
26129 but want to evaluate expressions in float precision, so set this to
26130 0. */
26132 static enum flt_eval_method
26133 arm_excess_precision (enum excess_precision_type type)
26135 switch (type)
26137 case EXCESS_PRECISION_TYPE_FAST:
26138 case EXCESS_PRECISION_TYPE_STANDARD:
26139 /* We can calculate either in 16-bit range and precision or
26140 32-bit range and precision. Make that decision based on whether
26141 we have native support for the ARMv8.2-A 16-bit floating-point
26142 instructions or not. */
26143 return (TARGET_VFP_FP16INST
26144 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
26145 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
26146 case EXCESS_PRECISION_TYPE_IMPLICIT:
26147 case EXCESS_PRECISION_TYPE_FLOAT16:
26148 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
26149 default:
26150 gcc_unreachable ();
26152 return FLT_EVAL_METHOD_UNPREDICTABLE;
26156 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
26157 _Float16 if we are using anything other than ieee format for 16-bit
26158 floating point. Otherwise, punt to the default implementation. */
26159 static opt_scalar_float_mode
26160 arm_floatn_mode (int n, bool extended)
26162 if (!extended && n == 16)
26164 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
26165 return HFmode;
26166 return opt_scalar_float_mode ();
26169 return default_floatn_mode (n, extended);
26173 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
26174 not to early-clobber SRC registers in the process.
26176 We assume that the operands described by SRC and DEST represent a
26177 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
26178 number of components into which the copy has been decomposed. */
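/* For example (illustrative): copying a two-register value from {d0, d1}
   into the overlapping pair {d1, d2} is emitted highest component first
   (d2 <- d1, then d1 <- d0), since a forward order would clobber d1
   before it had been read. */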
26179 void
26180 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
26182 unsigned int i;
26184 if (!reg_overlap_mentioned_p (operands[0], operands[1])
26185 || REGNO (operands[0]) < REGNO (operands[1]))
26187 for (i = 0; i < count; i++)
26189 operands[2 * i] = dest[i];
26190 operands[2 * i + 1] = src[i];
26193 else
26195 for (i = 0; i < count; i++)
26197 operands[2 * i] = dest[count - i - 1];
26198 operands[2 * i + 1] = src[count - i - 1];
26203 /* Split operands into moves from op[1] + op[2] into op[0]. */
26205 void
26206 neon_split_vcombine (rtx operands[3])
26208 unsigned int dest = REGNO (operands[0]);
26209 unsigned int src1 = REGNO (operands[1]);
26210 unsigned int src2 = REGNO (operands[2]);
26211 machine_mode halfmode = GET_MODE (operands[1]);
26212 unsigned int halfregs = REG_NREGS (operands[1]);
26213 rtx destlo, desthi;
26215 if (src1 == dest && src2 == dest + halfregs)
26217 /* No-op move. Can't split to nothing; emit something. */
26218 emit_note (NOTE_INSN_DELETED);
26219 return;
26222 /* Preserve register attributes for variable tracking. */
26223 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
26224 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
26225 GET_MODE_SIZE (halfmode));
26227 /* Special case of reversed high/low parts. Use VSWP. */
26228 if (src2 == dest && src1 == dest + halfregs)
26230 rtx x = gen_rtx_SET (destlo, operands[1]);
26231 rtx y = gen_rtx_SET (desthi, operands[2]);
26232 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
26233 return;
26236 if (!reg_overlap_mentioned_p (operands[2], destlo))
26238 /* Try to avoid unnecessary moves if part of the result
26239 is in the right place already. */
26240 if (src1 != dest)
26241 emit_move_insn (destlo, operands[1]);
26242 if (src2 != dest + halfregs)
26243 emit_move_insn (desthi, operands[2]);
26245 else
26247 if (src2 != dest + halfregs)
26248 emit_move_insn (desthi, operands[2]);
26249 if (src1 != dest)
26250 emit_move_insn (destlo, operands[1]);
26254 /* Return the number (counting from 0) of
26255 the least significant set bit in MASK. */
26257 inline static int
26258 number_of_first_bit_set (unsigned mask)
26260 return ctz_hwi (mask);
26263 /* Like emit_multi_reg_push, but allowing for a different set of
26264 registers to be described as saved. MASK is the set of registers
26265 to be saved; REAL_REGS is the set of registers to be described as
26266 saved. If REAL_REGS is 0, only describe the stack adjustment. */
26268 static rtx_insn *
26269 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26271 unsigned long regno;
26272 rtx par[10], tmp, reg;
26273 rtx_insn *insn;
26274 int i, j;
26276 /* Build the parallel of the registers actually being stored. */
26277 for (i = 0; mask; ++i, mask &= mask - 1)
26279 regno = ctz_hwi (mask);
26280 reg = gen_rtx_REG (SImode, regno);
26282 if (i == 0)
26283 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26284 else
26285 tmp = gen_rtx_USE (VOIDmode, reg);
26287 par[i] = tmp;
26290 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26291 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26292 tmp = gen_frame_mem (BLKmode, tmp);
26293 tmp = gen_rtx_SET (tmp, par[0]);
26294 par[0] = tmp;
26296 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26297 insn = emit_insn (tmp);
26299 /* Always build the stack adjustment note for unwind info. */
26300 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26301 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
26302 par[0] = tmp;
26304 /* Build the parallel of the registers recorded as saved for unwind. */
26305 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26307 regno = ctz_hwi (real_regs);
26308 reg = gen_rtx_REG (SImode, regno);
26310 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26311 tmp = gen_frame_mem (SImode, tmp);
26312 tmp = gen_rtx_SET (tmp, reg);
26313 RTX_FRAME_RELATED_P (tmp) = 1;
26314 par[j + 1] = tmp;
26317 if (j == 0)
26318 tmp = par[0];
26319 else
26321 RTX_FRAME_RELATED_P (par[0]) = 1;
26322 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26325 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26327 return insn;
26330 /* Emit code to push or pop registers to or from the stack. F is the
26331 assembly file. MASK is the registers to pop. */
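/* For example (illustrative): a MASK of r0, r1 and the PC emits
   "pop\t{r0, r1, pc}" when the PC may be popped directly (no
   interworking, backtrace, __builtin_eh_return or CMSE entry);
   otherwise only the low registers are popped here and the return is
   completed by thumb_exit. */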
26332 static void
26333 thumb_pop (FILE *f, unsigned long mask)
26335 int regno;
26336 int lo_mask = mask & 0xFF;
26338 gcc_assert (mask);
26340 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26342 /* Special case. Do not generate a POP PC statement here, do it in
26343 thumb_exit(). */
26344 thumb_exit (f, -1);
26345 return;
26348 fprintf (f, "\tpop\t{");
26350 /* Look at the low registers first. */
26351 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26353 if (lo_mask & 1)
26355 asm_fprintf (f, "%r", regno);
26357 if ((lo_mask & ~1) != 0)
26358 fprintf (f, ", ");
26362 if (mask & (1 << PC_REGNUM))
26364 /* Catch popping the PC. */
26365 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
26366 || IS_CMSE_ENTRY (arm_current_func_type ()))
26368 /* The PC is never popped directly; instead
26369 it is popped into r3 and then BX is used. */
26370 fprintf (f, "}\n");
26372 thumb_exit (f, -1);
26374 return;
26376 else
26378 if (mask & 0xFF)
26379 fprintf (f, ", ");
26381 asm_fprintf (f, "%r", PC_REGNUM);
26385 fprintf (f, "}\n");
26388 /* Generate code to return from a thumb function.
26389 If 'reg_containing_return_addr' is -1, then the return address is
26390 actually on the stack, at the stack pointer.
26392 Note: do not forget to update the length attribute of the corresponding insn
26393 pattern when changing the assembly output (e.g. the length attribute of
26394 epilogue_insns when updating the Armv8-M Baseline Security Extensions
26395 register clearing sequences).
26396 static void
26397 thumb_exit (FILE *f, int reg_containing_return_addr)
26399 unsigned regs_available_for_popping;
26400 unsigned regs_to_pop;
26401 int pops_needed;
26402 unsigned available;
26403 unsigned required;
26404 machine_mode mode;
26405 int size;
26406 int restore_a4 = FALSE;
26408 /* Compute the registers we need to pop. */
26409 regs_to_pop = 0;
26410 pops_needed = 0;
26412 if (reg_containing_return_addr == -1)
26414 regs_to_pop |= 1 << LR_REGNUM;
26415 ++pops_needed;
26418 if (TARGET_BACKTRACE)
26420 /* Restore the (ARM) frame pointer and stack pointer. */
26421 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26422 pops_needed += 2;
26425 /* If there is nothing to pop then just emit the BX instruction and
26426 return. */
26427 if (pops_needed == 0)
26429 if (crtl->calls_eh_return)
26430 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26432 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26434 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
26435 emitted by cmse_nonsecure_entry_clear_before_return (). */
26436 if (!TARGET_HAVE_FPCXT_CMSE)
26437 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
26438 reg_containing_return_addr);
26439 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26441 else
26442 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26443 return;
26445 /* Otherwise if we are not supporting interworking and we have not created
26446 a backtrace structure and the function was not entered in ARM mode then
26447 just pop the return address straight into the PC. */
26448 else if (!TARGET_INTERWORK
26449 && !TARGET_BACKTRACE
26450 && !is_called_in_ARM_mode (current_function_decl)
26451 && !crtl->calls_eh_return
26452 && !IS_CMSE_ENTRY (arm_current_func_type ()))
26454 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26455 return;
26458 /* Find out how many of the (return) argument registers we can corrupt. */
26459 regs_available_for_popping = 0;
26461 /* If returning via __builtin_eh_return, the bottom three registers
26462 all contain information needed for the return. */
26463 if (crtl->calls_eh_return)
26464 size = 12;
26465 else
26467 /* If we can deduce the registers used from the function's
26468 return value. This is more reliable than examining
26469 df_regs_ever_live_p () because that will be set if the register is
26470 ever used in the function, not just if the register is used
26471 to hold a return value. */
26473 if (crtl->return_rtx != 0)
26474 mode = GET_MODE (crtl->return_rtx);
26475 else
26476 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26478 size = GET_MODE_SIZE (mode);
26480 if (size == 0)
26482 /* In a void function we can use any argument register.
26483 In a function that returns a structure on the stack
26484 we can use the second and third argument registers. */
26485 if (mode == VOIDmode)
26486 regs_available_for_popping =
26487 (1 << ARG_REGISTER (1))
26488 | (1 << ARG_REGISTER (2))
26489 | (1 << ARG_REGISTER (3));
26490 else
26491 regs_available_for_popping =
26492 (1 << ARG_REGISTER (2))
26493 | (1 << ARG_REGISTER (3));
26495 else if (size <= 4)
26496 regs_available_for_popping =
26497 (1 << ARG_REGISTER (2))
26498 | (1 << ARG_REGISTER (3));
26499 else if (size <= 8)
26500 regs_available_for_popping =
26501 (1 << ARG_REGISTER (3));
26504 /* Match registers to be popped with registers into which we pop them. */
26505 for (available = regs_available_for_popping,
26506 required = regs_to_pop;
26507 required != 0 && available != 0;
26508 available &= ~(available & - available),
26509 required &= ~(required & - required))
26510 -- pops_needed;
26512 /* If we have any popping registers left over, remove them. */
26513 if (available > 0)
26514 regs_available_for_popping &= ~available;
26516 /* Otherwise if we need another popping register we can use
26517 the fourth argument register. */
26518 else if (pops_needed)
26520 /* If we have not found any free argument registers and
26521 reg a4 contains the return address, we must move it. */
26522 if (regs_available_for_popping == 0
26523 && reg_containing_return_addr == LAST_ARG_REGNUM)
26525 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26526 reg_containing_return_addr = LR_REGNUM;
26528 else if (size > 12)
26530 /* Register a4 is being used to hold part of the return value,
26531 but we have dire need of a free, low register. */
26532 restore_a4 = TRUE;
26534 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
26537 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26539 /* The fourth argument register is available. */
26540 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26542 --pops_needed;
26546 /* Pop as many registers as we can. */
26547 thumb_pop (f, regs_available_for_popping);
26549 /* Process the registers we popped. */
26550 if (reg_containing_return_addr == -1)
26552 /* The return address was popped into the lowest numbered register. */
26553 regs_to_pop &= ~(1 << LR_REGNUM);
26555 reg_containing_return_addr =
26556 number_of_first_bit_set (regs_available_for_popping);
26558 /* Remove this register from the mask of available registers, so that
26559 the return address will not be corrupted by further pops. */
26560 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26563 /* If we popped other registers then handle them here. */
26564 if (regs_available_for_popping)
26566 int frame_pointer;
26568 /* Work out which register currently contains the frame pointer. */
26569 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26571 /* Move it into the correct place. */
26572 asm_fprintf (f, "\tmov\t%r, %r\n",
26573 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26575 /* (Temporarily) remove it from the mask of popped registers. */
26576 regs_available_for_popping &= ~(1 << frame_pointer);
26577 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26579 if (regs_available_for_popping)
26581 int stack_pointer;
26583 /* We popped the stack pointer as well,
26584 find the register that contains it. */
26585 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26587 /* Move it into the stack register. */
26588 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26590 /* At this point we have popped all necessary registers, so
26591 do not worry about restoring regs_available_for_popping
26592 to its correct value:
26594 assert (pops_needed == 0)
26595 assert (regs_available_for_popping == (1 << frame_pointer))
26596 assert (regs_to_pop == (1 << STACK_POINTER)) */
26598 else
26600 /* Since we have just moved the popped value into the frame
26601 pointer, the popping register is available for reuse, and
26602 we know that we still have the stack pointer left to pop. */
26603 regs_available_for_popping |= (1 << frame_pointer);
26607 /* If we still have registers left on the stack, but we no longer have
26608 any registers into which we can pop them, then we must move the return
26609 address into the link register and make available the register that
26610 contained it. */
26611 if (regs_available_for_popping == 0 && pops_needed > 0)
26613 regs_available_for_popping |= 1 << reg_containing_return_addr;
26615 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26616 reg_containing_return_addr);
26618 reg_containing_return_addr = LR_REGNUM;
26621 /* If we have registers left on the stack then pop some more.
26622 We know that at most we will want to pop FP and SP. */
26623 if (pops_needed > 0)
26625 int popped_into;
26626 int move_to;
26628 thumb_pop (f, regs_available_for_popping);
26630 /* We have popped either FP or SP.
26631 Move whichever one it is into the correct register. */
26632 popped_into = number_of_first_bit_set (regs_available_for_popping);
26633 move_to = number_of_first_bit_set (regs_to_pop);
26635 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26636 --pops_needed;
26639 /* If we still have not popped everything then we must have only
26640 had one register available to us and we are now popping the SP. */
26641 if (pops_needed > 0)
26643 int popped_into;
26645 thumb_pop (f, regs_available_for_popping);
26647 popped_into = number_of_first_bit_set (regs_available_for_popping);
26649 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26651 /* assert (regs_to_pop == (1 << STACK_POINTER))
26652    assert (pops_needed == 1) */
26656 /* If necessary restore the a4 register. */
26657 if (restore_a4)
26659 if (reg_containing_return_addr != LR_REGNUM)
26661 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26662 reg_containing_return_addr = LR_REGNUM;
26665 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26668 if (crtl->calls_eh_return)
26669 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26671 /* Return to caller. */
26672 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26674 /* This is for the cases where LR is not being used to contain the return
26675 address. It may therefore contain information that we might not want
26676 to leak, hence it must be cleared. The value in R0 will never be a
26677 secret at this point, so it is safe to use it, see the clearing code
26678 in cmse_nonsecure_entry_clear_before_return (). */
26679 if (reg_containing_return_addr != LR_REGNUM)
26680 asm_fprintf (f, "\tmov\tlr, r0\n");
26682 /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
26683 by cmse_nonsecure_entry_clear_before_return (). */
26684 if (!TARGET_HAVE_FPCXT_CMSE)
26685 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
26686 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26688 else
26689 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26692 /* Scan INSN just before assembler is output for it.
26693 For Thumb-1, we track the status of the condition codes; this
26694 information is used in the cbranchsi4_insn pattern. */
26695 void
26696 thumb1_final_prescan_insn (rtx_insn *insn)
26698 if (flag_print_asm_name)
26699 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26700 INSN_ADDRESSES (INSN_UID (insn)));
26701 /* Don't overwrite the previous setter when we get to a cbranch. */
26702 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26704 enum attr_conds conds;
26706 if (cfun->machine->thumb1_cc_insn)
26708 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26709 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26710 CC_STATUS_INIT;
26712 conds = get_attr_conds (insn);
26713 if (conds == CONDS_SET)
26715 rtx set = single_set (insn);
26716 cfun->machine->thumb1_cc_insn = insn;
26717 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26718 cfun->machine->thumb1_cc_op1 = const0_rtx;
26719 cfun->machine->thumb1_cc_mode = CC_NZmode;
26720 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26722 rtx src1 = XEXP (SET_SRC (set), 1);
26723 if (src1 == const0_rtx)
26724 cfun->machine->thumb1_cc_mode = CCmode;
26726 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26728 /* Record the src register operand instead of dest because
26729 cprop_hardreg pass propagates src. */
26730 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26733 else if (conds != CONDS_NOCOND)
26734 cfun->machine->thumb1_cc_insn = NULL_RTX;
26737 /* Check if unexpected far jump is used. */
26738 if (cfun->machine->lr_save_eliminated
26739 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26740 internal_error ("Unexpected thumb1 far jump");
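/* Return nonzero if VAL, viewed as a 32-bit value, consists of an 8-bit
   constant shifted left by between 0 and 24 bits. */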
int
26744 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26746 unsigned HOST_WIDE_INT mask = 0xff;
26747 int i;
26749 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26750 if (val == 0) /* XXX */
26751 return 0;
26753 for (i = 0; i < 25; i++)
26754 if ((val & (mask << i)) == val)
26755 return 1;
26757 return 0;
26760 /* Returns nonzero if the current function contains,
26761 or might contain a far jump. */
26762 static int
26763 thumb_far_jump_used_p (void)
26765 rtx_insn *insn;
26766 bool far_jump = false;
26767 unsigned int func_size = 0;
26769 /* If we have already decided that far jumps may be used,
26770 do not bother checking again, and always return true even if
26771 it turns out that they are not being used. Once we have made
26772 the decision that far jumps are present (and that hence the link
26773 register will be pushed onto the stack) we cannot go back on it. */
26774 if (cfun->machine->far_jump_used)
26775 return 1;
26777 /* If this function is not being called from the prologue/epilogue
26778 generation code then it must be being called from the
26779 INITIAL_ELIMINATION_OFFSET macro. */
26780 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26782 /* In this case we know that we are being asked about the elimination
26783 of the arg pointer register. If that register is not being used,
26784 then there are no arguments on the stack, and we do not have to
26785 worry that a far jump might force the prologue to push the link
26786 register, changing the stack offsets. In this case we can just
26787 return false, since the presence of far jumps in the function will
26788 not affect stack offsets.
26790 If the arg pointer is live (or if it was live, but has now been
26791 eliminated and so set to dead) then we do have to test to see if
26792 the function might contain a far jump. This test can lead to some
26793 false negatives, since before reload is completed, the length of
26794 branch instructions is not known, so gcc defaults to returning their
26795 longest length, which in turn sets the far jump attribute to true.
26797 A false negative will not result in bad code being generated, but it
26798 will result in a needless push and pop of the link register. We
26799 hope that this does not occur too often.
26801 If we need doubleword stack alignment this could affect the other
26802 elimination offsets so we can't risk getting it wrong. */
26803 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26804 cfun->machine->arg_pointer_live = 1;
26805 else if (!cfun->machine->arg_pointer_live)
26806 return 0;
26809 /* We should not change far_jump_used during or after reload, as there is
26810 no chance to change stack frame layout. */
26811 if (reload_in_progress || reload_completed)
26812 return 0;
26814 /* Check to see if the function contains a branch
26815 insn with the far jump attribute set. */
26816 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26818 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26820 far_jump = true;
26822 func_size += get_attr_length (insn);
26825 /* The far_jump attribute is always true for Thumb-1 before the
26826 branch shortening pass, so checking it before that pass is not
26827 very useful.
26829 The following heuristic tries to estimate more accurately whether a
26830 far jump will finally be used. It is very conservative, as there is
26831 no chance to roll back the decision not to use a far jump.
26833 The Thumb-1 long branch offset range is -2048 to 2046. In the worst
26834 case each 2-byte insn is associated with a 4-byte constant pool
26835 entry, so using function size 2048/3 as the threshold is conservative enough. */
26836 if (far_jump)
26838 if ((func_size * 3) >= 2048)
26840 /* Record the fact that we have decided that
26841 the function does use far jumps. */
26842 cfun->machine->far_jump_used = 1;
26843 return 1;
26847 return 0;
26850 /* Return nonzero if FUNC must be entered in ARM mode. */
26851 static bool
26852 is_called_in_ARM_mode (tree func)
26854 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26856 /* Ignore the problem about functions whose address is taken. */
26857 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26858 return true;
26860 return false;
26863 /* Given the stack offsets and register mask in OFFSETS, decide how
26864 many additional registers to push instead of subtracting a constant
26865 from SP. For epilogues the principle is the same except we use pop.
26866 FOR_PROLOGUE indicates which we're generating. */
26867 static int
26868 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26870 HOST_WIDE_INT amount;
26871 unsigned long live_regs_mask = offsets->saved_regs_mask;
26872 /* Extract a mask of the ones we can give to the Thumb's push/pop
26873 instruction. */
26874 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26875 /* Then count how many other high registers will need to be pushed. */
26876 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26877 int n_free, reg_base, size;
26879 if (!for_prologue && frame_pointer_needed)
26880 amount = offsets->locals_base - offsets->saved_regs;
26881 else
26882 amount = offsets->outgoing_args - offsets->saved_regs;
26884 /* If the stack frame size is 512 exactly, we can save one load
26885 instruction, which should make this a win even when optimizing
26886 for speed. */
26887 if (!optimize_size && amount != 512)
26888 return 0;
26890 /* Can't do this if there are high registers to push. */
26891 if (high_regs_pushed != 0)
26892 return 0;
26894 /* Shouldn't do it in the prologue if no registers would normally
26895 be pushed at all. In the epilogue, also allow it if we'll have
26896 a pop insn for the PC. */
26897 if (l_mask == 0
26898 && (for_prologue
26899 || TARGET_BACKTRACE
26900 || (live_regs_mask & 1 << LR_REGNUM) == 0
26901 || TARGET_INTERWORK
26902 || crtl->args.pretend_args_size != 0))
26903 return 0;
26905 /* Don't do this if thumb_expand_prologue wants to emit instructions
26906 between the push and the stack frame allocation. */
26907 if (for_prologue
26908 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26909 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26910 return 0;
26912 reg_base = 0;
26913 n_free = 0;
26914 if (!for_prologue)
26916 size = arm_size_return_regs ();
26917 reg_base = ARM_NUM_INTS (size);
26918 live_regs_mask >>= reg_base;
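/* Count how many consecutive low registers, starting at REG_BASE, are not
   already in the save mask and, for an epilogue, are call-clobbered; these
   are the registers that extra pushes or pops could use. */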
26921 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26922 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
26924 live_regs_mask >>= 1;
26925 n_free++;
26928 if (n_free == 0)
26929 return 0;
26930 gcc_assert (amount / 4 * 4 == amount);
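/* Either push just enough extra registers to bring the remaining adjustment
   below 512 (so a single immediate add/sub suffices), or, if the whole
   adjustment fits, replace it entirely with extra pushes or pops. */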
26932 if (amount >= 512 && (amount - n_free * 4) < 512)
26933 return (amount - 508) / 4;
26934 if (amount <= n_free * 4)
26935 return amount / 4;
26936 return 0;
26939 /* The bits which aren't usefully expanded as rtl. */
26940 const char *
26941 thumb1_unexpanded_epilogue (void)
26943 arm_stack_offsets *offsets;
26944 int regno;
26945 unsigned long live_regs_mask = 0;
26946 int high_regs_pushed = 0;
26947 int extra_pop;
26948 int had_to_push_lr;
26949 int size;
26951 if (cfun->machine->return_used_this_function != 0)
26952 return "";
26954 if (IS_NAKED (arm_current_func_type ()))
26955 return "";
26957 offsets = arm_get_frame_offsets ();
26958 live_regs_mask = offsets->saved_regs_mask;
26959 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26961 /* If we can deduce the registers used from the function's return value.
26962 This is more reliable than examining df_regs_ever_live_p () because that
26963 will be set if the register is ever used in the function, not just if
26964 the register is used to hold a return value. */
26965 size = arm_size_return_regs ();
26967 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26968 if (extra_pop > 0)
26970 unsigned long extra_mask = (1 << extra_pop) - 1;
26971 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26974 /* The prologue may have pushed some high registers to use as
26975 work registers, e.g. the testsuite file
26976 gcc/testsuite/gcc.c-torture/execute/complex-2.c
26977 compiles to produce:
26978 push {r4, r5, r6, r7, lr}
26979 mov r7, r9
26980 mov r6, r8
26981 push {r6, r7}
26982 as part of the prolog. We have to undo that pushing here. */
26984 if (high_regs_pushed)
26986 unsigned long mask = live_regs_mask & 0xff;
26987 int next_hi_reg;
26989 mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
26991 if (mask == 0)
26992 /* Oh dear! We have no low registers into which we can pop
26993 high registers! */
26994 internal_error
26995 ("no low registers available for popping high registers");
26997 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26998 if (live_regs_mask & (1 << next_hi_reg))
26999 break;
27001 while (high_regs_pushed)
27003 /* Find lo register(s) into which the high register(s) can
27004 be popped. */
27005 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
27007 if (mask & (1 << regno))
27008 high_regs_pushed--;
27009 if (high_regs_pushed == 0)
27010 break;
27013 if (high_regs_pushed == 0 && regno >= 0)
27014 mask &= ~((1 << regno) - 1);
27016 /* Pop the values into the low register(s). */
27017 thumb_pop (asm_out_file, mask);
27019 /* Move the value(s) into the high registers. */
27020 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
27022 if (mask & (1 << regno))
27024 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
27025 regno);
27027 for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
27028 next_hi_reg--)
27029 if (live_regs_mask & (1 << next_hi_reg))
27030 break;
27034 live_regs_mask &= ~0x0f00;
27037 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
27038 live_regs_mask &= 0xff;
27040 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
27042 /* Pop the return address into the PC. */
27043 if (had_to_push_lr)
27044 live_regs_mask |= 1 << PC_REGNUM;
27046 /* Either no argument registers were pushed or a backtrace
27047 structure was created which includes an adjusted stack
27048 pointer, so just pop everything. */
27049 if (live_regs_mask)
27050 thumb_pop (asm_out_file, live_regs_mask);
27052 /* We have either just popped the return address into the
27053 PC or it was kept in LR for the entire function.
27054 Note that thumb_pop has already called thumb_exit if the
27055 PC was in the list. */
27056 if (!had_to_push_lr)
27057 thumb_exit (asm_out_file, LR_REGNUM);
27059 else
27061 /* Pop everything but the return address. */
27062 if (live_regs_mask)
27063 thumb_pop (asm_out_file, live_regs_mask);
27065 if (had_to_push_lr)
27067 if (size > 12)
27069 /* We have no free low regs, so save one. */
27070 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
27071 LAST_ARG_REGNUM);
27074 /* Get the return address into a temporary register. */
27075 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
27077 if (size > 12)
27079 /* Move the return address to lr. */
27080 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
27081 LAST_ARG_REGNUM);
27082 /* Restore the low register. */
27083 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
27084 IP_REGNUM);
27085 regno = LR_REGNUM;
27087 else
27088 regno = LAST_ARG_REGNUM;
27090 else
27091 regno = LR_REGNUM;
27093 /* Remove the argument registers that were pushed onto the stack. */
27094 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
27095 SP_REGNUM, SP_REGNUM,
27096 crtl->args.pretend_args_size);
27098 thumb_exit (asm_out_file, regno);
27101 return "";
27104 /* Functions to save and restore machine-specific function data. */
27105 static struct machine_function *
27106 arm_init_machine_status (void)
27108 struct machine_function *machine;
27109 machine = ggc_cleared_alloc<machine_function> ();
27111 #if ARM_FT_UNKNOWN != 0
27112 machine->func_type = ARM_FT_UNKNOWN;
27113 #endif
27114 machine->static_chain_stack_bytes = -1;
27115 machine->pacspval_needed = 0;
27116 return machine;
27119 /* Return an RTX indicating where the return address to the
27120 calling function can be found. */
rtx
27122 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
27124 if (count != 0)
27125 return NULL_RTX;
27127 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
27130 /* Do anything needed before RTL is emitted for each function. */
27131 void
27132 arm_init_expanders (void)
27134 /* Arrange to initialize and mark the machine per-function status. */
27135 init_machine_status = arm_init_machine_status;
27137 /* This is to stop the combine pass optimizing away the alignment
27138 adjustment of va_arg. */
27139 /* ??? It is claimed that this should not be necessary. */
27140 if (cfun)
27141 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
27144 /* Check whether FUNC is called with a different mode. */
27146 bool
27147 arm_change_mode_p (tree func)
27149 if (TREE_CODE (func) != FUNCTION_DECL)
27150 return false;
27152 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
27154 if (!callee_tree)
27155 callee_tree = target_option_default_node;
27157 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
27158 int flags = callee_opts->x_target_flags;
27160 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
27163 /* Like arm_compute_initial_elimination_offset. Simpler because there
27164 isn't an ABI specified frame pointer for Thumb. Instead, we set it
27165 to point at the base of the local variables after static stack
27166 space for a function has been allocated. */
27168 HOST_WIDE_INT
27169 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
27171 arm_stack_offsets *offsets;
27173 offsets = arm_get_frame_offsets ();
27175 switch (from)
27177 case ARG_POINTER_REGNUM:
27178 switch (to)
27180 case STACK_POINTER_REGNUM:
27181 return offsets->outgoing_args - offsets->saved_args;
27183 case FRAME_POINTER_REGNUM:
27184 return offsets->soft_frame - offsets->saved_args;
27186 case ARM_HARD_FRAME_POINTER_REGNUM:
27187 return offsets->saved_regs - offsets->saved_args;
27189 case THUMB_HARD_FRAME_POINTER_REGNUM:
27190 return offsets->locals_base - offsets->saved_args;
27192 default:
27193 gcc_unreachable ();
27195 break;
27197 case FRAME_POINTER_REGNUM:
27198 switch (to)
27200 case STACK_POINTER_REGNUM:
27201 return offsets->outgoing_args - offsets->soft_frame;
27203 case ARM_HARD_FRAME_POINTER_REGNUM:
27204 return offsets->saved_regs - offsets->soft_frame;
27206 case THUMB_HARD_FRAME_POINTER_REGNUM:
27207 return offsets->locals_base - offsets->soft_frame;
27209 default:
27210 gcc_unreachable ();
27212 break;
27214 default:
27215 gcc_unreachable ();
27219 /* Generate the function's prologue. */
27221 void
27222 thumb1_expand_prologue (void)
27224 rtx_insn *insn;
27226 HOST_WIDE_INT amount;
27227 HOST_WIDE_INT size;
27228 arm_stack_offsets *offsets;
27229 unsigned long func_type;
27230 int regno;
27231 unsigned long live_regs_mask;
27232 unsigned long l_mask;
27233 unsigned high_regs_pushed = 0;
27234 bool lr_needs_saving;
27236 func_type = arm_current_func_type ();
27238 /* Naked functions don't have prologues. */
27239 if (IS_NAKED (func_type))
27241 if (flag_stack_usage_info)
27242 current_function_static_stack_size = 0;
27243 return;
27246 if (IS_INTERRUPT (func_type))
27248 error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
27249 return;
27252 if (is_called_in_ARM_mode (current_function_decl))
27253 emit_insn (gen_prologue_thumb1_interwork ());
27255 offsets = arm_get_frame_offsets ();
27256 live_regs_mask = offsets->saved_regs_mask;
27257 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
27259 /* The AAPCS requires the callee to widen integral types narrower
27260 than 32 bits to the full width of the register; but when handling
27261 calls to non-secure space, we cannot trust the callee to have
27262 correctly done so. So forcibly re-widen the result here. */
27263 if (IS_CMSE_ENTRY (func_type))
27265 function_args_iterator args_iter;
27266 CUMULATIVE_ARGS args_so_far_v;
27267 cumulative_args_t args_so_far;
27268 bool first_param = true;
27269 tree arg_type;
27270 tree fndecl = current_function_decl;
27271 tree fntype = TREE_TYPE (fndecl);
27272 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
27273 args_so_far = pack_cumulative_args (&args_so_far_v);
27274 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
27276 rtx arg_rtx;
27278 if (VOID_TYPE_P (arg_type))
27279 break;
27281 function_arg_info arg (arg_type, /*named=*/true);
27282 if (!first_param)
27283 /* We should advance after processing the argument and pass
27284 the argument we're advancing past. */
27285 arm_function_arg_advance (args_so_far, arg);
27286 first_param = false;
27287 arg_rtx = arm_function_arg (args_so_far, arg);
27288 gcc_assert (REG_P (arg_rtx));
27289 if ((TREE_CODE (arg_type) == INTEGER_TYPE
27290 || TREE_CODE (arg_type) == ENUMERAL_TYPE
27291 || TREE_CODE (arg_type) == BOOLEAN_TYPE)
27292 && known_lt (GET_MODE_SIZE (GET_MODE (arg_rtx)), 4))
27294 rtx res_reg = gen_rtx_REG (SImode, REGNO (arg_rtx));
27295 if (TYPE_UNSIGNED (arg_type))
27296 emit_set_insn (res_reg, gen_rtx_ZERO_EXTEND (SImode, arg_rtx));
27297 else
27299 /* Sign extension is a special case because of
27300 thumb1_extendhisi2. */
27301 if (known_eq (GET_MODE_SIZE (GET_MODE (arg_rtx)), 2))
27302 emit_insn (gen_thumb1_extendhisi2 (res_reg, arg_rtx));
27303 else
27304 emit_set_insn (res_reg,
27305 gen_rtx_SIGN_EXTEND (SImode, arg_rtx));
27311 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
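/* 0x40ff selects r0-r7 plus LR (bit 14). */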
27312 l_mask = live_regs_mask & 0x40ff;
27313 /* Then count how many other high registers will need to be pushed. */
27314 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
27316 if (crtl->args.pretend_args_size)
27318 rtx x = GEN_INT (-crtl->args.pretend_args_size);
27320 if (cfun->machine->uses_anonymous_args)
27322 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
27323 unsigned long mask;
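/* Build a mask of the highest NUM_PUSHES argument registers (ending at r3);
   these hold the anonymous arguments that need to be pushed. */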
27325 mask = 1ul << (LAST_ARG_REGNUM + 1);
27326 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
27328 insn = thumb1_emit_multi_reg_push (mask, 0);
27330 else
27332 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27333 stack_pointer_rtx, x));
27335 RTX_FRAME_RELATED_P (insn) = 1;
27338 if (TARGET_BACKTRACE)
27340 HOST_WIDE_INT offset = 0;
27341 unsigned work_register;
27342 rtx work_reg, x, arm_hfp_rtx;
27344 /* We have been asked to create a stack backtrace structure.
27345 The code looks like this:
27347 0 .align 2
27348 0 func:
27349 0 sub SP, #16 Reserve space for 4 registers.
27350 2 push {R7} Push low registers.
27351 4 add R7, SP, #20 Get the stack pointer before the push.
27352 6 str R7, [SP, #8] Store the stack pointer
27353 (before reserving the space).
27354 8 mov R7, PC Get hold of the start of this code + 12.
27355 10 str R7, [SP, #16] Store it.
27356 12 mov R7, FP Get hold of the current frame pointer.
27357 14 str R7, [SP, #4] Store it.
27358 16 mov R7, LR Get hold of the current return address.
27359 18 str R7, [SP, #12] Store it.
27360 20 add R7, SP, #16 Point at the start of the
27361 backtrace structure.
27362 22 mov FP, R7 Put this value into the frame pointer. */
27364 work_register = thumb_find_work_register (live_regs_mask);
27365 work_reg = gen_rtx_REG (SImode, work_register);
27366 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27368 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27369 stack_pointer_rtx, GEN_INT (-16)));
27370 RTX_FRAME_RELATED_P (insn) = 1;
27372 if (l_mask)
27374 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27375 RTX_FRAME_RELATED_P (insn) = 1;
27376 lr_needs_saving = false;
27378 offset = bit_count (l_mask) * UNITS_PER_WORD;
27381 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27382 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27384 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27385 x = gen_frame_mem (SImode, x);
27386 emit_move_insn (x, work_reg);
27388 /* Make sure that the instruction fetching the PC is in the right place
27389 to calculate "start of backtrace creation code + 12". */
27390 /* ??? The stores using the common WORK_REG ought to be enough to
27391 prevent the scheduler from doing anything weird. Failing that
27392 we could always move all of the following into an UNSPEC_VOLATILE. */
27393 if (l_mask)
27395 x = gen_rtx_REG (SImode, PC_REGNUM);
27396 emit_move_insn (work_reg, x);
27398 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27399 x = gen_frame_mem (SImode, x);
27400 emit_move_insn (x, work_reg);
27402 emit_move_insn (work_reg, arm_hfp_rtx);
27404 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27405 x = gen_frame_mem (SImode, x);
27406 emit_move_insn (x, work_reg);
27408 else
27410 emit_move_insn (work_reg, arm_hfp_rtx);
27412 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27413 x = gen_frame_mem (SImode, x);
27414 emit_move_insn (x, work_reg);
27416 x = gen_rtx_REG (SImode, PC_REGNUM);
27417 emit_move_insn (work_reg, x);
27419 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27420 x = gen_frame_mem (SImode, x);
27421 emit_move_insn (x, work_reg);
27424 x = gen_rtx_REG (SImode, LR_REGNUM);
27425 emit_move_insn (work_reg, x);
27427 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27428 x = gen_frame_mem (SImode, x);
27429 emit_move_insn (x, work_reg);
27431 x = GEN_INT (offset + 12);
27432 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27434 emit_move_insn (arm_hfp_rtx, work_reg);
27436 /* Optimization: If we are not pushing any low registers but we are going
27437 to push some high registers then delay our first push. This will just
27438 be a push of LR and we can combine it with the push of the first high
27439 register. */
27440 else if ((l_mask & 0xff) != 0
27441 || (high_regs_pushed == 0 && lr_needs_saving))
27443 unsigned long mask = l_mask;
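/* Also include the low registers (r0 upwards) that thumb1_extra_regs_pushed
   allows us to push in place of part of the stack adjustment. */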
27444 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27445 insn = thumb1_emit_multi_reg_push (mask, mask);
27446 RTX_FRAME_RELATED_P (insn) = 1;
27447 lr_needs_saving = false;
27450 if (high_regs_pushed)
27452 unsigned pushable_regs;
27453 unsigned next_hi_reg;
27454 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27455 : crtl->args.info.nregs;
27456 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27458 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27459 if (live_regs_mask & (1 << next_hi_reg))
27460 break;
27462 /* Here we need to mask out registers used for passing arguments,
27463 even if they can be pushed, to avoid using them to stash the
27464 high registers; such a stash could clobber argument values that
27465 are still needed. */
27466 pushable_regs = l_mask & (~arg_regs_mask);
27467 pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
27469 /* Normally, LR can be used as a scratch register once it has been
27470 saved; but if the function examines its own return address then
27471 the value is still live and we need to avoid using it. */
27472 bool return_addr_live
27473 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
27474 LR_REGNUM);
27476 if (lr_needs_saving || return_addr_live)
27477 pushable_regs &= ~(1 << LR_REGNUM);
27479 if (pushable_regs == 0)
27480 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27482 while (high_regs_pushed > 0)
27484 unsigned long real_regs_mask = 0;
27485 unsigned long push_mask = 0;
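/* Copy as many high registers as there are free pushable registers,
   working downwards from LR, then push those registers in one go;
   REAL_REGS_MASK records which high registers they actually hold. */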
27487 for (regno = LR_REGNUM; regno >= 0; regno --)
27489 if (pushable_regs & (1 << regno))
27491 emit_move_insn (gen_rtx_REG (SImode, regno),
27492 gen_rtx_REG (SImode, next_hi_reg));
27494 high_regs_pushed --;
27495 real_regs_mask |= (1 << next_hi_reg);
27496 push_mask |= (1 << regno);
27498 if (high_regs_pushed)
27500 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27501 next_hi_reg --)
27502 if (live_regs_mask & (1 << next_hi_reg))
27503 break;
27505 else
27506 break;
27510 /* If we had to find a work register and we have not yet
27511 saved the LR then add it to the list of regs to push. */
27512 if (lr_needs_saving)
27514 push_mask |= 1 << LR_REGNUM;
27515 real_regs_mask |= 1 << LR_REGNUM;
27516 lr_needs_saving = false;
27517 /* If the return address is not live at this point, we
27518 can add LR to the list of registers that we can use
27519 for pushes. */
27520 if (!return_addr_live)
27521 pushable_regs |= 1 << LR_REGNUM;
27524 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
27525 RTX_FRAME_RELATED_P (insn) = 1;
27529 /* Load the pic register before setting the frame pointer,
27530 so we can use r7 as a temporary work register. */
27531 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27532 arm_load_pic_register (live_regs_mask, NULL_RTX);
27534 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27535 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27536 stack_pointer_rtx);
27538 size = offsets->outgoing_args - offsets->saved_args;
27539 if (flag_stack_usage_info)
27540 current_function_static_stack_size = size;
27542 /* If we have a frame, then do stack checking. FIXME: not implemented. */
27543 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27544 || flag_stack_clash_protection)
27545 && size)
27546 sorry ("%<-fstack-check=specific%> for Thumb-1");
27548 amount = offsets->outgoing_args - offsets->saved_regs;
27549 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27550 if (amount)
27552 if (amount < 512)
27554 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27555 GEN_INT (- amount)));
27556 RTX_FRAME_RELATED_P (insn) = 1;
27558 else
27560 rtx reg, dwarf;
27562 /* The stack decrement is too big for an immediate value in a single
27563 insn. In theory we could issue multiple subtracts, but after
27564 three of them it becomes more space efficient to place the full
27565 value in the constant pool and load into a register. (Also the
27566 ARM debugger really likes to see only one stack decrement per
27567 function). So instead we look for a scratch register into which
27568 we can load the decrement, and then we subtract this from the
27569 stack pointer. Unfortunately on the thumb the only available
27570 scratch registers are the argument registers, and we cannot use
27571 these as they may hold arguments to the function. Instead we
27572 attempt to locate a call preserved register which is used by this
27573 function. If we can find one, then we know that it will have
27574 been pushed at the start of the prologue and so we can corrupt
27575 it now. */
27576 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27577 if (live_regs_mask & (1 << regno))
27578 break;
27580 gcc_assert (regno <= LAST_LO_REGNUM);
27582 reg = gen_rtx_REG (SImode, regno);
27584 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27586 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27587 stack_pointer_rtx, reg));
27589 dwarf = gen_rtx_SET (stack_pointer_rtx,
27590 plus_constant (Pmode, stack_pointer_rtx,
27591 -amount));
27592 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27593 RTX_FRAME_RELATED_P (insn) = 1;
27597 if (frame_pointer_needed)
27598 thumb_set_frame_pointer (offsets);
27600 /* If we are profiling, make sure no instructions are scheduled before
27601 the call to mcount. Similarly if the user has requested no
27602 scheduling in the prolog. Similarly if we want non-call exceptions
27603 using the EABI unwinder, to prevent faulting instructions from being
27604 swapped with a stack adjustment. */
27605 if (crtl->profile || !TARGET_SCHED_PROLOG
27606 || (arm_except_unwind_info (&global_options) == UI_TARGET
27607 && cfun->can_throw_non_call_exceptions))
27608 emit_insn (gen_blockage ());
27610 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27611 if (live_regs_mask & 0xff)
27612 cfun->machine->lr_save_eliminated = 0;
27615 /* Clear caller-saved registers not used to pass return values and leaked
27616 condition flags before exiting a cmse_nonsecure_entry function. */
27618 void
27619 cmse_nonsecure_entry_clear_before_return (void)
27621 bool clear_vfpregs = TARGET_HARD_FLOAT || TARGET_HAVE_FPCXT_CMSE;
27622 int regno, maxregno = clear_vfpregs ? LAST_VFP_REGNUM : IP_REGNUM;
27623 uint32_t padding_bits_to_clear = 0;
27624 auto_sbitmap to_clear_bitmap (maxregno + 1);
27625 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
27626 tree result_type;
27628 bitmap_clear (to_clear_bitmap);
27629 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
27630 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
27632 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
27633 registers. */
27634 if (clear_vfpregs)
27636 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
27638 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
27640 if (!TARGET_HAVE_FPCXT_CMSE)
27642 /* Make sure we don't clear the two scratch registers used to clear
27643 the relevant FPSCR bits in output_return_instruction. */
27644 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
27645 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
27646 emit_use (gen_rtx_REG (SImode, 4));
27647 bitmap_clear_bit (to_clear_bitmap, 4);
27651 /* If the user has defined registers to be caller saved, these are no longer
27652 restored by the function before returning and must thus be cleared for
27653 security purposes. */
27654 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
27656 /* We do not touch registers that can be used to pass arguments as per
27657 the AAPCS, since these should never be made callee-saved by user
27658 options. */
27659 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
27660 continue;
27661 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
27662 continue;
27663 if (!callee_saved_reg_p (regno)
27664 && (!IN_RANGE (regno, FIRST_VFP_REGNUM, LAST_VFP_REGNUM)
27665 || TARGET_HARD_FLOAT))
27666 bitmap_set_bit (to_clear_bitmap, regno);
27669 /* Make sure we do not clear the registers the result is returned in. */
27670 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
27671 if (!VOID_TYPE_P (result_type))
27673 uint64_t to_clear_return_mask;
27674 result_rtl = arm_function_value (result_type, current_function_decl, 0);
27676 /* No need to check that we return in registers, because we don't
27677 support returning on stack yet. */
27678 gcc_assert (REG_P (result_rtl));
27679 to_clear_return_mask
27680 = compute_not_to_clear_mask (result_type, result_rtl, 0,
27681 &padding_bits_to_clear);
27682 if (to_clear_return_mask)
27684 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
27685 for (regno = R0_REGNUM; regno <= maxregno; regno++)
27687 if (to_clear_return_mask & (1ULL << regno))
27688 bitmap_clear_bit (to_clear_bitmap, regno);
27693 if (padding_bits_to_clear != 0)
27695 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
27696 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
27698 /* Padding_bits_to_clear is not 0 so we know we are dealing with
27699 returning a composite type, which only uses r0. Let's make sure that
27700 r1-r3 are cleared too. */
27701 bitmap_clear (to_clear_arg_regs_bitmap);
27702 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
27703 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
27706 /* Clear full registers that leak before returning. */
27707 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
27708 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
27709 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
27710 clearing_reg);
27713 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a single
27714 POP instruction can be generated. LR should be replaced by PC. All
27715 the checks required are already done by USE_RETURN_INSN (). Hence,
27716 all we really need to check here is whether a single register or
27717 multiple registers are to be returned. */
27718 void
27719 thumb2_expand_return (bool simple_return)
27721 int i, num_regs;
27722 unsigned long saved_regs_mask;
27723 arm_stack_offsets *offsets;
27725 offsets = arm_get_frame_offsets ();
27726 saved_regs_mask = offsets->saved_regs_mask;
27728 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27729 if (saved_regs_mask & (1 << i))
27730 num_regs++;
27732 if (!simple_return && saved_regs_mask)
27734 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27735 functions or adapt code to handle according to ACLE. This path should
27736 not be reachable for cmse_nonsecure_entry functions though we prefer
27737 to assert it for now to ensure that future code changes do not silently
27738 change this behavior. */
27739 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27740 if (arm_current_function_pac_enabled_p ())
27742 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
27743 arm_emit_multi_reg_pop (saved_regs_mask);
27744 emit_insn (gen_aut_nop ());
27745 emit_jump_insn (simple_return_rtx);
27747 else if (num_regs == 1)
27749 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27750 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27751 rtx addr = gen_rtx_MEM (SImode,
27752 gen_rtx_POST_INC (SImode,
27753 stack_pointer_rtx));
27754 set_mem_alias_set (addr, get_frame_alias_set ());
27755 XVECEXP (par, 0, 0) = ret_rtx;
27756 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
27757 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27758 emit_jump_insn (par);
27760 else
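/* Pop the saved LR directly into the PC as part of the multi-register
   pop. */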
27762 saved_regs_mask &= ~ (1 << LR_REGNUM);
27763 saved_regs_mask |= (1 << PC_REGNUM);
27764 arm_emit_multi_reg_pop (saved_regs_mask);
27767 else
27769 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27770 cmse_nonsecure_entry_clear_before_return ();
27771 emit_jump_insn (simple_return_rtx);
27775 void
27776 thumb1_expand_epilogue (void)
27778 HOST_WIDE_INT amount;
27779 arm_stack_offsets *offsets;
27780 int regno;
27782 /* Naked functions don't have epilogues. */
27783 if (IS_NAKED (arm_current_func_type ()))
27784 return;
27786 offsets = arm_get_frame_offsets ();
27787 amount = offsets->outgoing_args - offsets->saved_regs;
27789 if (frame_pointer_needed)
27791 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27792 amount = offsets->locals_base - offsets->saved_regs;
27794 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27796 gcc_assert (amount >= 0);
27797 if (amount)
27799 emit_insn (gen_blockage ());
27801 if (amount < 512)
27802 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27803 GEN_INT (amount)));
27804 else
27806 /* r3 is always free in the epilogue. */
27807 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27809 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27810 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27814 /* Emit a USE (stack_pointer_rtx), so that
27815 the stack adjustment will not be deleted. */
27816 emit_insn (gen_force_register_use (stack_pointer_rtx));
27818 if (crtl->profile || !TARGET_SCHED_PROLOG)
27819 emit_insn (gen_blockage ());
27821 /* Emit a clobber for each insn that will be restored in the epilogue,
27822 so that flow2 will get register lifetimes correct. */
27823 for (regno = 0; regno < 13; regno++)
27824 if (reg_needs_saving_p (regno))
27825 emit_clobber (gen_rtx_REG (SImode, regno));
27827 if (! df_regs_ever_live_p (LR_REGNUM))
27828 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27830 /* Clear all caller-saved regs that are not used to return. */
27831 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27832 cmse_nonsecure_entry_clear_before_return ();
27835 /* Epilogue code for APCS frame. */
27836 static void
27837 arm_expand_epilogue_apcs_frame (bool really_return)
27839 unsigned long func_type;
27840 unsigned long saved_regs_mask;
27841 int num_regs = 0;
27842 int i;
27843 int floats_from_frame = 0;
27844 arm_stack_offsets *offsets;
27846 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27847 func_type = arm_current_func_type ();
27849 /* Get frame offsets for ARM. */
27850 offsets = arm_get_frame_offsets ();
27851 saved_regs_mask = offsets->saved_regs_mask;
27853 /* Find the offset of the floating-point save area in the frame. */
27854 floats_from_frame
27855 = (offsets->saved_args
27856 + arm_compute_static_chain_stack_bytes ()
27857 - offsets->frame);
27859 /* Compute how many core registers are saved and how far away the floats are. */
27860 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27861 if (saved_regs_mask & (1 << i))
27863 num_regs++;
27864 floats_from_frame += 4;
27867 if (TARGET_VFP_BASE)
27869 int start_reg;
27870 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27872 /* The offset is from IP_REGNUM. */
27873 int saved_size = arm_get_vfp_saved_size ();
27874 if (saved_size > 0)
27876 rtx_insn *insn;
27877 floats_from_frame += saved_size;
27878 insn = emit_insn (gen_addsi3 (ip_rtx,
27879 hard_frame_pointer_rtx,
27880 GEN_INT (-floats_from_frame)));
27881 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27882 ip_rtx, hard_frame_pointer_rtx);
27885 /* Generate VFP register multi-pop. */
27886 start_reg = FIRST_VFP_REGNUM;
27888 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27889 /* Look for a case where a reg does not need restoring. */
27890 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27892 if (start_reg != i)
27893 arm_emit_vfp_multi_reg_pop (start_reg,
27894 (i - start_reg) / 2,
27895 gen_rtx_REG (SImode,
27896 IP_REGNUM));
27897 start_reg = i + 2;
27900 /* Restore the remaining regs that we have discovered (or possibly
27901 even all of them, if the conditional in the for loop never
27902 fired). */
27903 if (start_reg != i)
27904 arm_emit_vfp_multi_reg_pop (start_reg,
27905 (i - start_reg) / 2,
27906 gen_rtx_REG (SImode, IP_REGNUM));
27909 if (TARGET_IWMMXT)
27911 /* The frame pointer is guaranteed to be non-double-word aligned, as
27912 it is set to double-word-aligned old_stack_pointer - 4. */
27913 rtx_insn *insn;
27914 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27916 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27917 if (reg_needs_saving_p (i))
27919 rtx addr = gen_frame_mem (V2SImode,
27920 plus_constant (Pmode, hard_frame_pointer_rtx,
27921 - lrm_count * 4));
27922 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27923 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27924 gen_rtx_REG (V2SImode, i),
27925 NULL_RTX);
27926 lrm_count += 2;
27930 /* saved_regs_mask should contain IP which contains old stack pointer
27931 at the time of activation creation. Since SP and IP are adjacent registers,
27932 we can restore the value directly into SP. */
27933 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27934 saved_regs_mask &= ~(1 << IP_REGNUM);
27935 saved_regs_mask |= (1 << SP_REGNUM);
27937 /* There are two registers left in saved_regs_mask - LR and PC. We
27938 only need to restore LR (the return address), but to
27939 save time we can load it directly into PC, unless we need a
27940 special function exit sequence, or we are not really returning. */
27941 if (really_return
27942 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27943 && !crtl->calls_eh_return)
27944 /* Delete LR from the register mask, so that LR on
27945 the stack is loaded into the PC in the register mask. */
27946 saved_regs_mask &= ~(1 << LR_REGNUM);
27947 else
27948 saved_regs_mask &= ~(1 << PC_REGNUM);
27950 num_regs = bit_count (saved_regs_mask);
27951 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27953 rtx_insn *insn;
27954 emit_insn (gen_blockage ());
27955 /* Unwind the stack to just below the saved registers. */
27956 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27957 hard_frame_pointer_rtx,
27958 GEN_INT (- 4 * num_regs)));
27960 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27961 stack_pointer_rtx, hard_frame_pointer_rtx);
27964 arm_emit_multi_reg_pop (saved_regs_mask);
27966 if (IS_INTERRUPT (func_type))
27968 /* Interrupt handlers will have pushed the
27969 IP onto the stack, so restore it now. */
27970 rtx_insn *insn;
27971 rtx addr = gen_rtx_MEM (SImode,
27972 gen_rtx_POST_INC (SImode,
27973 stack_pointer_rtx));
27974 set_mem_alias_set (addr, get_frame_alias_set ());
27975 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27976 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27977 gen_rtx_REG (SImode, IP_REGNUM),
27978 NULL_RTX);
27981 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27982 return;
27984 if (crtl->calls_eh_return)
27985 emit_insn (gen_addsi3 (stack_pointer_rtx,
27986 stack_pointer_rtx,
27987 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27989 if (IS_STACKALIGN (func_type))
27990 /* Restore the original stack pointer. Before prologue, the stack was
27991 realigned and the original stack pointer saved in r0. For details,
27992 see comment in arm_expand_prologue. */
27993 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27995 emit_jump_insn (simple_return_rtx);
27998 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27999 function is not a sibcall. */
28000 void
28001 arm_expand_epilogue (bool really_return)
28003 unsigned long func_type;
28004 unsigned long saved_regs_mask;
28005 int num_regs = 0;
28006 int i;
28007 int amount;
28008 arm_stack_offsets *offsets;
28010 func_type = arm_current_func_type ();
28012 /* Naked functions don't have an epilogue. Hence, generate a return pattern
28013 and let output_return_instruction take care of any instruction emission. */
28014 if (IS_NAKED (func_type)
28015 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
28017 if (really_return)
28018 emit_jump_insn (simple_return_rtx);
28019 return;
28022 /* If we are throwing an exception, then we really must be doing a
28023 return, so we can't tail-call. */
28024 gcc_assert (!crtl->calls_eh_return || really_return);
28026 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
28028 arm_expand_epilogue_apcs_frame (really_return);
28029 return;
28032 /* Get frame offsets for ARM. */
28033 offsets = arm_get_frame_offsets ();
28034 saved_regs_mask = offsets->saved_regs_mask;
28035 num_regs = bit_count (saved_regs_mask);
28037 if (frame_pointer_needed)
28039 rtx_insn *insn;
28040 /* Restore stack pointer if necessary. */
28041 if (TARGET_ARM)
28043 /* In ARM mode, frame pointer points to first saved register.
28044 Restore stack pointer to last saved register. */
28045 amount = offsets->frame - offsets->saved_regs;
28047 /* Force out any pending memory operations that reference stacked data
28048 before stack de-allocation occurs. */
28049 emit_insn (gen_blockage ());
28050 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
28051 hard_frame_pointer_rtx,
28052 GEN_INT (amount)));
28053 arm_add_cfa_adjust_cfa_note (insn, amount,
28054 stack_pointer_rtx,
28055 hard_frame_pointer_rtx);
28057 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
28058 deleted. */
28059 emit_insn (gen_force_register_use (stack_pointer_rtx));
28061 else
28063 /* In Thumb-2 mode, the frame pointer points to the last saved
28064 register. */
28065 amount = offsets->locals_base - offsets->saved_regs;
28066 if (amount)
28068 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
28069 hard_frame_pointer_rtx,
28070 GEN_INT (amount)));
28071 arm_add_cfa_adjust_cfa_note (insn, amount,
28072 hard_frame_pointer_rtx,
28073 hard_frame_pointer_rtx);
28076 /* Force out any pending memory operations that reference stacked data
28077 before stack de-allocation occurs. */
28078 emit_insn (gen_blockage ());
28079 insn = emit_insn (gen_movsi (stack_pointer_rtx,
28080 hard_frame_pointer_rtx));
28081 arm_add_cfa_adjust_cfa_note (insn, 0,
28082 stack_pointer_rtx,
28083 hard_frame_pointer_rtx);
28084 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
28085 deleted. */
28086 emit_insn (gen_force_register_use (stack_pointer_rtx));
28089 else
28091 /* Pop off outgoing args and local frame to adjust stack pointer to
28092 last saved register. */
28093 amount = offsets->outgoing_args - offsets->saved_regs;
28094 if (amount)
28096 rtx_insn *tmp;
28097 /* Force out any pending memory operations that reference stacked data
28098 before stack de-allocation occurs. */
28099 emit_insn (gen_blockage ());
28100 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
28101 stack_pointer_rtx,
28102 GEN_INT (amount)));
28103 arm_add_cfa_adjust_cfa_note (tmp, amount,
28104 stack_pointer_rtx, stack_pointer_rtx);
28105 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
28106 not deleted. */
28107 emit_insn (gen_force_register_use (stack_pointer_rtx));
28111 if (TARGET_VFP_BASE)
28113 /* Generate VFP register multi-pop. */
28114 int end_reg = LAST_VFP_REGNUM + 1;
28116 /* Scan the registers in reverse order. We need to match
28117 any groupings made in the prologue and generate matching
28118 vldm operations. The need to match groups is because,
28119 unlike pop, vldm can only do consecutive regs. */
28120 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
28121 /* Look for a case where a reg does not need restoring. */
28122 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
28124 /* Restore the regs discovered so far (from reg+2 to
28125 end_reg). */
28126 if (end_reg > i + 2)
28127 arm_emit_vfp_multi_reg_pop (i + 2,
28128 (end_reg - (i + 2)) / 2,
28129 stack_pointer_rtx);
28130 end_reg = i;
28133 /* Restore the remaining regs that we have discovered (or possibly
28134 even all of them, if the conditional in the for loop never
28135 fired). */
28136 if (end_reg > i + 2)
28137 arm_emit_vfp_multi_reg_pop (i + 2,
28138 (end_reg - (i + 2)) / 2,
28139 stack_pointer_rtx);
28142 if (TARGET_IWMMXT)
28143 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
28144 if (reg_needs_saving_p (i))
28146 rtx_insn *insn;
28147 rtx addr = gen_rtx_MEM (V2SImode,
28148 gen_rtx_POST_INC (SImode,
28149 stack_pointer_rtx));
28150 set_mem_alias_set (addr, get_frame_alias_set ());
28151 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
28152 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
28153 gen_rtx_REG (V2SImode, i),
28154 NULL_RTX);
28155 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
28156 stack_pointer_rtx, stack_pointer_rtx);
28159 if (saved_regs_mask)
28161 rtx insn;
28162 bool return_in_pc = false;
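/* For a normal function doing a real return, the saved LR can be loaded
   straight into the PC as part of the register pop, folding the return
   into the pop itself. */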
28164 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
28165 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
28166 && !IS_CMSE_ENTRY (func_type)
28167 && !IS_STACKALIGN (func_type)
28168 && really_return
28169 && crtl->args.pretend_args_size == 0
28170 && saved_regs_mask & (1 << LR_REGNUM)
28171 && !crtl->calls_eh_return
28172 && !arm_current_function_pac_enabled_p ())
28174 saved_regs_mask &= ~(1 << LR_REGNUM);
28175 saved_regs_mask |= (1 << PC_REGNUM);
28176 return_in_pc = true;
28179 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
28181 for (i = 0; i <= LAST_ARM_REGNUM; i++)
28182 if (saved_regs_mask & (1 << i))
28184 rtx addr = gen_rtx_MEM (SImode,
28185 gen_rtx_POST_INC (SImode,
28186 stack_pointer_rtx));
28187 set_mem_alias_set (addr, get_frame_alias_set ());
28189 if (i == PC_REGNUM)
28191 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
28192 XVECEXP (insn, 0, 0) = ret_rtx;
28193 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
28194 addr);
28195 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
28196 insn = emit_jump_insn (insn);
28198 else
28200 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
28201 addr));
28202 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
28203 gen_rtx_REG (SImode, i),
28204 NULL_RTX);
28205 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
28206 stack_pointer_rtx,
28207 stack_pointer_rtx);
28211 else
28213 if (TARGET_LDRD
28214 && current_tune->prefer_ldrd_strd
28215 && !optimize_function_for_size_p (cfun))
28217 if (TARGET_THUMB2)
28218 thumb2_emit_ldrd_pop (saved_regs_mask);
28219 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
28220 arm_emit_ldrd_pop (saved_regs_mask);
28221 else
28222 arm_emit_multi_reg_pop (saved_regs_mask);
28224 else
28225 arm_emit_multi_reg_pop (saved_regs_mask);
28228 if (return_in_pc)
28229 return;
28232 amount
28233 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
28234 if (amount)
28236 int i, j;
28237 rtx dwarf = NULL_RTX;
28238 rtx_insn *tmp =
28239 emit_insn (gen_addsi3 (stack_pointer_rtx,
28240 stack_pointer_rtx,
28241 GEN_INT (amount)));
28243 RTX_FRAME_RELATED_P (tmp) = 1;
28245 if (cfun->machine->uses_anonymous_args)
28247 /* Restore pretend args.  Refer to arm_expand_prologue for how the
28248 pretend_args are saved on the stack. */
28249 int num_regs = crtl->args.pretend_args_size / 4;
28250 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
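/* Worked example: pretend_args_size == 8 gives num_regs == 2, so
   saved_regs_mask == (0xf0 >> 2) & 0xf == 0xc, i.e. r2 and r3, which
   are the two argument registers the prologue pushed.  */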
28251 for (j = 0, i = 0; j < num_regs; i++)
28252 if (saved_regs_mask & (1 << i))
28254 rtx reg = gen_rtx_REG (SImode, i);
28255 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
28256 j++;
28258 REG_NOTES (tmp) = dwarf;
28260 arm_add_cfa_adjust_cfa_note (tmp, amount,
28261 stack_pointer_rtx, stack_pointer_rtx);
28264 if (IS_CMSE_ENTRY (func_type))
28266 /* CMSE_ENTRY always returns. */
28267 gcc_assert (really_return);
28268 /* Clear all caller-saved regs that are not used to return. */
28269 cmse_nonsecure_entry_clear_before_return ();
28271 /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
28272 VLDR. */
28273 if (TARGET_HAVE_FPCXT_CMSE)
28275 rtx_insn *insn;
28277 insn = emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx,
28278 GEN_INT (FPCXTNS_ENUM)));
28279 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
28280 plus_constant (Pmode, stack_pointer_rtx, 4));
28281 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
28282 RTX_FRAME_RELATED_P (insn) = 1;
28286 if (arm_current_function_pac_enabled_p ())
28287 emit_insn (gen_aut_nop ());
28289 if (!really_return)
28290 return;
28292 if (crtl->calls_eh_return)
28293 emit_insn (gen_addsi3 (stack_pointer_rtx,
28294 stack_pointer_rtx,
28295 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
28297 if (IS_STACKALIGN (func_type))
28298 /* Restore the original stack pointer. Before prologue, the stack was
28299 realigned and the original stack pointer saved in r0. For details,
28300 see comment in arm_expand_prologue. */
28301 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
28303 emit_jump_insn (simple_return_rtx);
28306 /* Implementation of insn prologue_thumb1_interwork. This is the first
28307 "instruction" of a function called in ARM mode. Swap to thumb mode. */
28309 const char *
28310 thumb1_output_interwork (void)
28312 const char * name;
28313 FILE *f = asm_out_file;
28315 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
28316 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
28317 == SYMBOL_REF);
28318 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28320 /* Generate code sequence to switch us into Thumb mode. */
28321 /* The .code 32 directive has already been emitted by
28322 ASM_DECLARE_FUNCTION_NAME. */
28323 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
28324 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
28326 /* Generate a label, so that the debugger will notice the
28327 change in instruction sets. This label is also used by
28328 the assembler to bypass the ARM code when this function
28329 is called from a Thumb encoded function elsewhere in the
28330 same file. Hence the definition of STUB_NAME here must
28331 agree with the definition in gas/config/tc-arm.c. */
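/* A rough sketch of the stub emitted for a function "foo" (assuming
   an empty user label prefix):

	orr	ip, pc, #1
	bx	ip
	.code	16
	.globl	.real_start_offoo
	.thumb_func
   .real_start_offoo:

   The orr/bx pair transfers control to the Thumb code at the stub
   label with the Thumb bit set.  */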
28333 #define STUB_NAME ".real_start_of"
28335 fprintf (f, "\t.code\t16\n");
28336 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
28337 fprintf (f, "\t.thumb_func\n");
28338 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
28340 return "";
28343 /* Handle the case of a double word load into a low register from
28344 a computed memory address. The computed address may involve a
28345 register which is overwritten by the load. */
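/* E.g. for a doubleword load of r0/r1 from the address r0 + r1, the
   PLUS case below emits

	add	r1, r0, r1
	ldr	r0, [r1, #0]
	ldr	r1, [r1, #4]

   computing the address into the high destination register first so
   that the base register is not clobbered before both words have
   been loaded.  */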
28346 const char *
28347 thumb_load_double_from_address (rtx *operands)
28349 rtx addr;
28350 rtx base;
28351 rtx offset;
28352 rtx arg1;
28353 rtx arg2;
28355 gcc_assert (REG_P (operands[0]));
28356 gcc_assert (MEM_P (operands[1]));
28358 /* Get the memory address. */
28359 addr = XEXP (operands[1], 0);
28361 /* Work out how the memory address is computed. */
28362 switch (GET_CODE (addr))
28364 case REG:
28365 if (reg_overlap_mentioned_p (addr, operands[0]))
28366 output_asm_insn ("ldmia\t%m1, {%0, %H0}", operands);
28367 else
28369 operands[2] = adjust_address (operands[1], SImode, 4);
28370 output_asm_insn ("ldr\t%0, %1", operands);
28371 output_asm_insn ("ldr\t%H0, %2", operands);
28373 break;
28375 case CONST:
28376 /* Compute <address> + 4 for the high order load. */
28377 operands[2] = adjust_address (operands[1], SImode, 4);
28379 output_asm_insn ("ldr\t%0, %1", operands);
28380 output_asm_insn ("ldr\t%H0, %2", operands);
28381 break;
28383 case PLUS:
28384 arg1 = XEXP (addr, 0);
28385 arg2 = XEXP (addr, 1);
28387 if (CONSTANT_P (arg1))
28388 base = arg2, offset = arg1;
28389 else
28390 base = arg1, offset = arg2;
28392 gcc_assert (REG_P (base));
28394 /* Catch the case of <address> = <reg> + <reg> */
28395 if (REG_P (offset))
28397 int reg_offset = REGNO (offset);
28398 int reg_base = REGNO (base);
28399 int reg_dest = REGNO (operands[0]);
28401 /* Add the base and offset registers together into the
28402 higher destination register. */
28403 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
28404 reg_dest + 1, reg_base, reg_offset);
28406 /* Load the lower destination register from the address in
28407 the higher destination register. */
28408 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
28409 reg_dest, reg_dest + 1);
28411 /* Load the higher destination register from its own address
28412 plus 4. */
28413 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
28414 reg_dest + 1, reg_dest + 1);
28416 else
28418 /* Compute <address> + 4 for the high order load. */
28419 operands[2] = adjust_address (operands[1], SImode, 4);
28421 /* If the computed address is held in the low order register
28422 then load the high order register first, otherwise always
28423 load the low order register first. */
28424 if (REGNO (operands[0]) == REGNO (base))
28426 output_asm_insn ("ldr\t%H0, %2", operands);
28427 output_asm_insn ("ldr\t%0, %1", operands);
28429 else
28431 output_asm_insn ("ldr\t%0, %1", operands);
28432 output_asm_insn ("ldr\t%H0, %2", operands);
28435 break;
28437 case LABEL_REF:
28438 /* With no registers to worry about we can just load the value
28439 directly. */
28440 operands[2] = adjust_address (operands[1], SImode, 4);
28442 output_asm_insn ("ldr\t%H0, %2", operands);
28443 output_asm_insn ("ldr\t%0, %1", operands);
28444 break;
28446 default:
28447 gcc_unreachable ();
28450 return "";
28453 const char *
28454 thumb_output_move_mem_multiple (int n, rtx *operands)
28456 switch (n)
28458 case 2:
28459 if (REGNO (operands[4]) > REGNO (operands[5]))
28460 std::swap (operands[4], operands[5]);
28462 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28463 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28464 break;
28466 case 3:
28467 if (REGNO (operands[4]) > REGNO (operands[5]))
28468 std::swap (operands[4], operands[5]);
28469 if (REGNO (operands[5]) > REGNO (operands[6]))
28470 std::swap (operands[5], operands[6]);
28471 if (REGNO (operands[4]) > REGNO (operands[5]))
28472 std::swap (operands[4], operands[5]);
28474 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28475 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28476 break;
28478 default:
28479 gcc_unreachable ();
28482 return "";
28485 /* Output a call-via instruction for thumb state. */
28486 const char *
28487 thumb_call_via_reg (rtx reg)
28489 int regno = REGNO (reg);
28490 rtx *labelp;
28492 gcc_assert (regno < LR_REGNUM);
28494 /* If we are in the normal text section we can use a single instance
28495 per compilation unit. If we are doing function sections, then we need
28496 an entry per section, since we can't rely on reachability. */
28497 if (in_section == text_section)
28499 thumb_call_reg_needed = 1;
28501 if (thumb_call_via_label[regno] == NULL)
28502 thumb_call_via_label[regno] = gen_label_rtx ();
28503 labelp = thumb_call_via_label + regno;
28505 else
28507 if (cfun->machine->call_via[regno] == NULL)
28508 cfun->machine->call_via[regno] = gen_label_rtx ();
28509 labelp = cfun->machine->call_via + regno;
28512 output_asm_insn ("bl\t%a0", labelp);
28513 return "";
28516 /* Routines for generating rtl. */
28517 void
28518 thumb_expand_cpymemqi (rtx *operands)
28520 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28521 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28522 HOST_WIDE_INT len = INTVAL (operands[2]);
28523 HOST_WIDE_INT offset = 0;
28525 while (len >= 12)
28527 emit_insn (gen_cpymem12b (out, in, out, in));
28528 len -= 12;
28531 if (len >= 8)
28533 emit_insn (gen_cpymem8b (out, in, out, in));
28534 len -= 8;
28537 if (len >= 4)
28539 rtx reg = gen_reg_rtx (SImode);
28540 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28541 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28542 len -= 4;
28543 offset += 4;
28546 if (len >= 2)
28548 rtx reg = gen_reg_rtx (HImode);
28549 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28550 plus_constant (Pmode, in,
28551 offset))));
28552 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28553 offset)),
28554 reg));
28555 len -= 2;
28556 offset += 2;
28559 if (len)
28561 rtx reg = gen_reg_rtx (QImode);
28562 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28563 plus_constant (Pmode, in,
28564 offset))));
28565 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28566 offset)),
28567 reg));
28571 void
28572 thumb_reload_out_hi (rtx *operands)
28574 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28577 /* Return the length of a function name prefix
28578 that starts with the character 'c'. */
28579 static int
28580 arm_get_strip_length (int c)
28582 switch (c)
28584 ARM_NAME_ENCODING_LENGTHS
28585 default: return 0;
28589 /* Return a pointer to a function's name with any
28590 and all prefix encodings stripped from it. */
28591 const char *
28592 arm_strip_name_encoding (const char *name)
28594 int skip;
28596 while ((skip = arm_get_strip_length (* name)))
28597 name += skip;
28599 return name;
28602 /* If there is a '*' anywhere in the name's prefix, then
28603 emit the stripped name verbatim, otherwise prepend an
28604 underscore if leading underscores are being used. */
28605 void
28606 arm_asm_output_labelref (FILE *stream, const char *name)
28608 int skip;
28609 int verbatim = 0;
28611 while ((skip = arm_get_strip_length (* name)))
28613 verbatim |= (*name == '*');
28614 name += skip;
28617 if (verbatim)
28618 fputs (name, stream);
28619 else
28620 asm_fprintf (stream, "%U%s", name);
28623 /* This function is used to emit an EABI tag and its associated value.
28624 We emit the numerical value of the tag in case the assembler does not
28625 support textual tags (e.g. gas prior to 2.20). If requested we include
28626 the tag name in a comment so that anyone reading the assembler output
28627 will know which tag is being set.
28629 This function is not static because arm-c.cc needs it too. */
28631 void
28632 arm_emit_eabi_attribute (const char *name, int num, int val)
28634 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28635 if (flag_verbose_asm || flag_debug_asm)
28636 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28637 asm_fprintf (asm_out_file, "\n");
28640 /* This function is used to print CPU tuning information as comment
28641 in assembler file. Pointers are not printed for now. */
28643 void
28644 arm_print_tune_info (void)
28646 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
28647 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
28648 current_tune->constant_limit);
28649 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28650 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
28651 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28652 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
28653 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28654 "prefetch.l1_cache_size:\t%d\n",
28655 current_tune->prefetch.l1_cache_size);
28656 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28657 "prefetch.l1_cache_line_size:\t%d\n",
28658 current_tune->prefetch.l1_cache_line_size);
28659 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28660 "prefer_constant_pool:\t%d\n",
28661 (int) current_tune->prefer_constant_pool);
28662 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28663 "branch_cost:\t(s:speed, p:predictable)\n");
28664 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
28665 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
28666 current_tune->branch_cost (false, false));
28667 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
28668 current_tune->branch_cost (false, true));
28669 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
28670 current_tune->branch_cost (true, false));
28671 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
28672 current_tune->branch_cost (true, true));
28673 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28674 "prefer_ldrd_strd:\t%d\n",
28675 (int) current_tune->prefer_ldrd_strd);
28676 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28677 "logical_op_non_short_circuit:\t[%d,%d]\n",
28678 (int) current_tune->logical_op_non_short_circuit_thumb,
28679 (int) current_tune->logical_op_non_short_circuit_arm);
28680 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28681 "disparage_flag_setting_t16_encodings:\t%d\n",
28682 (int) current_tune->disparage_flag_setting_t16_encodings);
28683 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28684 "string_ops_prefer_neon:\t%d\n",
28685 (int) current_tune->string_ops_prefer_neon);
28686 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28687 "max_insns_inline_memset:\t%d\n",
28688 current_tune->max_insns_inline_memset);
28689 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
28690 current_tune->fusible_ops);
28691 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
28692 (int) current_tune->sched_autopref);
28695 /* The last set of target options used to emit .arch directives, etc. This
28696 could be a function-local static if it were not required to expose it as a
28697 root to the garbage collector. */
28698 static GTY(()) cl_target_option *last_asm_targ_options = NULL;
28700 /* Print .arch and .arch_extension directives corresponding to the
28701 current architecture configuration. */
28702 static void
28703 arm_print_asm_arch_directives (FILE *stream, cl_target_option *targ_options)
28705 arm_build_target build_target;
28706 /* If the target options haven't changed since the last time we were called
28707 there is nothing to do. This should be sufficient to suppress the
28708 majority of redundant work. */
28709 if (last_asm_targ_options == targ_options)
28710 return;
28712 last_asm_targ_options = targ_options;
28714 build_target.isa = sbitmap_alloc (isa_num_bits);
28715 arm_configure_build_target (&build_target, targ_options, false);
28717 if (build_target.core_name
28718 && !bitmap_bit_p (build_target.isa, isa_bit_quirk_no_asmcpu))
28720 const char* truncated_name
28721 = arm_rewrite_selected_cpu (build_target.core_name);
28722 asm_fprintf (stream, "\t.cpu %s\n", truncated_name);
28725 const arch_option *arch
28726 = arm_parse_arch_option_name (all_architectures, "-march",
28727 build_target.arch_name);
28728 auto_sbitmap opt_bits (isa_num_bits);
28730 gcc_assert (arch);
28732 if (strcmp (build_target.arch_name, "armv7ve") == 0)
28734 /* Keep backward compatibility for assemblers which don't support
28735 armv7ve. Fortunately, none of the following extensions are reset
28736 by a .fpu directive. */
28737 asm_fprintf (stream, "\t.arch armv7-a\n");
28738 asm_fprintf (stream, "\t.arch_extension virt\n");
28739 asm_fprintf (stream, "\t.arch_extension idiv\n");
28740 asm_fprintf (stream, "\t.arch_extension sec\n");
28741 asm_fprintf (stream, "\t.arch_extension mp\n");
28743 else
28744 asm_fprintf (stream, "\t.arch %s\n", build_target.arch_name);
28746 /* The .fpu directive will reset any architecture extensions from the
28747 assembler that relate to the fp/vector extensions. So put this out before
28748 any .arch_extension directives. */
28749 const char *fpu_name = (TARGET_SOFT_FLOAT
28750 ? "softvfp"
28751 : arm_identify_fpu_from_isa (build_target.isa));
28752 asm_fprintf (stream, "\t.fpu %s\n", fpu_name);
28754 if (!arch->common.extensions)
28755 return;
28757 for (const struct cpu_arch_extension *opt = arch->common.extensions;
28758 opt->name != NULL;
28759 opt++)
28761 if (!opt->remove)
28763 arm_initialize_isa (opt_bits, opt->isa_bits);
28765 /* For "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
28766 "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft", MVE and MVE with
28767 floating point instructions are disabled. So the following check
28768 restricts the printing of ".arch_extension mve" and
28769 ".arch_extension fp" (for mve.fp) in the assembly file. MVE needs
28770 this special behaviour because the feature bits "mve" and
28771 "mve_float" are not part of the "fpu bits", so they are not cleared
28772 when -mfloat-abi=soft (i.e. nofp), but the macros TARGET_HAVE_MVE and
28773 TARGET_HAVE_MVE_FLOAT are disabled. */
28774 if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
28775 || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
28776 && !TARGET_HAVE_MVE_FLOAT))
28777 continue;
28779 /* If every feature bit of this option is set in the target ISA
28780 specification, print out the option name. However, don't print
28781 anything if all the bits are part of the FPU specification. */
28782 if (bitmap_subset_p (opt_bits, build_target.isa)
28783 && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
28784 asm_fprintf (stream, "\t.arch_extension %s\n", opt->name);
28789 static void
28790 arm_file_start (void)
28792 int val;
28793 bool pac = (aarch_ra_sign_scope != AARCH_FUNCTION_NONE);
28794 bool bti = (aarch_enable_bti == 1);
28796 arm_print_asm_arch_directives
28797 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28799 if (TARGET_BPABI)
28801 /* If we have a named cpu, but the assembler does not support that
28802 name via .cpu, put out a cpu name attribute; but don't do this if the
28803 name starts with the fictitious prefix, 'generic'. */
28804 if (arm_active_target.core_name
28805 && bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu)
28806 && !startswith (arm_active_target.core_name, "generic"))
28808 const char* truncated_name
28809 = arm_rewrite_selected_cpu (arm_active_target.core_name);
28810 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu))
28811 asm_fprintf (asm_out_file, "\t.eabi_attribute 5, \"%s\"\n",
28812 truncated_name);
28815 if (print_tune_info)
28816 arm_print_tune_info ();
28818 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
28819 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28821 if (TARGET_HARD_FLOAT_ABI)
28822 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28824 /* Some of these attributes only apply when the corresponding features
28825 are used. However we don't have any easy way of figuring this out.
28826 Conservatively record the setting that would have been used. */
28828 if (flag_rounding_math)
28829 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28831 if (!flag_unsafe_math_optimizations)
28833 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28834 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28836 if (flag_signaling_nans)
28837 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28839 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28840 flag_finite_math_only ? 1 : 3);
28842 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28843 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28844 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28845 flag_short_enums ? 1 : 2);
28847 /* Tag_ABI_optimization_goals. */
28848 if (optimize_size)
28849 val = 4;
28850 else if (optimize >= 2)
28851 val = 2;
28852 else if (optimize)
28853 val = 1;
28854 else
28855 val = 6;
28856 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28858 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28859 unaligned_access);
28861 if (arm_fp16_format)
28862 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28863 (int) arm_fp16_format);
28865 if (TARGET_HAVE_PACBTI)
28867 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 2);
28868 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 2);
28870 else if (pac || bti)
28872 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 1);
28873 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 1);
28876 if (bti)
28877 arm_emit_eabi_attribute ("TAG_BTI_use", 74, 1);
28878 if (pac)
28879 arm_emit_eabi_attribute ("TAG_PACRET_use", 76, 1);
28881 if (arm_lang_output_object_attributes_hook)
28882 arm_lang_output_object_attributes_hook();
28885 default_file_start ();
28888 static void
28889 arm_file_end (void)
28891 int regno;
28893 /* Just in case the last function output in the assembler had non-default
28894 architecture directives, we force the assembler state back to the default
28895 set, so that any 'calculated' build attributes are based on the default
28896 options rather than the special options for that function. */
28897 arm_print_asm_arch_directives
28898 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28900 if (NEED_INDICATE_EXEC_STACK)
28901 /* Add .note.GNU-stack. */
28902 file_end_indicate_exec_stack ();
28904 if (! thumb_call_reg_needed)
28905 return;
28907 switch_to_section (text_section);
28908 asm_fprintf (asm_out_file, "\t.code 16\n");
28909 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28911 for (regno = 0; regno < LR_REGNUM; regno++)
28913 rtx label = thumb_call_via_label[regno];
28915 if (label != 0)
28917 targetm.asm_out.internal_label (asm_out_file, "L",
28918 CODE_LABEL_NUMBER (label));
28919 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28924 /* Symbols in the text segment can be accessed without indirecting via the
28925 constant pool; it may take an extra binary operation, but this is still
28926 faster than indirecting via memory. Don't do this when not optimizing,
28927 since we won't be calculating all of the offsets necessary to do this
28928 simplification. */
28930 static void
28931 arm_encode_section_info (tree decl, rtx rtl, int first)
28933 if (optimize > 0 && TREE_CONSTANT (decl))
28934 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28936 default_encode_section_info (decl, rtl, first);
28939 static void
28940 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28942 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28943 && !strcmp (prefix, "L"))
28945 arm_ccfsm_state = 0;
28946 arm_target_insn = NULL;
28948 default_internal_label (stream, prefix, labelno);
28951 /* Define classes to generate code as RTL or output asm to a file.
28952 Using templates then allows us to use the same code to output code
28953 sequences in the two formats. */
28954 class thumb1_const_rtl
28956 public:
28957 thumb1_const_rtl (rtx dst) : dst (dst) {}
28959 void mov (HOST_WIDE_INT val)
28961 emit_set_insn (dst, GEN_INT (val));
28964 void add (HOST_WIDE_INT val)
28966 emit_set_insn (dst, gen_rtx_PLUS (SImode, dst, GEN_INT (val)));
28969 void ashift (HOST_WIDE_INT shift)
28971 emit_set_insn (dst, gen_rtx_ASHIFT (SImode, dst, GEN_INT (shift)));
28974 void neg ()
28976 emit_set_insn (dst, gen_rtx_NEG (SImode, dst));
28979 private:
28980 rtx dst;
28983 class thumb1_const_print
28985 public:
28986 thumb1_const_print (FILE *f, int regno)
28988 t_file = f;
28989 dst_regname = reg_names[regno];
28992 void mov (HOST_WIDE_INT val)
28994 asm_fprintf (t_file, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28995 dst_regname, val);
28998 void add (HOST_WIDE_INT val)
29000 asm_fprintf (t_file, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
29001 dst_regname, val);
29004 void ashift (HOST_WIDE_INT shift)
29006 asm_fprintf (t_file, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
29007 dst_regname, shift);
29010 void neg ()
29012 asm_fprintf (t_file, "\trsbs\t%s, #0\n", dst_regname);
29015 private:
29016 FILE *t_file;
29017 const char *dst_regname;
29020 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
29021 Avoid generating useless code when one of the bytes is zero. */
29022 template <class T>
29023 void
29024 thumb1_gen_const_int_1 (T dst, HOST_WIDE_INT op1)
29026 bool mov_done_p = false;
29027 unsigned HOST_WIDE_INT val = op1;
29028 int shift = 0;
29029 int i;
29031 gcc_assert (op1 == trunc_int_for_mode (op1, SImode));
29033 if (val <= 255)
29035 dst.mov (val);
29036 return;
29039 /* For negative numbers with the top nine bits set, build the
29040 opposite of OP1 and then negate it; this is generally shorter and
29041 never longer. */
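/* E.g. for OP1 == -10 (0xfffffff6) the test below succeeds, so we
   emit "movs rd, #10" followed by "rsbs rd, #0", two instructions
   instead of building the value byte by byte.  */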
29042 if ((val & 0xFF800000) == 0xFF800000)
29044 thumb1_gen_const_int_1 (dst, -op1);
29045 dst.neg ();
29046 return;
29049 /* In the general case, we need 7 instructions to build
29050 a 32-bit constant (1 movs, 3 lsls, 3 adds). We can
29051 do better if VAL is small enough, or
29052 right-shiftable by a suitable amount. If the
29053 right-shift lets us encode at least one less byte,
29054 it's worth it: we save an adds and an lsls at the
29055 expense of a final lsls. */
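/* E.g. for VAL == 0x44000 the final shift is 14 and VAL >> 14 == 17
   fits in one byte, so we emit "movs rd, #17" then "lsls rd, #14",
   two instructions instead of the four needed to build 0x44000 byte
   by byte.  */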
29056 int final_shift = number_of_first_bit_set (val);
29058 int leading_zeroes = clz_hwi (val);
29059 int number_of_bytes_needed
29060 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes)
29061 / BITS_PER_UNIT) + 1;
29062 int number_of_bytes_needed2
29063 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes - final_shift)
29064 / BITS_PER_UNIT) + 1;
29066 if (number_of_bytes_needed2 < number_of_bytes_needed)
29067 val >>= final_shift;
29068 else
29069 final_shift = 0;
29071 /* If we are in a very small range, we can use either a single movs
29072 or movs+adds. */
29073 if (val <= 510)
29075 if (val > 255)
29077 unsigned HOST_WIDE_INT high = val - 255;
29079 dst.mov (high);
29080 dst.add (255);
29082 else
29083 dst.mov (val);
29085 if (final_shift > 0)
29086 dst.ashift (final_shift);
29088 else
29090 /* General case, emit upper 3 bytes as needed. */
29091 for (i = 0; i < 3; i++)
29093 unsigned HOST_WIDE_INT byte = (val >> (8 * (3 - i))) & 0xff;
29095 if (byte)
29097 /* We are about to emit new bits, stop accumulating a
29098 shift amount, and left-shift only if we have already
29099 emitted some upper bits. */
29100 if (mov_done_p)
29102 dst.ashift (shift);
29103 dst.add (byte);
29105 else
29106 dst.mov (byte);
29108 /* Stop accumulating shift amount since we've just
29109 emitted some bits. */
29110 shift = 0;
29112 mov_done_p = true;
29115 if (mov_done_p)
29116 shift += 8;
29119 /* Emit lower byte. */
29120 if (!mov_done_p)
29121 dst.mov (val & 0xff);
29122 else
29124 dst.ashift (shift);
29125 if (val & 0xff)
29126 dst.add (val & 0xff);
29129 if (final_shift > 0)
29130 dst.ashift (final_shift);
29134 /* Proxies for thumb1.md, since the thumb1_const_print and
29135 thumb1_const_rtl classes are not exported. */
29136 void
29137 thumb1_gen_const_int_rtl (rtx dst, HOST_WIDE_INT op1)
29139 thumb1_const_rtl t (dst);
29140 thumb1_gen_const_int_1 (t, op1);
29143 void
29144 thumb1_gen_const_int_print (rtx dst, HOST_WIDE_INT op1)
29146 thumb1_const_print t (asm_out_file, REGNO (dst));
29147 thumb1_gen_const_int_1 (t, op1);
29150 /* Output code to add DELTA to the first argument, and then jump
29151 to FUNCTION. Used for C++ multiple inheritance. */
29153 static void
29154 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
29155 HOST_WIDE_INT, tree function)
29157 static int thunk_label = 0;
29158 char label[256];
29159 char labelpc[256];
29160 int mi_delta = delta;
29161 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
29162 int shift = 0;
29163 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
29164 ? 1 : 0);
29165 if (mi_delta < 0)
29166 mi_delta = - mi_delta;
29168 final_start_function (emit_barrier (), file, 1);
29170 if (TARGET_THUMB1)
29172 int labelno = thunk_label++;
29173 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
29174 /* Thunks are entered in arm mode when available. */
29175 if (TARGET_THUMB1_ONLY)
29177 /* push r3 so we can use it as a temporary. */
29178 /* TODO: Omit this save if r3 is not used. */
29179 fputs ("\tpush {r3}\n", file);
29181 /* With -mpure-code, we cannot load the address from the
29182 constant pool: we build it explicitly. */
29183 if (target_pure_code)
29185 fputs ("\tmovs\tr3, #:upper8_15:#", file);
29186 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29187 fputc ('\n', file);
29188 fputs ("\tlsls r3, #8\n", file);
29189 fputs ("\tadds\tr3, #:upper0_7:#", file);
29190 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29191 fputc ('\n', file);
29192 fputs ("\tlsls r3, #8\n", file);
29193 fputs ("\tadds\tr3, #:lower8_15:#", file);
29194 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29195 fputc ('\n', file);
29196 fputs ("\tlsls r3, #8\n", file);
29197 fputs ("\tadds\tr3, #:lower0_7:#", file);
29198 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29199 fputc ('\n', file);
29201 else
29202 fputs ("\tldr\tr3, ", file);
29204 else
29206 fputs ("\tldr\tr12, ", file);
29209 if (!target_pure_code)
29211 assemble_name (file, label);
29212 fputc ('\n', file);
29215 if (flag_pic)
29217 /* If we are generating PIC, the ldr instruction below loads
29218 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
29219 the address of the add + 8, so we have:
29221 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
29222 = target + 1.
29224 Note that we have "+ 1" because some versions of GNU ld
29225 don't set the low bit of the result for R_ARM_REL32
29226 relocations against thumb function symbols.
29227 On ARMv6M this is +4, not +8. */
29228 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
29229 assemble_name (file, labelpc);
29230 fputs (":\n", file);
29231 if (TARGET_THUMB1_ONLY)
29233 /* This is 2 insns after the start of the thunk, so we know it
29234 is 4-byte aligned. */
29235 fputs ("\tadd\tr3, pc, r3\n", file);
29236 fputs ("\tmov r12, r3\n", file);
29238 else
29239 fputs ("\tadd\tr12, pc, r12\n", file);
29241 else if (TARGET_THUMB1_ONLY)
29242 fputs ("\tmov r12, r3\n", file);
29244 if (TARGET_THUMB1_ONLY)
29246 if (mi_delta > 255)
29248 /* With -mpure-code, we cannot load MI_DELTA from the
29249 constant pool: we build it explicitly. */
29250 if (target_pure_code)
29252 thumb1_const_print r3 (file, 3);
29253 thumb1_gen_const_int_1 (r3, mi_delta);
29255 else
29257 fputs ("\tldr\tr3, ", file);
29258 assemble_name (file, label);
29259 fputs ("+4\n", file);
29261 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
29262 mi_op, this_regno, this_regno);
29264 else if (mi_delta != 0)
29266 /* Thumb1 unified syntax requires s suffix in instruction name when
29267 one of the operands is immediate. */
29268 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
29269 mi_op, this_regno, this_regno,
29270 mi_delta);
29273 else
29275 /* TODO: Use movw/movt for large constants when available. */
29276 while (mi_delta != 0)
29278 if ((mi_delta & (3 << shift)) == 0)
29279 shift += 2;
29280 else
29282 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
29283 mi_op, this_regno, this_regno,
29284 mi_delta & (0xff << shift));
29285 mi_delta &= ~(0xff << shift);
29286 shift += 8;
29290 if (TARGET_THUMB1)
29292 if (TARGET_THUMB1_ONLY)
29293 fputs ("\tpop\t{r3}\n", file);
29295 fprintf (file, "\tbx\tr12\n");
29297 /* With -mpure-code, we don't need to emit literals for the
29298 function address and delta since we emitted code to build
29299 them. */
29300 if (!target_pure_code)
29302 ASM_OUTPUT_ALIGN (file, 2);
29303 assemble_name (file, label);
29304 fputs (":\n", file);
29305 if (flag_pic)
29307 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
29308 rtx tem = XEXP (DECL_RTL (function), 0);
29309 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
29310 pipeline offset is four rather than eight. Adjust the offset
29311 accordingly. */
29312 tem = plus_constant (GET_MODE (tem), tem,
29313 TARGET_THUMB1_ONLY ? -3 : -7);
29314 tem = gen_rtx_MINUS (GET_MODE (tem),
29315 tem,
29316 gen_rtx_SYMBOL_REF (Pmode,
29317 ggc_strdup (labelpc)));
29318 assemble_integer (tem, 4, BITS_PER_WORD, 1);
29320 else
29321 /* Output ".word .LTHUNKn". */
29322 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
29324 if (TARGET_THUMB1_ONLY && mi_delta > 255)
29325 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
29328 else
29330 fputs ("\tb\t", file);
29331 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29332 if (NEED_PLT_RELOC)
29333 fputs ("(PLT)", file);
29334 fputc ('\n', file);
29337 final_end_function ();
29340 /* MI thunk handling for TARGET_32BIT. */
29342 static void
29343 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
29344 HOST_WIDE_INT vcall_offset, tree function)
29346 const bool long_call_p = arm_is_long_call_p (function);
29348 /* On ARM, this_regno is R0 or R1 depending on
29349 whether the function returns an aggregate or not. */
29351 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
29352 function)
29353 ? R1_REGNUM : R0_REGNUM);
29355 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
29356 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
29357 reload_completed = 1;
29358 emit_note (NOTE_INSN_PROLOGUE_END);
29360 /* Add DELTA to THIS_RTX. */
29361 if (delta != 0)
29362 arm_split_constant (PLUS, Pmode, NULL_RTX,
29363 delta, this_rtx, this_rtx, false);
29365 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
29366 if (vcall_offset != 0)
29368 /* Load *THIS_RTX. */
29369 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
29370 /* Compute *THIS_RTX + VCALL_OFFSET. */
29371 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
29372 false);
29373 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
29374 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
29375 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
29378 /* Generate a tail call to the target function. */
29379 if (!TREE_USED (function))
29381 assemble_external (function);
29382 TREE_USED (function) = 1;
29384 rtx funexp = XEXP (DECL_RTL (function), 0);
29385 if (long_call_p)
29387 emit_move_insn (temp, funexp);
29388 funexp = temp;
29390 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
29391 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
29392 SIBLING_CALL_P (insn) = 1;
29393 emit_barrier ();
29395 /* Indirect calls require a bit of fixup in PIC mode. */
29396 if (long_call_p)
29398 split_all_insns_noflow ();
29399 arm_reorg ();
29402 insn = get_insns ();
29403 shorten_branches (insn);
29404 final_start_function (insn, file, 1);
29405 final (insn, file, 1);
29406 final_end_function ();
29408 /* Stop pretending this is a post-reload pass. */
29409 reload_completed = 0;
29412 /* Output code to add DELTA to the first argument, and then jump
29413 to FUNCTION. Used for C++ multiple inheritance. */
29415 static void
29416 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
29417 HOST_WIDE_INT vcall_offset, tree function)
29419 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
29421 assemble_start_function (thunk, fnname);
29422 if (aarch_bti_enabled ())
29423 emit_insn (aarch_gen_bti_c ());
29424 if (TARGET_32BIT)
29425 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
29426 else
29427 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
29428 assemble_end_function (thunk, fnname);
29432 arm_emit_vector_const (FILE *file, rtx x)
29434 int i;
29435 const char * pattern;
29437 gcc_assert (GET_CODE (x) == CONST_VECTOR);
29439 switch (GET_MODE (x))
29441 case E_V2SImode: pattern = "%08x"; break;
29442 case E_V4HImode: pattern = "%04x"; break;
29443 case E_V8QImode: pattern = "%02x"; break;
29444 default: gcc_unreachable ();
29447 fprintf (file, "0x");
29448 for (i = CONST_VECTOR_NUNITS (x); i--;)
29450 rtx element;
29452 element = CONST_VECTOR_ELT (x, i);
29453 fprintf (file, pattern, INTVAL (element));
29456 return 1;
29459 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
29460 HFmode constant pool entries are actually loaded with ldr. */
29461 void
29462 arm_emit_fp16_const (rtx c)
29464 long bits;
29466 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
29467 if (WORDS_BIG_ENDIAN)
29468 assemble_zeros (2);
29469 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
29470 if (!WORDS_BIG_ENDIAN)
29471 assemble_zeros (2);
29474 const char *
29475 arm_output_load_gr (rtx *operands)
29477 rtx reg;
29478 rtx offset;
29479 rtx wcgr;
29480 rtx sum;
29482 if (!MEM_P (operands [1])
29483 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
29484 || !REG_P (reg = XEXP (sum, 0))
29485 || !CONST_INT_P (offset = XEXP (sum, 1))
29486 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
29487 return "wldrw%?\t%0, %1";
29489 /* Fix up an out-of-range load of a GR register. */
29490 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
29491 wcgr = operands[0];
29492 operands[0] = reg;
29493 output_asm_insn ("ldr%?\t%0, %1", operands);
29495 operands[0] = wcgr;
29496 operands[1] = reg;
29497 output_asm_insn ("tmcr%?\t%0, %1", operands);
29498 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
29500 return "";
29503 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
29505 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
29506 named arg and all anonymous args onto the stack.
29507 XXX I know the prologue shouldn't be pushing registers, but it is faster
29508 that way. */
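/* For example, for "int f (int a, ...)" under AAPCS only r0 holds a
   named argument, so nregs == 1 below and *pretend_size becomes
   (4 - 1) * UNITS_PER_WORD == 12; the prologue then pushes r1-r3 so
   that the anonymous arguments sit contiguously with any arguments
   passed on the stack.  */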
29510 static void
29511 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
29512 const function_arg_info &arg,
29513 int *pretend_size,
29514 int second_time ATTRIBUTE_UNUSED)
29516 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
29517 int nregs;
29519 cfun->machine->uses_anonymous_args = 1;
29520 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
29522 nregs = pcum->aapcs_ncrn;
29523 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
29524 && (nregs & 1))
29526 int res = arm_needs_doubleword_align (arg.mode, arg.type);
29527 if (res < 0 && warn_psabi)
29528 inform (input_location, "parameter passing for argument of "
29529 "type %qT changed in GCC 7.1", arg.type);
29530 else if (res > 0)
29532 nregs++;
29533 if (res > 1 && warn_psabi)
29534 inform (input_location,
29535 "parameter passing for argument of type "
29536 "%qT changed in GCC 9.1", arg.type);
29540 else
29541 nregs = pcum->nregs;
29543 if (nregs < NUM_ARG_REGS)
29544 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
29547 /* We can't rely on the caller doing the proper promotion when
29548 using APCS or ATPCS. */
29550 static bool
29551 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
29553 return !TARGET_AAPCS_BASED;
29556 static machine_mode
29557 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
29558 machine_mode mode,
29559 int *punsignedp ATTRIBUTE_UNUSED,
29560 const_tree fntype ATTRIBUTE_UNUSED,
29561 int for_return ATTRIBUTE_UNUSED)
29563 if (GET_MODE_CLASS (mode) == MODE_INT
29564 && GET_MODE_SIZE (mode) < 4)
29565 return SImode;
29567 return mode;
29571 static bool
29572 arm_default_short_enums (void)
29574 return ARM_DEFAULT_SHORT_ENUMS;
29578 /* AAPCS requires that anonymous bitfields affect structure alignment. */
29580 static bool
29581 arm_align_anon_bitfield (void)
29583 return TARGET_AAPCS_BASED;
29587 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
29589 static tree
29590 arm_cxx_guard_type (void)
29592 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
29596 /* The EABI says test the least significant bit of a guard variable. */
29598 static bool
29599 arm_cxx_guard_mask_bit (void)
29601 return TARGET_AAPCS_BASED;
29605 /* The EABI specifies that all array cookies are 8 bytes long. */
29607 static tree
29608 arm_get_cookie_size (tree type)
29610 tree size;
29612 if (!TARGET_AAPCS_BASED)
29613 return default_cxx_get_cookie_size (type);
29615 size = build_int_cst (sizetype, 8);
29616 return size;
29620 /* The EABI says that array cookies should also contain the element size. */
29622 static bool
29623 arm_cookie_has_size (void)
29625 return TARGET_AAPCS_BASED;
29629 /* The EABI says constructors and destructors should return a pointer to
29630 the object constructed/destroyed. */
29632 static bool
29633 arm_cxx_cdtor_returns_this (void)
29635 return TARGET_AAPCS_BASED;
29638 /* The EABI says that an inline function may never be the key
29639 method. */
29641 static bool
29642 arm_cxx_key_method_may_be_inline (void)
29644 return !TARGET_AAPCS_BASED;
29647 static void
29648 arm_cxx_determine_class_data_visibility (tree decl)
29650 if (!TARGET_AAPCS_BASED
29651 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
29652 return;
29654 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
29655 is exported. However, on systems without dynamic vague linkage,
29656 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
29657 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
29658 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
29659 else
29660 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
29661 DECL_VISIBILITY_SPECIFIED (decl) = 1;
29664 static bool
29665 arm_cxx_class_data_always_comdat (void)
29667 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
29668 vague linkage if the class has no key function. */
29669 return !TARGET_AAPCS_BASED;
29673 /* The EABI says __aeabi_atexit should be used to register static
29674 destructors. */
29676 static bool
29677 arm_cxx_use_aeabi_atexit (void)
29679 return TARGET_AAPCS_BASED;
29683 void
29684 arm_set_return_address (rtx source, rtx scratch)
29686 arm_stack_offsets *offsets;
29687 HOST_WIDE_INT delta;
29688 rtx addr, mem;
29689 unsigned long saved_regs;
29691 offsets = arm_get_frame_offsets ();
29692 saved_regs = offsets->saved_regs_mask;
29694 if ((saved_regs & (1 << LR_REGNUM)) == 0)
29695 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29696 else
29698 if (frame_pointer_needed)
29699 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
29700 else
29702 /* LR will be the first saved register. */
29703 delta = offsets->outgoing_args - (offsets->frame + 4);
29706 if (delta >= 4096)
29708 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
29709 GEN_INT (delta & ~4095)));
29710 addr = scratch;
29711 delta &= 4095;
29713 else
29714 addr = stack_pointer_rtx;
29716 addr = plus_constant (Pmode, addr, delta);
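/* E.g. if the saved LR slot is 4200 bytes above the stack pointer,
   the code above emits "add scratch, sp, #4096" and the store below
   then uses the address scratch + 104.  */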
29719 /* The store needs to be marked to prevent DSE from deleting
29720 it as dead if it is based on fp. */
29721 mem = gen_frame_mem (Pmode, addr);
29722 MEM_VOLATILE_P (mem) = true;
29723 emit_move_insn (mem, source);
29728 void
29729 thumb_set_return_address (rtx source, rtx scratch)
29731 arm_stack_offsets *offsets;
29732 HOST_WIDE_INT delta;
29733 HOST_WIDE_INT limit;
29734 int reg;
29735 rtx addr, mem;
29736 unsigned long mask;
29738 emit_use (source);
29740 offsets = arm_get_frame_offsets ();
29741 mask = offsets->saved_regs_mask;
29742 if (mask & (1 << LR_REGNUM))
29744 limit = 1024;
29745 /* Find the saved regs. */
29746 if (frame_pointer_needed)
29748 delta = offsets->soft_frame - offsets->saved_args;
29749 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
29750 if (TARGET_THUMB1)
29751 limit = 128;
29753 else
29755 delta = offsets->outgoing_args - offsets->saved_args;
29756 reg = SP_REGNUM;
29758 /* Allow for the stack frame. */
29759 if (TARGET_THUMB1 && TARGET_BACKTRACE)
29760 delta -= 16;
29761 /* The link register is always the first saved register. */
29762 delta -= 4;
29764 /* Construct the address. */
29765 addr = gen_rtx_REG (SImode, reg);
29766 if (delta > limit)
29768 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
29769 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
29770 addr = scratch;
29772 else
29773 addr = plus_constant (Pmode, addr, delta);
29775 /* The store needs to be marked to prevent DSE from deleting
29776 it as dead if it is based on fp. */
29777 mem = gen_frame_mem (Pmode, addr);
29778 MEM_VOLATILE_P (mem) = true;
29779 emit_move_insn (mem, source);
29781 else
29782 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29785 /* Implements target hook vector_mode_supported_p. */
29786 bool
29787 arm_vector_mode_supported_p (machine_mode mode)
29789 /* Neon also supports V2SImode, etc. listed in the clause below. */
29790 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29791 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
29792 || mode == V2DImode || mode == V8HFmode || mode == V4BFmode
29793 || mode == V8BFmode))
29794 return true;
29796 if ((TARGET_NEON || TARGET_IWMMXT)
29797 && ((mode == V2SImode)
29798 || (mode == V4HImode)
29799 || (mode == V8QImode)))
29800 return true;
29802 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29803 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29804 || mode == V2HAmode))
29805 return true;
29807 if (TARGET_HAVE_MVE
29808 && (VALID_MVE_SI_MODE (mode) || VALID_MVE_PRED_MODE (mode)))
29809 return true;
29811 if (TARGET_HAVE_MVE_FLOAT
29812 && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
29813 return true;
29815 return false;
29818 /* Implements target hook array_mode. */
29819 static opt_machine_mode
29820 arm_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems)
29822 if (TARGET_HAVE_MVE
29823 /* MVE accepts only tuples of 2 or 4 vectors. */
29824 && (nelems == 2
29825 || nelems == 4))
29827 machine_mode struct_mode;
29828 FOR_EACH_MODE_IN_CLASS (struct_mode, GET_MODE_CLASS (mode))
29830 if (GET_MODE_INNER (struct_mode) == GET_MODE_INNER (mode)
29831 && known_eq (GET_MODE_NUNITS (struct_mode),
29832 GET_MODE_NUNITS (mode) * nelems))
29833 return struct_mode;
29836 return opt_machine_mode ();
29839 /* Implements target hook array_mode_supported_p. */
29841 static bool
29842 arm_array_mode_supported_p (machine_mode mode,
29843 unsigned HOST_WIDE_INT nelems)
29845 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
29846 for now, as the lane-swapping logic needs to be extended in the expanders.
29847 See PR target/82518. */
29848 if (TARGET_NEON && !BYTES_BIG_ENDIAN
29849 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29850 && (nelems >= 2 && nelems <= 4))
29851 return true;
29853 if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
29854 && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
29855 return true;
29857 return false;
29860 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29861 registers when autovectorizing for Neon, at least until multiple vector
29862 widths are supported properly by the middle-end. */
29864 static machine_mode
29865 arm_preferred_simd_mode (scalar_mode mode)
29867 if (TARGET_NEON)
29868 switch (mode)
29870 case E_HFmode:
29871 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HFmode : V8HFmode;
29872 case E_SFmode:
29873 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29874 case E_SImode:
29875 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29876 case E_HImode:
29877 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29878 case E_QImode:
29879 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29880 case E_DImode:
29881 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29882 return V2DImode;
29883 break;
29885 default:;
29888 if (TARGET_REALLY_IWMMXT)
29889 switch (mode)
29891 case E_SImode:
29892 return V2SImode;
29893 case E_HImode:
29894 return V4HImode;
29895 case E_QImode:
29896 return V8QImode;
29898 default:;
29901 if (TARGET_HAVE_MVE)
29902 switch (mode)
29904 case E_QImode:
29905 return V16QImode;
29906 case E_HImode:
29907 return V8HImode;
29908 case E_SImode:
29909 return V4SImode;
29911 default:;
29914 if (TARGET_HAVE_MVE_FLOAT)
29915 switch (mode)
29917 case E_HFmode:
29918 return V8HFmode;
29919 case E_SFmode:
29920 return V4SFmode;
29922 default:;
29925 return word_mode;
29928 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29930 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29931 using r0-r4 for function arguments, r7 for the stack frame, and not have
29932 enough left over to do doubleword arithmetic. For Thumb-2 all the
29933 potentially problematic instructions accept high registers so this is not
29934 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29935 that require many low registers. */
29936 static bool
29937 arm_class_likely_spilled_p (reg_class_t rclass)
29939 if ((TARGET_THUMB1 && rclass == LO_REGS)
29940 || rclass == CC_REG)
29941 return true;
29943 return default_class_likely_spilled_p (rclass);
29946 /* Implements target hook small_register_classes_for_mode_p. */
29947 bool
29948 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29950 return TARGET_THUMB1;
29953 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29954 ARM insns and therefore guarantee that the shift count is modulo 256.
29955 DImode shifts (those implemented by lib1funcs.S or by optabs.cc)
29956 guarantee no particular behavior for out-of-range counts. */
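/* E.g. an SImode shift by a variable count of 257 behaves like a
   shift by 1, so the middle-end may omit an explicit masking of the
   count with 255; for DImode no such guarantee is given.  */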
29958 static unsigned HOST_WIDE_INT
29959 arm_shift_truncation_mask (machine_mode mode)
29961 return mode == SImode ? 255 : 0;
29965 /* Map internal gcc register numbers to DWARF2 register numbers. */
29967 unsigned int
29968 arm_debugger_regno (unsigned int regno)
29970 if (regno < 16)
29971 return regno;
29973 if (IS_VFP_REGNUM (regno))
29975 /* See comment in arm_dwarf_register_span. */
29976 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29977 return 64 + regno - FIRST_VFP_REGNUM;
29978 else
29979 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29982 if (IS_IWMMXT_GR_REGNUM (regno))
29983 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29985 if (IS_IWMMXT_REGNUM (regno))
29986 return 112 + regno - FIRST_IWMMXT_REGNUM;
29988 if (IS_PAC_REGNUM (regno))
29989 return DWARF_PAC_REGNUM;
29991 return DWARF_FRAME_REGISTERS;
29994 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29995 GCC models them as 64 32-bit registers, so we need to describe this to
29996 the DWARF generation code. Other registers can use the default. */
29997 static rtx
29998 arm_dwarf_register_span (rtx rtl)
30000 machine_mode mode;
30001 unsigned regno;
30002 rtx parts[16];
30003 int nregs;
30004 int i;
30006 regno = REGNO (rtl);
30007 if (!IS_VFP_REGNUM (regno))
30008 return NULL_RTX;
30010 /* XXX FIXME: The EABI defines two VFP register ranges:
30011 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
30012 256-287: D0-D31
30013 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
30014 corresponding D register. Until GDB supports this, we shall use the
30015 legacy encodings. We also use these encodings for D0-D15 for
30016 compatibility with older debuggers. */
30017 mode = GET_MODE (rtl);
30018 if (GET_MODE_SIZE (mode) < 8)
30019 return NULL_RTX;
30021 if (VFP_REGNO_OK_FOR_SINGLE (regno))
30023 nregs = GET_MODE_SIZE (mode) / 4;
30024 for (i = 0; i < nregs; i += 2)
30025 if (TARGET_BIG_END)
30027 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
30028 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
30030 else
30032 parts[i] = gen_rtx_REG (SImode, regno + i);
30033 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
30036 else
30038 nregs = GET_MODE_SIZE (mode) / 8;
30039 for (i = 0; i < nregs; i++)
30040 parts[i] = gen_rtx_REG (DImode, regno + i);
30043 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
30046 #if ARM_UNWIND_INFO
30047 /* Emit unwind directives for a store-multiple instruction or stack pointer
30048 push during alignment.
30049 These should only ever be generated by the function prologue code, so
30050 expect them to have a particular form.
30051 The store-multiple instruction sometimes pushes pc as the last register,
30052 although it should not be tracked into unwind information, or for -Os
30053 sometimes pushes some dummy registers before the first register that needs
30054 to be tracked in unwind information; such dummy registers are there just
30055 to avoid separate stack adjustment, and will not be restored in the
30056 epilogue. */
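/* E.g. a prologue "push {r4, r5, lr}" is annotated below as
   ".save {r4, r5, lr}", and a "vpush {d8, d9}" as ".vsave {d8, d9}";
   a trailing push of pc, or leading dummy pushes, become an extra
   ".pad" adjustment rather than a .save entry.  */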
30058 static void
30059 arm_unwind_emit_sequence (FILE * out_file, rtx p)
30061 int i;
30062 HOST_WIDE_INT offset;
30063 HOST_WIDE_INT nregs;
30064 int reg_size;
30065 unsigned reg;
30066 unsigned lastreg;
30067 unsigned padfirst = 0, padlast = 0;
30068 rtx e;
30070 e = XVECEXP (p, 0, 0);
30071 gcc_assert (GET_CODE (e) == SET);
30073 /* First insn will adjust the stack pointer. */
30074 gcc_assert (GET_CODE (e) == SET
30075 && REG_P (SET_DEST (e))
30076 && REGNO (SET_DEST (e)) == SP_REGNUM
30077 && GET_CODE (SET_SRC (e)) == PLUS);
30079 offset = -INTVAL (XEXP (SET_SRC (e), 1));
30080 nregs = XVECLEN (p, 0) - 1;
30081 gcc_assert (nregs);
30083 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
30084 if (reg < 16 || IS_PAC_REGNUM (reg))
30086 /* For -Os dummy registers can be pushed at the beginning to
30087 avoid separate stack pointer adjustment. */
30088 e = XVECEXP (p, 0, 1);
30089 e = XEXP (SET_DEST (e), 0);
30090 if (GET_CODE (e) == PLUS)
30091 padfirst = INTVAL (XEXP (e, 1));
30092 gcc_assert (padfirst == 0 || optimize_size);
30093 /* The function prologue may also push pc, but not annotate it as it is
30094 never restored. We turn this into a stack pointer adjustment. */
30095 e = XVECEXP (p, 0, nregs);
30096 e = XEXP (SET_DEST (e), 0);
30097 if (GET_CODE (e) == PLUS)
30098 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
30099 else
30100 padlast = offset - 4;
30101 gcc_assert (padlast == 0 || padlast == 4);
30102 if (padlast == 4)
30103 fprintf (out_file, "\t.pad #4\n");
30104 reg_size = 4;
30105 fprintf (out_file, "\t.save {");
30107 else if (IS_VFP_REGNUM (reg))
30109 reg_size = 8;
30110 fprintf (out_file, "\t.vsave {");
30112 else
30113 /* Unknown register type. */
30114 gcc_unreachable ();
30116 /* If the stack increment doesn't match the size of the saved registers,
30117 something has gone horribly wrong. */
30118 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
30120 offset = padfirst;
30121 lastreg = 0;
30122 /* The remaining insns will describe the stores. */
30123 for (i = 1; i <= nregs; i++)
30125 /* Expect (set (mem <addr>) (reg)).
30126 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
30127 e = XVECEXP (p, 0, i);
30128 gcc_assert (GET_CODE (e) == SET
30129 && MEM_P (SET_DEST (e))
30130 && REG_P (SET_SRC (e)));
30132 reg = REGNO (SET_SRC (e));
30133 gcc_assert (reg >= lastreg);
30135 if (i != 1)
30136 fprintf (out_file, ", ");
30137 /* We can't use %r for vfp because we need to use the
30138 double precision register names. */
30139 if (IS_VFP_REGNUM (reg))
30140 asm_fprintf (out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
30141 else if (IS_PAC_REGNUM (reg))
30142 asm_fprintf (asm_out_file, "ra_auth_code");
30143 else
30144 asm_fprintf (out_file, "%r", reg);
30146 if (flag_checking)
30148 /* Check that the addresses are consecutive. */
30149 e = XEXP (SET_DEST (e), 0);
30150 if (GET_CODE (e) == PLUS)
30151 gcc_assert (REG_P (XEXP (e, 0))
30152 && REGNO (XEXP (e, 0)) == SP_REGNUM
30153 && CONST_INT_P (XEXP (e, 1))
30154 && offset == INTVAL (XEXP (e, 1)));
30155 else
30156 gcc_assert (i == 1
30157 && REG_P (e)
30158 && REGNO (e) == SP_REGNUM);
30159 offset += reg_size;
30162 fprintf (out_file, "}\n");
30163 if (padfirst)
30164 fprintf (out_file, "\t.pad #%d\n", padfirst);
30167 /* Emit unwind directives for a SET. */
30169 static void
30170 arm_unwind_emit_set (FILE * out_file, rtx p)
30172 rtx e0;
30173 rtx e1;
30174 unsigned reg;
30176 e0 = XEXP (p, 0);
30177 e1 = XEXP (p, 1);
30178 switch (GET_CODE (e0))
30180 case MEM:
30181 /* Pushing a single register. */
30182 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
30183 || !REG_P (XEXP (XEXP (e0, 0), 0))
30184 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
30185 abort ();
30187 asm_fprintf (out_file, "\t.save ");
30188 if (IS_VFP_REGNUM (REGNO (e1)))
30189 asm_fprintf(out_file, "{d%d}\n",
30190 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
30191 else
30192 asm_fprintf(out_file, "{%r}\n", REGNO (e1));
30193 break;
30195 case REG:
30196 if (REGNO (e0) == SP_REGNUM)
30198 /* A stack increment. */
30199 if (GET_CODE (e1) != PLUS
30200 || !REG_P (XEXP (e1, 0))
30201 || REGNO (XEXP (e1, 0)) != SP_REGNUM
30202 || !CONST_INT_P (XEXP (e1, 1)))
30203 abort ();
30205 asm_fprintf (out_file, "\t.pad #%wd\n",
30206 -INTVAL (XEXP (e1, 1)));
30208 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
30210 HOST_WIDE_INT offset;
30212 if (GET_CODE (e1) == PLUS)
30214 if (!REG_P (XEXP (e1, 0))
30215 || !CONST_INT_P (XEXP (e1, 1)))
30216 abort ();
30217 reg = REGNO (XEXP (e1, 0));
30218 offset = INTVAL (XEXP (e1, 1));
30219 asm_fprintf (out_file, "\t.setfp %r, %r, #%wd\n",
30220 HARD_FRAME_POINTER_REGNUM, reg,
30221 offset);
30223 else if (REG_P (e1))
30225 reg = REGNO (e1);
30226 asm_fprintf (out_file, "\t.setfp %r, %r\n",
30227 HARD_FRAME_POINTER_REGNUM, reg);
30229 else
30230 abort ();
30232 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
30234 /* Move from sp to reg. */
30235 asm_fprintf (out_file, "\t.movsp %r\n", REGNO (e0));
30237 else if (GET_CODE (e1) == PLUS
30238 && REG_P (XEXP (e1, 0))
30239 && REGNO (XEXP (e1, 0)) == SP_REGNUM
30240 && CONST_INT_P (XEXP (e1, 1)))
30242 /* Set reg to offset from sp. */
30243 asm_fprintf (out_file, "\t.movsp %r, #%d\n",
30244 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
30246 else if (REGNO (e0) == IP_REGNUM && arm_current_function_pac_enabled_p ())
30248 if (cfun->machine->pacspval_needed)
30249 asm_fprintf (out_file, "\t.pacspval\n");
30251 else
30252 abort ();
30253 break;
30255 default:
30256 abort ();
30261 /* Emit unwind directives for the given insn. */
30263 static void
30264 arm_unwind_emit (FILE * out_file, rtx_insn *insn)
30266 rtx note, pat;
30267 bool handled_one = false;
30269 if (arm_except_unwind_info (&global_options) != UI_TARGET)
30270 return;
30272 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
30273 && (TREE_NOTHROW (current_function_decl)
30274 || crtl->all_throwers_are_sibcalls))
30275 return;
30277 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
30278 return;
30280 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
30282 switch (REG_NOTE_KIND (note))
30284 case REG_FRAME_RELATED_EXPR:
30285 pat = XEXP (note, 0);
30286 goto found;
30288 case REG_CFA_REGISTER:
30289 pat = XEXP (note, 0);
30290 if (pat == NULL)
30292 pat = PATTERN (insn);
30293 if (GET_CODE (pat) == PARALLEL)
30294 pat = XVECEXP (pat, 0, 0);
30297 /* Only emitted for IS_STACKALIGN re-alignment. */
30299 rtx dest, src;
30300 unsigned reg;
30302 src = SET_SRC (pat);
30303 dest = SET_DEST (pat);
30305 gcc_assert (src == stack_pointer_rtx
30306 || IS_PAC_REGNUM (REGNO (src)));
30307 reg = REGNO (dest);
30309 if (IS_PAC_REGNUM (REGNO (src)))
30310 arm_unwind_emit_set (out_file, PATTERN (insn));
30311 else
30312 asm_fprintf (out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
30313 reg + 0x90, reg);
30315 handled_one = true;
30316 break;
30318 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
30319 to get correct dwarf information for shrink-wrapping. We should not
30320 emit unwind information for it because these notes are used either for
30321 pretend arguments or to adjust sp and restore registers from the
30322 stack. */
30323 case REG_CFA_DEF_CFA:
30324 case REG_CFA_ADJUST_CFA:
30325 case REG_CFA_RESTORE:
30326 return;
30328 case REG_CFA_EXPRESSION:
30329 case REG_CFA_OFFSET:
30330 /* ??? Only handling here what we actually emit. */
30331 gcc_unreachable ();
30333 default:
30334 break;
30337 if (handled_one)
30338 return;
30339 pat = PATTERN (insn);
30340 found:
30342 switch (GET_CODE (pat))
30344 case SET:
30345 arm_unwind_emit_set (out_file, pat);
30346 break;
30348 case SEQUENCE:
30349 /* Store multiple. */
30350 arm_unwind_emit_sequence (out_file, pat);
30351 break;
30353 default:
30354 abort();
30359 /* Output a reference from a function exception table to the type_info
30360 object X. The EABI specifies that the symbol should be relocated by
30361 an R_ARM_TARGET2 relocation. */
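/* For a hypothetical type_info symbol _ZTI3Foo this emits
       .word   _ZTI3Foo(TARGET2)
   which the assembler turns into an R_ARM_TARGET2 relocation.  */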
30363 static bool
30364 arm_output_ttype (rtx x)
30366 fputs ("\t.word\t", asm_out_file);
30367 output_addr_const (asm_out_file, x);
30368 /* Use special relocations for symbol references. */
30369 if (!CONST_INT_P (x))
30370 fputs ("(TARGET2)", asm_out_file);
30371 fputc ('\n', asm_out_file);
30373 return TRUE;
30376 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
30378 static void
30379 arm_asm_emit_except_personality (rtx personality)
30381 fputs ("\t.personality\t", asm_out_file);
30382 output_addr_const (asm_out_file, personality);
30383 fputc ('\n', asm_out_file);
30385 #endif /* ARM_UNWIND_INFO */
30387 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
30389 static void
30390 arm_asm_init_sections (void)
30392 #if ARM_UNWIND_INFO
30393 exception_section = get_unnamed_section (0, output_section_asm_op,
30394 "\t.handlerdata");
30395 #endif /* ARM_UNWIND_INFO */
30397 #ifdef OBJECT_FORMAT_ELF
30398 if (target_pure_code)
30399 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
30400 #endif
30403 /* Output unwind directives for the start/end of a function. */
30405 void
30406 arm_output_fn_unwind (FILE * f, bool prologue)
30408 if (arm_except_unwind_info (&global_options) != UI_TARGET)
30409 return;
30411 if (prologue)
30412 fputs ("\t.fnstart\n", f);
30413 else
30415 /* If this function will never be unwound, then mark it as such.
30416 The same condition is used in arm_unwind_emit to suppress
30417 the frame annotations. */
30418 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
30419 && (TREE_NOTHROW (current_function_decl)
30420 || crtl->all_throwers_are_sibcalls))
30421 fputs("\t.cantunwind\n", f);
30423 fputs ("\t.fnend\n", f);
30427 static bool
30428 arm_emit_tls_decoration (FILE *fp, rtx x)
30430 enum tls_reloc reloc;
30431 rtx val;
30433 val = XVECEXP (x, 0, 0);
30434 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
30436 output_addr_const (fp, val);
30438 switch (reloc)
30440 case TLS_GD32:
30441 fputs ("(tlsgd)", fp);
30442 break;
30443 case TLS_GD32_FDPIC:
30444 fputs ("(tlsgd_fdpic)", fp);
30445 break;
30446 case TLS_LDM32:
30447 fputs ("(tlsldm)", fp);
30448 break;
30449 case TLS_LDM32_FDPIC:
30450 fputs ("(tlsldm_fdpic)", fp);
30451 break;
30452 case TLS_LDO32:
30453 fputs ("(tlsldo)", fp);
30454 break;
30455 case TLS_IE32:
30456 fputs ("(gottpoff)", fp);
30457 break;
30458 case TLS_IE32_FDPIC:
30459 fputs ("(gottpoff_fdpic)", fp);
30460 break;
30461 case TLS_LE32:
30462 fputs ("(tpoff)", fp);
30463 break;
30464 case TLS_DESCSEQ:
30465 fputs ("(tlsdesc)", fp);
30466 break;
30467 default:
30468 gcc_unreachable ();
30471 switch (reloc)
30473 case TLS_GD32:
30474 case TLS_LDM32:
30475 case TLS_IE32:
30476 case TLS_DESCSEQ:
30477 fputs (" + (. - ", fp);
30478 output_addr_const (fp, XVECEXP (x, 0, 2));
30479 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
30480 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
30481 output_addr_const (fp, XVECEXP (x, 0, 3));
30482 fputc (')', fp);
30483 break;
30484 default:
30485 break;
30488 return TRUE;
30491 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
30493 static void
30494 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
30496 gcc_assert (size == 4);
30497 fputs ("\t.word\t", file);
30498 output_addr_const (file, x);
30499 fputs ("(tlsldo)", file);
30502 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
30504 static bool
30505 arm_output_addr_const_extra (FILE *fp, rtx x)
30507 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
30508 return arm_emit_tls_decoration (fp, x);
30509 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
30511 char label[256];
30512 int labelno = INTVAL (XVECEXP (x, 0, 0));
30514 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
30515 assemble_name_raw (fp, label);
30517 return TRUE;
30519 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
30521 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
30522 if (GOT_PCREL)
30523 fputs ("+.", fp);
30524 fputs ("-(", fp);
30525 output_addr_const (fp, XVECEXP (x, 0, 0));
30526 fputc (')', fp);
30527 return TRUE;
30529 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
30531 output_addr_const (fp, XVECEXP (x, 0, 0));
30532 if (GOT_PCREL)
30533 fputs ("+.", fp);
30534 fputs ("-(", fp);
30535 output_addr_const (fp, XVECEXP (x, 0, 1));
30536 fputc (')', fp);
30537 return TRUE;
30539 else if (GET_CODE (x) == CONST_VECTOR)
30540 return arm_emit_vector_const (fp, x);
30542 return FALSE;
30545 /* Output assembly for a shift instruction.
30546 SET_FLAGS determines how the instruction modifies the condition codes.
30547 0 - Do not set condition codes.
30548 1 - Set condition codes.
30549 2 - Use smallest instruction. */
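/* As a sketch, for a left shift by a constant amount shift_op returns "lsl"
   and the template built below is "lsl%?\t%0, %1, %2" for SET_FLAGS == 0,
   "lsl%.\t%0, %1, %2" for SET_FLAGS == 1 and "lsl%!\t%0, %1, %2" for
   SET_FLAGS == 2; when shift_op returns no operator, a plain
   "mov%?\t%0, %1" form (with the matching flag character) is used instead.  */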
30550 const char *
30551 arm_output_shift(rtx * operands, int set_flags)
30553 char pattern[100];
30554 static const char flag_chars[3] = {'?', '.', '!'};
30555 const char *shift;
30556 HOST_WIDE_INT val;
30557 char c;
30559 c = flag_chars[set_flags];
30560 shift = shift_op(operands[3], &val);
30561 if (shift)
30563 if (val != -1)
30564 operands[2] = GEN_INT(val);
30565 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
30567 else
30568 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
30570 output_asm_insn (pattern, operands);
30571 return "";
30574 /* Output assembly for a WMMX immediate shift instruction. */
30575 const char *
30576 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
30578 int shift = INTVAL (operands[2]);
30579 char templ[50];
30580 machine_mode opmode = GET_MODE (operands[0]);
30582 gcc_assert (shift >= 0);
30584 /* If the shift value in the register versions is > 63 (for the D qualifier),
30585 31 (for the W qualifier) or 15 (for the H qualifier), handle the overflow below. */
30586 if (((opmode == V4HImode) && (shift > 15))
30587 || ((opmode == V2SImode) && (shift > 31))
30588 || ((opmode == DImode) && (shift > 63)))
30590 if (wror_or_wsra)
30592 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30593 output_asm_insn (templ, operands);
30594 if (opmode == DImode)
30596 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
30597 output_asm_insn (templ, operands);
30600 else
30602 /* The destination register will contain all zeros. */
30603 sprintf (templ, "wzero\t%%0");
30604 output_asm_insn (templ, operands);
30606 return "";
30609 if ((opmode == DImode) && (shift > 32))
30611 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30612 output_asm_insn (templ, operands);
30613 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
30614 output_asm_insn (templ, operands);
30616 else
30618 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
30619 output_asm_insn (templ, operands);
30621 return "";
30624 /* Output assembly for a WMMX tinsr instruction. */
30625 const char *
30626 arm_output_iwmmxt_tinsr (rtx *operands)
30628 int mask = INTVAL (operands[3]);
30629 int i;
30630 char templ[50];
30631 int units = mode_nunits[GET_MODE (operands[0])];
30632 gcc_assert ((mask & (mask - 1)) == 0);
30633 for (i = 0; i < units; ++i)
30635 if ((mask & 0x01) == 1)
30637 break;
30639 mask >>= 1;
30641 gcc_assert (i < units);
30643 switch (GET_MODE (operands[0]))
30645 case E_V8QImode:
30646 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
30647 break;
30648 case E_V4HImode:
30649 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
30650 break;
30651 case E_V2SImode:
30652 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
30653 break;
30654 default:
30655 gcc_unreachable ();
30656 break;
30658 output_asm_insn (templ, operands);
30660 return "";
30663 /* Output an arm casesi dispatch sequence. Used by arm_casesi_internal insn.
30664 Responsible for the handling of switch statements in arm. */
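/* As an example (register numbers purely illustrative), a QImode dispatch
   table expands to a sequence along the lines of
       cmp    r0, #NCASES
       bhi    .Ldefault
       ldrb   r4, [r5, r0]
       add    pc, pc, r4, lsl #2
   followed by the branch table itself; NCASES and .Ldefault are stand-ins
   for the bound and default-label operands (ldrsb is used for signed
   offsets).  */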
30665 const char *
30666 arm_output_casesi (rtx *operands)
30668 char label[100];
30669 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
30670 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30671 output_asm_insn ("cmp\t%0, %1", operands);
30672 output_asm_insn ("bhi\t%l3", operands);
30673 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
30674 switch (GET_MODE (diff_vec))
30676 case E_QImode:
30677 if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
30678 output_asm_insn ("ldrb\t%4, [%5, %0]", operands);
30679 else
30680 output_asm_insn ("ldrsb\t%4, [%5, %0]", operands);
30681 output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands);
30682 break;
30683 case E_HImode:
30684 if (REGNO (operands[4]) != REGNO (operands[5]))
30686 output_asm_insn ("add\t%4, %0, %0", operands);
30687 if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
30688 output_asm_insn ("ldrh\t%4, [%5, %4]", operands);
30689 else
30690 output_asm_insn ("ldrsh\t%4, [%5, %4]", operands);
30692 else
30694 output_asm_insn ("add\t%4, %5, %0", operands);
30695 if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
30696 output_asm_insn ("ldrh\t%4, [%4, %0]", operands);
30697 else
30698 output_asm_insn ("ldrsh\t%4, [%4, %0]", operands);
30700 output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands);
30701 break;
30702 case E_SImode:
30703 if (flag_pic)
30705 output_asm_insn ("ldr\t%4, [%5, %0, lsl #2]", operands);
30706 output_asm_insn ("add\t%|pc, %|pc, %4", operands);
30708 else
30709 output_asm_insn ("ldr\t%|pc, [%5, %0, lsl #2]", operands);
30710 break;
30711 default:
30712 gcc_unreachable ();
30714 assemble_label (asm_out_file, label);
30715 output_asm_insn ("nop", operands);
30716 return "";
30719 /* Output a Thumb-1 casesi dispatch sequence. */
30720 const char *
30721 thumb1_output_casesi (rtx *operands)
30723 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
30725 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30727 switch (GET_MODE(diff_vec))
30729 case E_QImode:
30730 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30731 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
30732 case E_HImode:
30733 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30734 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
30735 case E_SImode:
30736 return "bl\t%___gnu_thumb1_case_si";
30737 default:
30738 gcc_unreachable ();
30742 /* Output a Thumb-2 casesi instruction. */
30743 const char *
30744 thumb2_output_casesi (rtx *operands)
30746 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
30748 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30750 output_asm_insn ("cmp\t%0, %1", operands);
30751 output_asm_insn ("bhi\t%l3", operands);
30752 switch (GET_MODE(diff_vec))
30754 case E_QImode:
30755 return "tbb\t[%|pc, %0]";
30756 case E_HImode:
30757 return "tbh\t[%|pc, %0, lsl #1]";
30758 case E_SImode:
30759 if (flag_pic)
30761 output_asm_insn ("adr\t%4, %l2", operands);
30762 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
30763 output_asm_insn ("add\t%4, %4, %5", operands);
30764 return "bx\t%4";
30766 else
30768 output_asm_insn ("adr\t%4, %l2", operands);
30769 return "ldr\t%|pc, [%4, %0, lsl #2]";
30771 default:
30772 gcc_unreachable ();
30776 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
30777 per-core tuning structs. */
30778 static int
30779 arm_issue_rate (void)
30781 return current_tune->issue_rate;
30784 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
30785 static int
30786 arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
30788 if (DEBUG_INSN_P (insn))
30789 return more;
30791 rtx_code code = GET_CODE (PATTERN (insn));
30792 if (code == USE || code == CLOBBER)
30793 return more;
30795 if (get_attr_type (insn) == TYPE_NO_INSN)
30796 return more;
30798 return more - 1;
30801 /* Return how many instructions the scheduler should look ahead to choose
30802 the best one. */
30803 static int
30804 arm_first_cycle_multipass_dfa_lookahead (void)
30806 int issue_rate = arm_issue_rate ();
30808 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
30811 /* Enable modeling of L2 auto-prefetcher. */
30812 static int
30813 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
30815 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
30818 const char *
30819 arm_mangle_type (const_tree type)
30821 /* The ARM ABI documents (10th October 2008) say that "__va_list"
30822 has to be mangled as if it were in the "std" namespace. */
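/* E.g. a declaration such as "void f (__builtin_va_list);" therefore
   mangles as "_Z1fSt9__va_list" on AAPCS targets (illustrative only).  */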
30823 if (TARGET_AAPCS_BASED
30824 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
30825 return "St9__va_list";
30827 /* Half-precision floating point types. */
30828 if (SCALAR_FLOAT_TYPE_P (type) && TYPE_PRECISION (type) == 16)
30830 if (TYPE_MAIN_VARIANT (type) == float16_type_node)
30831 return NULL;
30832 if (TYPE_MODE (type) == BFmode)
30833 return "u6__bf16";
30834 else
30835 return "Dh";
30838 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
30839 builtin type. */
30840 if (TYPE_NAME (type) != NULL)
30841 return arm_mangle_builtin_type (type);
30843 /* Use the default mangling. */
30844 return NULL;
30847 /* Order of allocation of core registers for Thumb: this allocation is
30848 written over the corresponding initial entries of the array
30849 initialized with REG_ALLOC_ORDER. We allocate all low registers
30850 first. Saving and restoring a low register is usually cheaper than
30851 using a call-clobbered high register. */
30853 static const int thumb_core_reg_alloc_order[] =
30855 3, 2, 1, 0, 4, 5, 6, 7,
30856 12, 14, 8, 9, 10, 11
30859 /* Adjust register allocation order when compiling for Thumb. */
30861 void
30862 arm_order_regs_for_local_alloc (void)
30864 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
30865 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
30866 if (TARGET_THUMB)
30867 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
30868 sizeof (thumb_core_reg_alloc_order));
30871 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30873 bool
30874 arm_frame_pointer_required (void)
30876 if (SUBTARGET_FRAME_POINTER_REQUIRED)
30877 return true;
30879 /* If the function receives nonlocal gotos, it needs to save the frame
30880 pointer in the nonlocal_goto_save_area object. */
30881 if (cfun->has_nonlocal_label)
30882 return true;
30884 /* The frame pointer is required for non-leaf APCS frames. */
30885 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
30886 return true;
30888 /* If we are probing the stack in the prologue, we will have a faulting
30889 instruction prior to the stack adjustment and this requires a frame
30890 pointer if we want to catch the exception using the EABI unwinder. */
30891 if (!IS_INTERRUPT (arm_current_func_type ())
30892 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
30893 || flag_stack_clash_protection)
30894 && arm_except_unwind_info (&global_options) == UI_TARGET
30895 && cfun->can_throw_non_call_exceptions)
30897 HOST_WIDE_INT size = get_frame_size ();
30899 /* That's irrelevant if there is no stack adjustment. */
30900 if (size <= 0)
30901 return false;
30903 /* That's relevant only if there is a stack probe. */
30904 if (crtl->is_leaf && !cfun->calls_alloca)
30906 /* We don't have the final size of the frame so adjust. */
30907 size += 32 * UNITS_PER_WORD;
30908 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
30909 return true;
30911 else
30912 return true;
30915 return false;
30918 /* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
30919 All modes except THUMB1 have conditional execution.
30920 If we have conditional arithmetic, return false before reload to
30921 enable some ifcvt transformations. */
30922 static bool
30923 arm_have_conditional_execution (void)
30925 bool has_cond_exec, enable_ifcvt_trans;
30927 /* Only THUMB1 cannot support conditional execution. */
30928 has_cond_exec = !TARGET_THUMB1;
30930 /* Enable ifcvt transformations if we have conditional arithmetic, but only
30931 before reload. */
30932 enable_ifcvt_trans = TARGET_COND_ARITH && !reload_completed;
30934 return has_cond_exec && !enable_ifcvt_trans;
30937 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30938 static HOST_WIDE_INT
30939 arm_vector_alignment (const_tree type)
30941 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30943 if (TARGET_AAPCS_BASED)
30944 align = MIN (align, 64);
30946 return align;
30949 static unsigned int
30950 arm_autovectorize_vector_modes (vector_modes *modes, bool)
30952 if (!TARGET_NEON_VECTORIZE_DOUBLE)
30954 modes->safe_push (V16QImode);
30955 modes->safe_push (V8QImode);
30957 return 0;
30960 static bool
30961 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30963 /* Vectors which aren't in packed structures will not be less aligned than
30964 the natural alignment of their element type, so this is safe. */
30965 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30966 return !is_packed;
30968 return default_builtin_vector_alignment_reachable (type, is_packed);
30971 static bool
30972 arm_builtin_support_vector_misalignment (machine_mode mode,
30973 const_tree type, int misalignment,
30974 bool is_packed)
30976 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30978 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30980 if (is_packed)
30981 return align == 1;
30983 /* If the misalignment is unknown, we should be able to handle the access
30984 so long as it is not to a member of a packed data structure. */
30985 if (misalignment == -1)
30986 return true;
30988 /* Return true if the misalignment is a multiple of the natural alignment
30989 of the vector's element type. This is probably always going to be
30990 true in practice, since we've already established that this isn't a
30991 packed access. */
30992 return ((misalignment % align) == 0);
30995 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30996 is_packed);
30999 static void
31000 arm_conditional_register_usage (void)
31002 int regno;
31004 if (TARGET_THUMB1 && optimize_size)
31006 /* When optimizing for size on Thumb-1, it's better not
31007 to use the HI regs, because of the overhead of
31008 stacking them. */
31009 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
31010 fixed_regs[regno] = call_used_regs[regno] = 1;
31013 /* The link register can be clobbered by any branch insn,
31014 but we have no way to track that at present, so mark
31015 it as unavailable. */
31016 if (TARGET_THUMB1)
31017 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
31019 if (TARGET_32BIT && TARGET_VFP_BASE)
31021 /* VFPv3 registers are disabled when earlier VFP
31022 versions are selected due to the definition of
31023 LAST_VFP_REGNUM. */
31024 for (regno = FIRST_VFP_REGNUM;
31025 regno <= LAST_VFP_REGNUM; ++ regno)
31027 fixed_regs[regno] = 0;
31028 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
31029 || regno >= FIRST_VFP_REGNUM + 32;
31031 if (TARGET_HAVE_MVE)
31032 fixed_regs[VPR_REGNUM] = 0;
31035 if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
31037 regno = FIRST_IWMMXT_GR_REGNUM;
31038 /* The 2002/10/09 revision of the XScale ABI has wCG0
31039 and wCG1 as call-preserved registers. The 2002/11/21
31040 revision changed this so that all wCG registers are
31041 scratch registers. */
31042 for (regno = FIRST_IWMMXT_GR_REGNUM;
31043 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
31044 fixed_regs[regno] = 0;
31045 /* The XScale ABI has wR0 - wR9 as scratch registers,
31046 the rest as call-preserved registers. */
31047 for (regno = FIRST_IWMMXT_REGNUM;
31048 regno <= LAST_IWMMXT_REGNUM; ++ regno)
31050 fixed_regs[regno] = 0;
31051 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
31055 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
31057 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
31058 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
31060 else if (TARGET_APCS_STACK)
31062 fixed_regs[10] = 1;
31063 call_used_regs[10] = 1;
31065 /* -mcaller-super-interworking reserves r11 for calls to
31066 _interwork_r11_call_via_rN(). Making the register global
31067 is an easy way of ensuring that it remains valid for all
31068 calls. */
31069 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
31070 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
31072 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
31073 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
31074 if (TARGET_CALLER_INTERWORKING)
31075 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
31078 /* The Q and GE bits are only accessed via special ACLE patterns. */
31079 CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
31080 CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);
31082 SUBTARGET_CONDITIONAL_REGISTER_USAGE
31085 static reg_class_t
31086 arm_preferred_rename_class (reg_class_t rclass)
31088 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
31089 using GENERAL_REGS. During the register rename pass we prefer LO_REGS,
31090 so that code size can be reduced. */
31091 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
31092 return LO_REGS;
31093 else
31094 return NO_REGS;
31097 /* Compute the attribute "length" of insn "*push_multi".
31098 So this function MUST be kept in sync with that insn pattern. */
31100 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
31102 int i, regno, hi_reg;
31103 int num_saves = XVECLEN (parallel_op, 0);
31105 /* ARM mode. */
31106 if (TARGET_ARM)
31107 return 4;
31108 /* Thumb1 mode. */
31109 if (TARGET_THUMB1)
31110 return 2;
31112 /* Thumb2 mode. */
31113 regno = REGNO (first_op);
31114 /* For PUSH/STM in Thumb-2 mode, we can use a 16-bit encoding if the register
31115 list fits in 8 bits. Normally this means all registers in the list must be
31116 LO_REGS, that is (R0-R7). If any HI_REGS are used, we must use a 32-bit
31117 encoding. The one exception is PUSH, where LR (a HI_REG) can still be used
31118 with the 16-bit encoding. */
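/* For instance, "push {r0-r7, lr}" still has a 16-bit encoding, whereas a
   hypothetical "push {r0, r8}" or "stmdb sp!, {r4, r9}" needs the 32-bit
   form, so the lengths computed below are 2 and 4 bytes respectively.  */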
31119 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
31120 for (i = 1; i < num_saves && !hi_reg; i++)
31122 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
31123 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
31126 if (!hi_reg)
31127 return 2;
31128 return 4;
31131 /* Compute the attribute "length" of insn. Currently, this function is used
31132 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
31133 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
31134 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
31135 true if OPERANDS contains an insn which explicitly updates the base register. */
31138 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
31140 /* ARM mode. */
31141 if (TARGET_ARM)
31142 return 4;
31143 /* Thumb1 mode. */
31144 if (TARGET_THUMB1)
31145 return 2;
31147 rtx parallel_op = operands[0];
31148 /* Initialize to elements number of PARALLEL. */
31149 unsigned indx = XVECLEN (parallel_op, 0) - 1;
31150 /* Initialize the value to base register. */
31151 unsigned regno = REGNO (operands[1]);
31152 /* Skip the return and write-back patterns;
31153 we only need the register pop pattern for the later analysis. */
31154 unsigned first_indx = 0;
31155 first_indx += return_pc ? 1 : 0;
31156 first_indx += write_back_p ? 1 : 0;
31158 /* A pop operation can be done through LDM or POP. If the base register is SP
31159 and write-back is used, then LDM is an alias of POP. */
31160 bool pop_p = (regno == SP_REGNUM && write_back_p);
31161 bool ldm_p = !pop_p;
31163 /* Check base register for LDM. */
31164 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
31165 return 4;
31167 /* Check each register in the list. */
31168 for (; indx >= first_indx; indx--)
31170 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
31171 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
31172 comment in arm_attr_length_push_multi. */
31173 if (REGNO_REG_CLASS (regno) == HI_REGS
31174 && (regno != PC_REGNUM || ldm_p))
31175 return 4;
31178 return 2;
31181 /* Compute the number of instructions emitted by output_move_double. */
31183 arm_count_output_move_double_insns (rtx *operands)
31185 int count;
31186 rtx ops[2];
31187 /* output_move_double may modify the operands array, so call it
31188 here on a copy of the array. */
31189 ops[0] = operands[0];
31190 ops[1] = operands[1];
31191 output_move_double (ops, false, &count);
31192 return count;
31195 /* Same as above, but operands are a register/memory pair in SImode.
31196 Assumes operands has the base register in position 0 and memory in position
31197 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
31199 arm_count_ldrdstrd_insns (rtx *operands, bool load)
31201 int count;
31202 rtx ops[2];
31203 int regnum, memnum;
31204 if (load)
31205 regnum = 0, memnum = 1;
31206 else
31207 regnum = 1, memnum = 0;
31208 ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
31209 ops[memnum] = adjust_address (operands[2], DImode, 0);
31210 output_move_double (ops, false, &count);
31211 return count;
31216 vfp3_const_double_for_fract_bits (rtx operand)
31218 REAL_VALUE_TYPE r0;
31220 if (!CONST_DOUBLE_P (operand))
31221 return 0;
31223 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
31224 if (exact_real_inverse (DFmode, &r0)
31225 && !REAL_VALUE_NEGATIVE (r0))
31227 if (exact_real_truncate (DFmode, &r0))
31229 HOST_WIDE_INT value = real_to_integer (&r0);
31230 value = value & 0xffffffff;
31231 if ((value != 0) && ( (value & (value - 1)) == 0))
31233 int ret = exact_log2 (value);
31234 gcc_assert (IN_RANGE (ret, 0, 31));
31235 return ret;
31239 return 0;
31242 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
31243 log2 is in [1, 32], return that log2. Otherwise return -1.
31244 This is used in the patterns for vcvt.s32.f32 floating-point to
31245 fixed-point conversions. */
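/* For example, a CONST_DOUBLE of 4.0 yields 2 and 65536.0 yields 16, both
   powers of two with log2 in [1, 32], whereas 3.0, -8.0 or NaN yield -1.  */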
31248 vfp3_const_double_for_bits (rtx x)
31250 const REAL_VALUE_TYPE *r;
31252 if (!CONST_DOUBLE_P (x))
31253 return -1;
31255 r = CONST_DOUBLE_REAL_VALUE (x);
31257 if (REAL_VALUE_NEGATIVE (*r)
31258 || REAL_VALUE_ISNAN (*r)
31259 || REAL_VALUE_ISINF (*r)
31260 || !real_isinteger (r, SFmode))
31261 return -1;
31263 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
31265 /* The exact_log2 above will have returned -1 if this is
31266 not an exact log2. */
31267 if (!IN_RANGE (hwint, 1, 32))
31268 return -1;
31270 return hwint;
31274 /* Emit a memory barrier around an atomic sequence according to MODEL. */
31276 static void
31277 arm_pre_atomic_barrier (enum memmodel model)
31279 if (need_atomic_barrier_p (model, true))
31280 emit_insn (gen_memory_barrier ());
31283 static void
31284 arm_post_atomic_barrier (enum memmodel model)
31286 if (need_atomic_barrier_p (model, false))
31287 emit_insn (gen_memory_barrier ());
31290 /* Emit the load-exclusive and store-exclusive instructions.
31291 Use acquire and release versions if necessary. */
31293 static void
31294 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
31296 rtx (*gen) (rtx, rtx);
31298 if (acq)
31300 switch (mode)
31302 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
31303 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
31304 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
31305 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
31306 default:
31307 gcc_unreachable ();
31310 else
31312 switch (mode)
31314 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
31315 case E_HImode: gen = gen_arm_load_exclusivehi; break;
31316 case E_SImode: gen = gen_arm_load_exclusivesi; break;
31317 case E_DImode: gen = gen_arm_load_exclusivedi; break;
31318 default:
31319 gcc_unreachable ();
31323 emit_insn (gen (rval, mem));
31326 static void
31327 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
31328 rtx mem, bool rel)
31330 rtx (*gen) (rtx, rtx, rtx);
31332 if (rel)
31334 switch (mode)
31336 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
31337 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
31338 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
31339 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
31340 default:
31341 gcc_unreachable ();
31344 else
31346 switch (mode)
31348 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
31349 case E_HImode: gen = gen_arm_store_exclusivehi; break;
31350 case E_SImode: gen = gen_arm_store_exclusivesi; break;
31351 case E_DImode: gen = gen_arm_store_exclusivedi; break;
31352 default:
31353 gcc_unreachable ();
31357 emit_insn (gen (bval, rval, mem));
31360 /* Mark the previous jump instruction as unlikely. */
31362 static void
31363 emit_unlikely_jump (rtx insn)
31365 rtx_insn *jump = emit_jump_insn (insn);
31366 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
31369 /* Expand a compare and swap pattern. */
31371 void
31372 arm_expand_compare_and_swap (rtx operands[])
31374 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
31375 machine_mode mode, cmp_mode;
31377 bval = operands[0];
31378 rval = operands[1];
31379 mem = operands[2];
31380 oldval = operands[3];
31381 newval = operands[4];
31382 is_weak = operands[5];
31383 mod_s = operands[6];
31384 mod_f = operands[7];
31385 mode = GET_MODE (mem);
31387 /* Normally the succ memory model must be stronger than fail, but in the
31388 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
31389 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
31391 if (TARGET_HAVE_LDACQ
31392 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
31393 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
31394 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
31396 switch (mode)
31398 case E_QImode:
31399 case E_HImode:
31400 /* For narrow modes, we're going to perform the comparison in SImode,
31401 so do the zero-extension now. */
31402 rval = gen_reg_rtx (SImode);
31403 oldval = convert_modes (SImode, mode, oldval, true);
31404 /* FALLTHRU */
31406 case E_SImode:
31407 /* Force the value into a register if needed. We waited until after
31408 the zero-extension above to do this properly. */
31409 if (!arm_add_operand (oldval, SImode))
31410 oldval = force_reg (SImode, oldval);
31411 break;
31413 case E_DImode:
31414 if (!cmpdi_operand (oldval, mode))
31415 oldval = force_reg (mode, oldval);
31416 break;
31418 default:
31419 gcc_unreachable ();
31422 if (TARGET_THUMB1)
31423 cmp_mode = E_SImode;
31424 else
31425 cmp_mode = CC_Zmode;
31427 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
31428 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
31429 oldval, newval, is_weak, mod_s, mod_f));
31431 if (mode == QImode || mode == HImode)
31432 emit_move_insn (operands[1], gen_lowpart (mode, rval));
31434 /* In all cases, we arrange for success to be signaled by Z set.
31435 This arrangement allows for the boolean result to be used directly
31436 in a subsequent branch, post optimization. For Thumb-1 targets, the
31437 boolean negation of the result is also stored in bval because the Thumb-1
31438 backend lacks dependency tracking for the CC flag, flag-setting not
31439 being represented at the RTL level. */
31440 if (TARGET_THUMB1)
31441 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
31442 else
31444 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
31445 emit_insn (gen_rtx_SET (bval, x));
31449 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
31450 another memory store between the load-exclusive and store-exclusive can
31451 reset the monitor from Exclusive to Open state. This means we must wait
31452 until after reload to split the pattern, lest we get a register spill in
31453 the middle of the atomic sequence. Success of the compare and swap is
31454 indicated by the Z flag being set for 32-bit targets and by neg_bval being
31455 zero for Thumb-1 targets (i.e. the negation of the boolean value returned by the
31456 atomic_compare_and_swapmode standard pattern in operand 0). */
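/* Roughly, for a strong 32-bit compare-and-swap the split code follows the
   classic exclusive-access loop (memory-model barriers elided):
       1:  ldrex   rval, [mem]
           cmp     rval, oldval
           bne     2f
           strex   tmp, newval, [mem]
           cmp     tmp, #0
           bne     1b
       2:
   where rval, tmp and the labels stand for the operands created below.  */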
31458 void
31459 arm_split_compare_and_swap (rtx operands[])
31461 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
31462 machine_mode mode;
31463 enum memmodel mod_s, mod_f;
31464 bool is_weak;
31465 rtx_code_label *label1, *label2;
31466 rtx x, cond;
31468 rval = operands[1];
31469 mem = operands[2];
31470 oldval = operands[3];
31471 newval = operands[4];
31472 is_weak = (operands[5] != const0_rtx);
31473 mod_s_rtx = operands[6];
31474 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
31475 mod_f = memmodel_from_int (INTVAL (operands[7]));
31476 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
31477 mode = GET_MODE (mem);
31479 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
31481 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
31482 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
31484 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
31485 a full barrier is emitted after the store-release. */
31486 if (is_armv8_sync)
31487 use_acquire = false;
31489 /* Checks whether a barrier is needed and emits one accordingly. */
31490 if (!(use_acquire || use_release))
31491 arm_pre_atomic_barrier (mod_s);
31493 label1 = NULL;
31494 if (!is_weak)
31496 label1 = gen_label_rtx ();
31497 emit_label (label1);
31499 label2 = gen_label_rtx ();
31501 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
31503 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
31504 as required to communicate with arm_expand_compare_and_swap. */
31505 if (TARGET_32BIT)
31507 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
31508 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31509 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31510 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
31511 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31513 else
31515 cond = gen_rtx_NE (VOIDmode, rval, oldval);
31516 if (thumb1_cmpneg_operand (oldval, SImode))
31518 rtx src = rval;
31519 if (!satisfies_constraint_L (oldval))
31521 gcc_assert (satisfies_constraint_J (oldval));
31523 /* For such immediates, ADDS needs the source and destination regs
31524 to be the same.
31526 Normally this would be handled by RA, but this is all happening
31527 after RA. */
31528 emit_move_insn (neg_bval, rval);
31529 src = neg_bval;
31532 emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval, src, oldval,
31533 label2, cond));
31535 else
31537 emit_move_insn (neg_bval, const1_rtx);
31538 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
31542 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
31544 /* Weak or strong, we want EQ to be true for success, so that we
31545 match the flags that we got from the compare above. */
31546 if (TARGET_32BIT)
31548 cond = gen_rtx_REG (CCmode, CC_REGNUM);
31549 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
31550 emit_insn (gen_rtx_SET (cond, x));
31553 if (!is_weak)
31555 /* Z is set to boolean value of !neg_bval, as required to communicate
31556 with arm_expand_compare_and_swap. */
31557 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
31558 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
31561 if (!is_mm_relaxed (mod_f))
31562 emit_label (label2);
31564 /* Checks whether a barrier is needed and emits one accordingly. */
31565 if (is_armv8_sync
31566 || !(use_acquire || use_release))
31567 arm_post_atomic_barrier (mod_s);
31569 if (is_mm_relaxed (mod_f))
31570 emit_label (label2);
31573 /* Split an atomic operation pattern. Operation is given by CODE and is one
31574 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
31575 operation). Operation is performed on the content at MEM and on VALUE
31576 following the memory model MODEL_RTX. The content at MEM before and after
31577 the operation is returned in OLD_OUT and NEW_OUT respectively while the
31578 success of the operation is returned in COND. Using a scratch register or
31579 an operand register for these determines what result is returned for that
31580 pattern. */
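/* As a sketch, an __atomic_fetch_add on a 32-bit object is split into a
   loop of the shape (barriers elided):
       1:  ldrex   old_out, [mem]
           add     new_out, old_out, value
           strex   cond, new_out, [mem]
           cmp     cond, #0
           bne     1b  */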
31582 void
31583 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
31584 rtx value, rtx model_rtx, rtx cond)
31586 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
31587 machine_mode mode = GET_MODE (mem);
31588 machine_mode wmode = (mode == DImode ? DImode : SImode);
31589 rtx_code_label *label;
31590 bool all_low_regs, bind_old_new;
31591 rtx x;
31593 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
31595 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
31596 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
31598 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
31599 a full barrier is emitted after the store-release. */
31600 if (is_armv8_sync)
31601 use_acquire = false;
31603 /* Checks whether a barrier is needed and emits one accordingly. */
31604 if (!(use_acquire || use_release))
31605 arm_pre_atomic_barrier (model);
31607 label = gen_label_rtx ();
31608 emit_label (label);
31610 if (new_out)
31611 new_out = gen_lowpart (wmode, new_out);
31612 if (old_out)
31613 old_out = gen_lowpart (wmode, old_out);
31614 else
31615 old_out = new_out;
31616 value = simplify_gen_subreg (wmode, value, mode, 0);
31618 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
31620 /* Does the operation require the destination and first operand to use the
31621 same register? This is decided by the register constraints of the relevant
31622 insn patterns in thumb1.md. */
31623 gcc_assert (!new_out || REG_P (new_out));
31624 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
31625 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
31626 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
31627 bind_old_new =
31628 (TARGET_THUMB1
31629 && code != SET
31630 && code != MINUS
31631 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
31633 /* We want to return the old value while putting the result of the operation
31634 in the same register as the old value, so copy the old value over to the
31635 destination register and use that register for the operation. */
31636 if (old_out && bind_old_new)
31638 emit_move_insn (new_out, old_out);
31639 old_out = new_out;
31642 switch (code)
31644 case SET:
31645 new_out = value;
31646 break;
31648 case NOT:
31649 x = gen_rtx_AND (wmode, old_out, value);
31650 emit_insn (gen_rtx_SET (new_out, x));
31651 x = gen_rtx_NOT (wmode, new_out);
31652 emit_insn (gen_rtx_SET (new_out, x));
31653 break;
31655 case MINUS:
31656 if (CONST_INT_P (value))
31658 value = gen_int_mode (-INTVAL (value), wmode);
31659 code = PLUS;
31661 /* FALLTHRU */
31663 case PLUS:
31664 if (mode == DImode)
31666 /* DImode plus/minus need to clobber flags. */
31667 /* The adddi3 and subdi3 patterns are incorrectly written so that
31668 they require matching operands, even when we could easily support
31669 three operands. Thankfully, this can be fixed up post-splitting,
31670 as the individual add+adc patterns do accept three operands and
31671 post-reload cprop can make these moves go away. */
31672 emit_move_insn (new_out, old_out);
31673 if (code == PLUS)
31674 x = gen_adddi3 (new_out, new_out, value);
31675 else
31676 x = gen_subdi3 (new_out, new_out, value);
31677 emit_insn (x);
31678 break;
31680 /* FALLTHRU */
31682 default:
31683 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
31684 emit_insn (gen_rtx_SET (new_out, x));
31685 break;
31688 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
31689 use_release);
31691 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31692 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
31694 /* Checks whether a barrier is needed and emits one accordingly. */
31695 if (is_armv8_sync
31696 || !(use_acquire || use_release))
31697 arm_post_atomic_barrier (model);
31700 /* Return the mode for the MVE vector of predicates corresponding to MODE. */
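/* E.g. V16QImode maps to V16BImode and V4SImode to V4BImode; a mode with
   an unsupported number of lanes yields no predicate mode.  */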
31701 opt_machine_mode
31702 arm_mode_to_pred_mode (machine_mode mode)
31704 switch (GET_MODE_NUNITS (mode))
31706 case 16: return V16BImode;
31707 case 8: return V8BImode;
31708 case 4: return V4BImode;
31709 case 2: return V2QImode;
31711 return opt_machine_mode ();
31714 /* Expand code to compare vectors OP0 and OP1 using condition CODE.
31715 If CAN_INVERT, store either the result or its inverse in TARGET
31716 and return true if TARGET contains the inverse. If !CAN_INVERT,
31717 always store the result in TARGET, never its inverse.
31719 Note that the handling of floating-point comparisons is not
31720 IEEE compliant. */
31722 bool
31723 arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
31724 bool can_invert)
31726 machine_mode cmp_result_mode = GET_MODE (target);
31727 machine_mode cmp_mode = GET_MODE (op0);
31729 bool inverted;
31731 /* MVE supports more comparisons than Neon. */
31732 if (TARGET_HAVE_MVE)
31733 inverted = false;
31734 else
31735 switch (code)
31737 /* For these we need to compute the inverse of the requested
31738 comparison. */
31739 case UNORDERED:
31740 case UNLT:
31741 case UNLE:
31742 case UNGT:
31743 case UNGE:
31744 case UNEQ:
31745 case NE:
31746 code = reverse_condition_maybe_unordered (code);
31747 if (!can_invert)
31749 /* Recursively emit the inverted comparison into a temporary
31750 and then store its inverse in TARGET. This avoids reusing
31751 TARGET (which for integer NE could be one of the inputs). */
31752 rtx tmp = gen_reg_rtx (cmp_result_mode);
31753 if (arm_expand_vector_compare (tmp, code, op0, op1, true))
31754 gcc_unreachable ();
31755 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
31756 return false;
31758 inverted = true;
31759 break;
31761 default:
31762 inverted = false;
31763 break;
31766 switch (code)
31768 /* These are natively supported by Neon for zero comparisons, but otherwise
31769 require the operands to be swapped. For MVE, we can only compare
31770 registers. */
31771 case LE:
31772 case LT:
31773 if (!TARGET_HAVE_MVE)
31774 if (op1 != CONST0_RTX (cmp_mode))
31776 code = swap_condition (code);
31777 std::swap (op0, op1);
31779 /* Fall through. */
31781 /* These are natively supported by Neon for both register and zero
31782 operands. MVE supports registers only. */
31783 case EQ:
31784 case GE:
31785 case GT:
31786 case NE:
31787 if (TARGET_HAVE_MVE)
31789 switch (GET_MODE_CLASS (cmp_mode))
31791 case MODE_VECTOR_INT:
31792 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31793 op0, force_reg (cmp_mode, op1)));
31794 break;
31795 case MODE_VECTOR_FLOAT:
31796 if (TARGET_HAVE_MVE_FLOAT)
31797 emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target,
31798 op0, force_reg (cmp_mode, op1)));
31799 else
31800 gcc_unreachable ();
31801 break;
31802 default:
31803 gcc_unreachable ();
31806 else
31807 emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
31808 return inverted;
31810 /* These are natively supported for register operands only.
31811 Comparisons with zero aren't useful and should be folded
31812 or canonicalized by target-independent code. */
31813 case GEU:
31814 case GTU:
31815 if (TARGET_HAVE_MVE)
31816 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31817 op0, force_reg (cmp_mode, op1)));
31818 else
31819 emit_insn (gen_neon_vc (code, cmp_mode, target,
31820 op0, force_reg (cmp_mode, op1)));
31821 return inverted;
31823 /* These require the operands to be swapped and likewise do not
31824 support comparisons with zero. */
31825 case LEU:
31826 case LTU:
31827 if (TARGET_HAVE_MVE)
31828 emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, target,
31829 force_reg (cmp_mode, op1), op0));
31830 else
31831 emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
31832 target, force_reg (cmp_mode, op1), op0));
31833 return inverted;
31835 /* These need a combination of two comparisons. */
31836 case LTGT:
31837 case ORDERED:
31839 /* Operands are LTGT iff (a > b || a < b).
31840 Operands are ORDERED iff (a > b || a <= b). */
31841 rtx gt_res = gen_reg_rtx (cmp_result_mode);
31842 rtx alt_res = gen_reg_rtx (cmp_result_mode);
31843 rtx_code alt_code = (code == LTGT ? LT : LE);
31844 if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
31845 || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
31846 gcc_unreachable ();
31847 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
31848 gt_res, alt_res)));
31849 return inverted;
31852 default:
31853 gcc_unreachable ();
31858 #define MAX_VECT_LEN 16
31860 struct expand_vec_perm_d
31862 rtx target, op0, op1;
31863 vec_perm_indices perm;
31864 machine_mode vmode;
31865 bool one_vector_p;
31866 bool testing_p;
31869 /* Generate a variable permutation. */
31871 static void
31872 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
31874 machine_mode vmode = GET_MODE (target);
31875 bool one_vector_p = rtx_equal_p (op0, op1);
31877 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
31878 gcc_checking_assert (GET_MODE (op0) == vmode);
31879 gcc_checking_assert (GET_MODE (op1) == vmode);
31880 gcc_checking_assert (GET_MODE (sel) == vmode);
31881 gcc_checking_assert (TARGET_NEON);
31883 if (one_vector_p)
31885 if (vmode == V8QImode)
31886 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
31887 else
31888 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
31890 else
31892 rtx pair;
31894 if (vmode == V8QImode)
31896 pair = gen_reg_rtx (V16QImode);
31897 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
31898 pair = gen_lowpart (TImode, pair);
31899 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
31901 else
31903 pair = gen_reg_rtx (OImode);
31904 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
31905 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
31910 void
31911 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
31913 machine_mode vmode = GET_MODE (target);
31914 unsigned int nelt = GET_MODE_NUNITS (vmode);
31915 bool one_vector_p = rtx_equal_p (op0, op1);
31916 rtx mask;
31918 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31919 numbering of elements for big-endian, we must reverse the order. */
31920 gcc_checking_assert (!BYTES_BIG_ENDIAN);
31922 /* The VTBL instruction does not use a modulo index, so we must take care
31923 of that ourselves. */
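/* For a single V8QImode input, for instance, the mask built below is 7, so
   a selector element of 9 selects lane 1, matching the modulo semantics
   required by VEC_PERM_EXPR.  */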
31924 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
31925 mask = gen_const_vec_duplicate (vmode, mask);
31926 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
31928 arm_expand_vec_perm_1 (target, op0, op1, sel);
31931 /* Map lane ordering between architectural lane order, and GCC lane order,
31932 taking into account ABI. See comment above output_move_neon for details. */
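/* Under this mapping, big-endian V4SImode lanes 0, 1, 2, 3 become
   architectural lanes 1, 0, 3, 2 (reversed within each D register); on
   little-endian targets lanes are returned unchanged.  */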
31934 static int
31935 neon_endian_lane_map (machine_mode mode, int lane)
31937 if (BYTES_BIG_ENDIAN)
31939 int nelems = GET_MODE_NUNITS (mode);
31940 /* Reverse lane order. */
31941 lane = (nelems - 1 - lane);
31942 /* Reverse D register order, to match ABI. */
31943 if (GET_MODE_SIZE (mode) == 16)
31944 lane = lane ^ (nelems / 2);
31946 return lane;
31949 /* Some permutations index into pairs of vectors; this is a helper function
31950 to map indexes into those pairs of vectors. */
31952 static int
31953 neon_pair_endian_lane_map (machine_mode mode, int lane)
31955 int nelem = GET_MODE_NUNITS (mode);
31956 if (BYTES_BIG_ENDIAN)
31957 lane =
31958 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
31959 return lane;
31962 /* Generate or test for an insn that supports a constant permutation. */
31964 /* Recognize patterns for the VUZP insns. */
31966 static bool
31967 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
31969 unsigned int i, odd, mask, nelt = d->perm.length ();
31970 rtx out0, out1, in0, in1;
31971 int first_elem;
31972 int swap_nelt;
31974 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31975 return false;
31977 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
31978 big endian pattern on 64 bit vectors, so we correct for that. */
31979 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
31980 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
31982 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
31984 if (first_elem == neon_endian_lane_map (d->vmode, 0))
31985 odd = 0;
31986 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
31987 odd = 1;
31988 else
31989 return false;
31990 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31992 for (i = 0; i < nelt; i++)
31994 unsigned elt =
31995 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
31996 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
31997 return false;
32000 /* Success! */
32001 if (d->testing_p)
32002 return true;
32004 in0 = d->op0;
32005 in1 = d->op1;
32006 if (swap_nelt != 0)
32007 std::swap (in0, in1);
32009 out0 = d->target;
32010 out1 = gen_reg_rtx (d->vmode);
32011 if (odd)
32012 std::swap (out0, out1);
32014 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
32015 return true;
32018 /* Recognize patterns for the VZIP insns. */
32020 static bool
32021 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
32023 unsigned int i, high, mask, nelt = d->perm.length ();
32024 rtx out0, out1, in0, in1;
32025 int first_elem;
32026 bool is_swapped;
32028 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
32029 return false;
32031 is_swapped = BYTES_BIG_ENDIAN;
32033 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
32035 high = nelt / 2;
32036 if (first_elem == neon_endian_lane_map (d->vmode, high))
32037 ;
32038 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
32039 high = 0;
32040 else
32041 return false;
32042 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
32044 for (i = 0; i < nelt / 2; i++)
32046 unsigned elt =
32047 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
32048 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
32049 != elt)
32050 return false;
32051 elt =
32052 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
32053 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
32054 != elt)
32055 return false;
32058 /* Success! */
32059 if (d->testing_p)
32060 return true;
32062 in0 = d->op0;
32063 in1 = d->op1;
32064 if (is_swapped)
32065 std::swap (in0, in1);
32067 out0 = d->target;
32068 out1 = gen_reg_rtx (d->vmode);
32069 if (high)
32070 std::swap (out0, out1);
32072 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
32073 return true;
32076 /* Recognize patterns for the VREV insns. */
32077 static bool
32078 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
32080 unsigned int i, j, diff, nelt = d->perm.length ();
32081 rtx (*gen) (machine_mode, rtx, rtx);
32083 if (!d->one_vector_p)
32084 return false;
32086 diff = d->perm[0];
32087 switch (diff)
32089 case 7:
32090 switch (d->vmode)
32092 case E_V16QImode:
32093 case E_V8QImode:
32094 gen = gen_neon_vrev64;
32095 break;
32096 default:
32097 return false;
32099 break;
32100 case 3:
32101 switch (d->vmode)
32103 case E_V16QImode:
32104 case E_V8QImode:
32105 gen = gen_neon_vrev32;
32106 break;
32107 case E_V8HImode:
32108 case E_V4HImode:
32109 case E_V8HFmode:
32110 case E_V4HFmode:
32111 gen = gen_neon_vrev64;
32112 break;
32113 default:
32114 return false;
32116 break;
32117 case 1:
32118 switch (d->vmode)
32120 case E_V16QImode:
32121 case E_V8QImode:
32122 gen = gen_neon_vrev16;
32123 break;
32124 case E_V8HImode:
32125 case E_V4HImode:
32126 gen = gen_neon_vrev32;
32127 break;
32128 case E_V4SImode:
32129 case E_V2SImode:
32130 case E_V4SFmode:
32131 case E_V2SFmode:
32132 gen = gen_neon_vrev64;
32133 break;
32134 default:
32135 return false;
32137 break;
32138 default:
32139 return false;
32142 for (i = 0; i < nelt ; i += diff + 1)
32143 for (j = 0; j <= diff; j += 1)
32145 /* This is guaranteed to be true as the value of diff
32146 is 7, 3 or 1, and we should have enough elements in the
32147 queue to generate this. Getting a vector mask with any
32148 other value of diff implies that something has gone
32149 wrong by the time we get here. */
32150 gcc_assert (i + j < nelt);
32151 if (d->perm[i + j] != i + diff - j)
32152 return false;
32155 /* Success! */
32156 if (d->testing_p)
32157 return true;
32159 emit_insn (gen (d->vmode, d->target, d->op0));
32160 return true;
32163 /* Recognize patterns for the VTRN insns. */
32165 static bool
32166 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
32168 unsigned int i, odd, mask, nelt = d->perm.length ();
32169 rtx out0, out1, in0, in1;
32171 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
32172 return false;
32174 /* Note that these are little-endian tests. Adjust for big-endian later. */
32175 if (d->perm[0] == 0)
32176 odd = 0;
32177 else if (d->perm[0] == 1)
32178 odd = 1;
32179 else
32180 return false;
32181 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
32183 for (i = 0; i < nelt; i += 2)
32185 if (d->perm[i] != i + odd)
32186 return false;
32187 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
32188 return false;
32191 /* Success! */
32192 if (d->testing_p)
32193 return true;
32195 in0 = d->op0;
32196 in1 = d->op1;
32197 if (BYTES_BIG_ENDIAN)
32199 std::swap (in0, in1);
32200 odd = !odd;
32203 out0 = d->target;
32204 out1 = gen_reg_rtx (d->vmode);
32205 if (odd)
32206 std::swap (out0, out1);
32208 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
32209 return true;
32212 /* Recognize patterns for the VEXT insns. */
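/* For illustration, on little-endian V8QImode with two input vectors
   the selector { 3, 4, 5, 6, 7, 8, 9, 10 } is a run of consecutive
   indices starting at 3, so it is matched as VEXT with #3 as the
   extraction offset.  */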
32214 static bool
32215 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
32217 unsigned int i, nelt = d->perm.length ();
32218 rtx offset;
32220 unsigned int location;
32222 unsigned int next = d->perm[0] + 1;
32224 /* TODO: Handle GCC's numbering of elements for big-endian. */
32225 if (BYTES_BIG_ENDIAN)
32226 return false;
32228 /* Check if the extracted indexes are increasing by one. */
32229 for (i = 1; i < nelt; next++, i++)
32231 /* If we hit the most significant element of the 2nd vector in
32232 the previous iteration, no need to test further. */
32233 if (next == 2 * nelt)
32234 return false;
32236 /* If we are operating on only one vector: it could be a
32237 rotation. If there are only two elements of size < 64, let
32238 arm_evpc_neon_vrev catch it. */
32239 if (d->one_vector_p && (next == nelt))
32241 if ((nelt == 2) && (d->vmode != V2DImode))
32242 return false;
32243 else
32244 next = 0;
32247 if (d->perm[i] != next)
32248 return false;
32251 location = d->perm[0];
32253 /* Success! */
32254 if (d->testing_p)
32255 return true;
32257 offset = GEN_INT (location);
32259 if (d->vmode == E_DImode)
32260 return false;
32262 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
32263 return true;
32266 /* The NEON VTBL instruction is a fully variable permutation that's even
32267 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
32268 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
32269 can do slightly better by expanding this as a constant where we don't
32270 have to apply a mask. */
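/* For illustration (an arbitrary selector, chosen only as an example):
   a V8QImode mask such as { 0, 3, 3, 9, 12, 1, 7, 15 } has none of the
   vuzp/vzip/vrev/vtrn/vext structure, so it is loaded as a constant
   index vector and handed to arm_expand_vec_perm_1 below.  */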
32272 static bool
32273 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
32275 rtx rperm[MAX_VECT_LEN], sel;
32276 machine_mode vmode = d->vmode;
32277 unsigned int i, nelt = d->perm.length ();
32279 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
32280 numbering of elements for big-endian, we must reverse the order. */
32281 if (BYTES_BIG_ENDIAN)
32282 return false;
32284 if (d->testing_p)
32285 return true;
32287 /* Generic code will try constant permutation twice. Once with the
32288 original mode and again with the elements lowered to QImode.
32289 So wait and don't do the selector expansion ourselves. */
32290 if (vmode != V8QImode && vmode != V16QImode)
32291 return false;
32293 for (i = 0; i < nelt; ++i)
32294 rperm[i] = GEN_INT (d->perm[i]);
32295 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
32296 sel = force_reg (vmode, sel);
32298 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
32299 return true;
32302 static bool
32303 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
32305 /* Check if the input mask matches vext before reordering the
32306 operands. */
32307 if (TARGET_NEON)
32308 if (arm_evpc_neon_vext (d))
32309 return true;
32311 /* The pattern matching functions above are written to look for a small
32312 number to begin the sequence (0, 1, N/2). If we begin with an index
32313 from the second operand, we can swap the operands. */
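/* For example, a V8QImode selector { 8, 0, 9, 1, 10, 2, 11, 3 } starts
   with an index from the second operand; rotating the inputs turns it
   into { 0, 8, 1, 9, 2, 10, 3, 11 } with OP0 and OP1 exchanged, which
   the VZIP matcher above can then recognize.  */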
32314 unsigned int nelt = d->perm.length ();
32315 if (d->perm[0] >= nelt)
32317 d->perm.rotate_inputs (1);
32318 std::swap (d->op0, d->op1);
32321 if (TARGET_NEON)
32323 if (arm_evpc_neon_vuzp (d))
32324 return true;
32325 if (arm_evpc_neon_vzip (d))
32326 return true;
32327 if (arm_evpc_neon_vrev (d))
32328 return true;
32329 if (arm_evpc_neon_vtrn (d))
32330 return true;
32331 return arm_evpc_neon_vtbl (d);
32333 return false;
32336 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
32338 static bool
32339 arm_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
32340 rtx target, rtx op0, rtx op1,
32341 const vec_perm_indices &sel)
32343 if (vmode != op_mode)
32344 return false;
32346 struct expand_vec_perm_d d;
32347 int i, nelt, which;
32349 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
32350 return false;
32352 d.target = target;
32353 if (op0)
32355 rtx nop0 = force_reg (vmode, op0);
32356 if (op0 == op1)
32357 op1 = nop0;
32358 op0 = nop0;
32360 if (op1)
32361 op1 = force_reg (vmode, op1);
32362 d.op0 = op0;
32363 d.op1 = op1;
32365 d.vmode = vmode;
32366 gcc_assert (VECTOR_MODE_P (d.vmode));
32367 d.testing_p = !target;
32369 nelt = GET_MODE_NUNITS (d.vmode);
32370 for (i = which = 0; i < nelt; ++i)
32372 int ei = sel[i] & (2 * nelt - 1);
32373 which |= (ei < nelt ? 1 : 2);
32376 switch (which)
32378 default:
32379 gcc_unreachable ();
32381 case 3:
32382 d.one_vector_p = false;
32383 if (d.testing_p || !rtx_equal_p (op0, op1))
32384 break;
32386 /* The elements of PERM do not suggest that only the first operand
32387 is used, but both operands are identical. Allow easier matching
32388 of the permutation by folding the permutation into the single
32389 input vector. */
32390 /* FALLTHRU */
32391 case 2:
32392 d.op0 = op1;
32393 d.one_vector_p = true;
32394 break;
32396 case 1:
32397 d.op1 = op0;
32398 d.one_vector_p = true;
32399 break;
32402 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
32404 if (!d.testing_p)
32405 return arm_expand_vec_perm_const_1 (&d);
32407 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
32408 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
32409 if (!d.one_vector_p)
32410 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
32412 start_sequence ();
32413 bool ret = arm_expand_vec_perm_const_1 (&d);
32414 end_sequence ();
32416 return ret;
32419 bool
32420 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
32422 /* If we are soft float and either ldrd is available or the mode
32423 fits in a single word, then all auto increment forms are ok. */
32424 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
32425 return true;
32427 switch (code)
32429 /* Post increment and Pre Decrement are supported for all
32430 instruction forms except for vector forms. */
32431 case ARM_POST_INC:
32432 case ARM_PRE_DEC:
32433 if (VECTOR_MODE_P (mode))
32435 if (code != ARM_PRE_DEC)
32436 return true;
32437 else
32438 return false;
32441 return true;
32443 case ARM_POST_DEC:
32444 case ARM_PRE_INC:
32445 /* Without LDRD, if the mode size is greater than the
32446 word size there is no point in auto-incrementing because
32447 ldm and stm will not have these forms. */
32448 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
32449 return false;
32451 /* Vector and floating point modes do not support
32452 these auto increment forms. */
32453 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
32454 return false;
32456 return true;
32458 default:
32459 return false;
32463 return false;
32466 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
32467 on ARM, since we know that shifts by negative amounts are no-ops.
32468 Additionally, the default expansion code is not available or suitable
32469 for post-reload insn splits (this can occur when the register allocator
32470 chooses not to do a shift in NEON).
32472 This function is used in both initial expand and post-reload splits, and
32473 handles all kinds of 64-bit shifts.
32475 Input requirements:
32476 - It is safe for the input and output to be the same register, but
32477 early-clobber rules apply for the shift amount and scratch registers.
32478 - Shift by register requires both scratch registers. In all other cases
32479 the scratch registers may be NULL.
32480 - Ashiftrt by a register also clobbers the CC register. */
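/* As an illustrative sketch (shift amounts chosen arbitrarily), a
   constant logical right shift of a DImode value by 10 is emitted as
   roughly:
     out_low  = in_low >> 10;
     out_low |= in_high << 22;   /+ i.e. 32 - 10 +/
     out_high = in_high >> 10;
   while shifts by 32 or more only use the high input word.  */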
32481 void
32482 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
32483 rtx amount, rtx scratch1, rtx scratch2)
32485 rtx out_high = gen_highpart (SImode, out);
32486 rtx out_low = gen_lowpart (SImode, out);
32487 rtx in_high = gen_highpart (SImode, in);
32488 rtx in_low = gen_lowpart (SImode, in);
32490 /* Terminology:
32491 in = the register pair containing the input value.
32492 out = the destination register pair.
32493 up = the high- or low-part of each pair.
32494 down = the opposite part to "up".
32495 In a shift, we can consider bits to shift from "up"-stream to
32496 "down"-stream, so in a left-shift "up" is the low-part and "down"
32497 is the high-part of each register pair. */
32499 rtx out_up = code == ASHIFT ? out_low : out_high;
32500 rtx out_down = code == ASHIFT ? out_high : out_low;
32501 rtx in_up = code == ASHIFT ? in_low : in_high;
32502 rtx in_down = code == ASHIFT ? in_high : in_low;
32504 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
32505 gcc_assert (out
32506 && (REG_P (out) || SUBREG_P (out))
32507 && GET_MODE (out) == DImode);
32508 gcc_assert (in
32509 && (REG_P (in) || SUBREG_P (in))
32510 && GET_MODE (in) == DImode);
32511 gcc_assert (amount
32512 && (((REG_P (amount) || SUBREG_P (amount))
32513 && GET_MODE (amount) == SImode)
32514 || CONST_INT_P (amount)));
32515 gcc_assert (scratch1 == NULL
32516 || (GET_CODE (scratch1) == SCRATCH)
32517 || (GET_MODE (scratch1) == SImode
32518 && REG_P (scratch1)));
32519 gcc_assert (scratch2 == NULL
32520 || (GET_CODE (scratch2) == SCRATCH)
32521 || (GET_MODE (scratch2) == SImode
32522 && REG_P (scratch2)));
32523 gcc_assert (!REG_P (out) || !REG_P (amount)
32524 || !HARD_REGISTER_P (out)
32525 || (REGNO (out) != REGNO (amount)
32526 && REGNO (out) + 1 != REGNO (amount)));
32528 /* Macros to make following code more readable. */
32529 #define SUB_32(DEST,SRC) \
32530 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
32531 #define RSB_32(DEST,SRC) \
32532 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
32533 #define SUB_S_32(DEST,SRC) \
32534 gen_addsi3_compare0 ((DEST), (SRC), \
32535 GEN_INT (-32))
32536 #define SET(DEST,SRC) \
32537 gen_rtx_SET ((DEST), (SRC))
32538 #define SHIFT(CODE,SRC,AMOUNT) \
32539 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
32540 #define LSHIFT(CODE,SRC,AMOUNT) \
32541 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
32542 SImode, (SRC), (AMOUNT))
32543 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
32544 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
32545 SImode, (SRC), (AMOUNT))
32546 #define ORR(A,B) \
32547 gen_rtx_IOR (SImode, (A), (B))
32548 #define BRANCH(COND,LABEL) \
32549 gen_arm_cond_branch ((LABEL), \
32550 gen_rtx_ ## COND (CCmode, cc_reg, \
32551 const0_rtx), \
32552 cc_reg)
32554 /* Shifts by register and shifts by constant are handled separately. */
32555 if (CONST_INT_P (amount))
32557 /* We have a shift-by-constant. */
32559 /* First, handle out-of-range shift amounts.
32560 In both cases we try to match the result that an ARM
32561 shift-by-register instruction would give.  This helps reduce
32562 execution differences between optimization levels, but it won't
32563 stop other parts of the compiler doing different things.  This is
32564 "undefined" behavior, in any case. */
32565 if (INTVAL (amount) <= 0)
32566 emit_insn (gen_movdi (out, in));
32567 else if (INTVAL (amount) >= 64)
32569 if (code == ASHIFTRT)
32571 rtx const31_rtx = GEN_INT (31);
32572 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
32573 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
32575 else
32576 emit_insn (gen_movdi (out, const0_rtx));
32579 /* Now handle valid shifts. */
32580 else if (INTVAL (amount) < 32)
32582 /* Shifts by a constant less than 32. */
32583 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
32585 /* Clearing the out register in DImode first avoids lots
32586 of spilling and results in less stack usage.
32587 Later this redundant insn is completely removed.
32588 Do that only if "in" and "out" are different registers. */
32589 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32590 emit_insn (SET (out, const0_rtx));
32591 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32592 emit_insn (SET (out_down,
32593 ORR (REV_LSHIFT (code, in_up, reverse_amount),
32594 out_down)));
32595 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32597 else
32599 /* Shifts by a constant greater than 31. */
32600 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
32602 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32603 emit_insn (SET (out, const0_rtx));
32604 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
32605 if (code == ASHIFTRT)
32606 emit_insn (gen_ashrsi3 (out_up, in_up,
32607 GEN_INT (31)));
32608 else
32609 emit_insn (SET (out_up, const0_rtx));
32612 else
32614 /* We have a shift-by-register. */
32615 rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
32617 /* This alternative requires the scratch registers. */
32618 gcc_assert (scratch1 && REG_P (scratch1));
32619 gcc_assert (scratch2 && REG_P (scratch2));
32621 /* We will need the values "amount-32" and "32-amount" later.
32622 Swapping them around now allows the later code to be more general. */
32623 switch (code)
32625 case ASHIFT:
32626 emit_insn (SUB_32 (scratch1, amount));
32627 emit_insn (RSB_32 (scratch2, amount));
32628 break;
32629 case ASHIFTRT:
32630 emit_insn (RSB_32 (scratch1, amount));
32631 /* Also set CC = amount > 32. */
32632 emit_insn (SUB_S_32 (scratch2, amount));
32633 break;
32634 case LSHIFTRT:
32635 emit_insn (RSB_32 (scratch1, amount));
32636 emit_insn (SUB_32 (scratch2, amount));
32637 break;
32638 default:
32639 gcc_unreachable ();
32642 /* Emit code like this:
32644 arithmetic-left:
32645 out_down = in_down << amount;
32646 out_down = (in_up << (amount - 32)) | out_down;
32647 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
32648 out_up = in_up << amount;
32650 arithmetic-right:
32651 out_down = in_down >> amount;
32652 out_down = (in_up << (32 - amount)) | out_down;
32653 if (amount < 32)
32654 out_down = ((signed)in_up >> (amount - 32)) | out_down;
32655 out_up = in_up << amount;
32657 logical-right:
32658 out_down = in_down >> amount;
32659 out_down = (in_up << (32 - amount)) | out_down;
32660 if (amount < 32)
32661 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
32662 out_up = in_up << amount;
32664 The ARM and Thumb2 variants are the same but implemented slightly
32665 differently. If this were only called during expand we could just
32666 use the Thumb2 case and let combine do the right thing, but this
32667 can also be called from post-reload splitters. */
32669 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32671 if (!TARGET_THUMB2)
32673 /* Emit code for ARM mode. */
32674 emit_insn (SET (out_down,
32675 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
32676 if (code == ASHIFTRT)
32678 rtx_code_label *done_label = gen_label_rtx ();
32679 emit_jump_insn (BRANCH (LT, done_label));
32680 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
32681 out_down)));
32682 emit_label (done_label);
32684 else
32685 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
32686 out_down)));
32688 else
32690 /* Emit code for Thumb2 mode.
32691 Thumb2 can't do shift and or in one insn. */
32692 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
32693 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
32695 if (code == ASHIFTRT)
32697 rtx_code_label *done_label = gen_label_rtx ();
32698 emit_jump_insn (BRANCH (LT, done_label));
32699 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
32700 emit_insn (SET (out_down, ORR (out_down, scratch2)));
32701 emit_label (done_label);
32703 else
32705 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
32706 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
32710 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32713 #undef SUB_32
32714 #undef RSB_32
32715 #undef SUB_S_32
32716 #undef SET
32717 #undef SHIFT
32718 #undef LSHIFT
32719 #undef REV_LSHIFT
32720 #undef ORR
32721 #undef BRANCH
32724 /* Returns true if the pattern is a valid symbolic address, which is either a
32725 symbol_ref or (symbol_ref + addend).
32727 According to the ARM ELF ABI, the initial addend of REL-type relocations
32728 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
32729 literal field of the instruction as a 16-bit signed value in the range
32730 -32768 <= A < 32768.
32732 In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
32733 unsigned range of 0 <= A < 256 as described in the AAELF32
32734 relocation handling documentation: REL-type relocations are encoded
32735 as unsigned in this case. */
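/* For illustration, both (symbol_ref "foo") and
   (const (plus (symbol_ref "foo") (const_int 12))) are accepted here,
   the latter only while the addend fits the relocation range described
   above; everything is rejected under -mword-relocations.  */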
32737 bool
32738 arm_valid_symbolic_address_p (rtx addr)
32740 rtx xop0, xop1 = NULL_RTX;
32741 rtx tmp = addr;
32743 if (target_word_relocations)
32744 return false;
32746 if (SYMBOL_REF_P (tmp) || LABEL_REF_P (tmp))
32747 return true;
32749 /* (const (plus: symbol_ref const_int)) */
32750 if (GET_CODE (addr) == CONST)
32751 tmp = XEXP (addr, 0);
32753 if (GET_CODE (tmp) == PLUS)
32755 xop0 = XEXP (tmp, 0);
32756 xop1 = XEXP (tmp, 1);
32758 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
32760 if (TARGET_THUMB1 && !TARGET_HAVE_MOVT)
32761 return IN_RANGE (INTVAL (xop1), 0, 0xff);
32762 else
32763 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
32767 return false;
32770 /* Returns true if *COMPARISON is a valid comparison operation, and
32771 puts *OP1 and *OP2 into a form that is valid for it. */
32772 bool
32773 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
32775 enum rtx_code code = GET_CODE (*comparison);
32776 int code_int;
32777 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
32778 ? GET_MODE (*op2) : GET_MODE (*op1);
32780 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
32782 if (code == UNEQ || code == LTGT)
32783 return false;
32785 code_int = (int)code;
32786 arm_canonicalize_comparison (&code_int, op1, op2, 0);
32787 PUT_CODE (*comparison, (enum rtx_code)code_int);
32789 switch (mode)
32791 case E_SImode:
32792 if (!arm_add_operand (*op1, mode))
32793 *op1 = force_reg (mode, *op1);
32794 if (!arm_add_operand (*op2, mode))
32795 *op2 = force_reg (mode, *op2);
32796 return true;
32798 case E_DImode:
32799 /* gen_compare_reg() will sort out any invalid operands. */
32800 return true;
32802 case E_HFmode:
32803 if (!TARGET_VFP_FP16INST)
32804 break;
32805 /* FP16 comparisons are done in SF mode. */
32806 mode = SFmode;
32807 *op1 = convert_to_mode (mode, *op1, 1);
32808 *op2 = convert_to_mode (mode, *op2, 1);
32809 /* Fall through. */
32810 case E_SFmode:
32811 case E_DFmode:
32812 if (!vfp_compare_operand (*op1, mode))
32813 *op1 = force_reg (mode, *op1);
32814 if (!vfp_compare_operand (*op2, mode))
32815 *op2 = force_reg (mode, *op2);
32816 return true;
32817 default:
32818 break;
32821 return false;
32825 /* Maximum number of instructions to set block of memory. */
32826 static int
32827 arm_block_set_max_insns (void)
32829 if (optimize_function_for_size_p (cfun))
32830 return 4;
32831 else
32832 return current_tune->max_insns_inline_memset;
32835 /* Return TRUE if it's profitable to set block of memory for
32836 non-vectorized case. VAL is the value to set the memory
32837 with. LENGTH is the number of bytes to set. ALIGN is the
32838 alignment of the destination memory in bytes. UNALIGNED_P
32839 is TRUE if we can only set the memory with instructions
32840 meeting alignment requirements. USE_STRD_P is TRUE if we
32841 can use strd to set the memory. */
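/* Illustrative arithmetic (assuming the constant needs one instruction
   to materialize): LENGTH == 15 with a word-aligned destination and no
   strd gives (15 >> 2) + leftover[3] = 3 + 2 stores, minus one if the
   trailing STRH/STRB pair can be merged into an STR, so 5 instructions
   in total, which is compared against arm_block_set_max_insns ().  */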
32842 static bool
32843 arm_block_set_non_vect_profit_p (rtx val,
32844 unsigned HOST_WIDE_INT length,
32845 unsigned HOST_WIDE_INT align,
32846 bool unaligned_p, bool use_strd_p)
32848 int num = 0;
32849 /* For a leftover of 0-7 bytes, we can finish setting the memory
32850 block using strb/strh/str with the minimum number of instructions. */
32851 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
32853 if (unaligned_p)
32855 num = arm_const_inline_cost (SET, val);
32856 num += length / align + length % align;
32858 else if (use_strd_p)
32860 num = arm_const_double_inline_cost (val);
32861 num += (length >> 3) + leftover[length & 7];
32863 else
32865 num = arm_const_inline_cost (SET, val);
32866 num += (length >> 2) + leftover[length & 3];
32869 /* We may be able to combine the last STRH/STRB pair into a single
32870 STR by shifting one byte back. */
32871 if (unaligned_access && length > 3 && (length & 3) == 3)
32872 num--;
32874 return (num <= arm_block_set_max_insns ());
32877 /* Return TRUE if it's profitable to set block of memory for
32878 vectorized case. LENGTH is the number of bytes to set.
32879 ALIGN is the alignment of destination memory in bytes.
32880 MODE is the vector mode used to set the memory. */
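/* Illustrative arithmetic: LENGTH == 19 with a word-aligned destination
   and MODE == V16QImode gives 1 (load the constant) + 2 (stores)
   + 1 (address adjustment for the 3 leftover bytes) - 1 (the first
   vst1:v16qi needs no adjustment) = 3 instructions, which is compared
   against arm_block_set_max_insns ().  */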
32881 static bool
32882 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
32883 unsigned HOST_WIDE_INT align,
32884 machine_mode mode)
32886 int num;
32887 bool unaligned_p = ((align & 3) != 0);
32888 unsigned int nelt = GET_MODE_NUNITS (mode);
32890 /* Instruction loading constant value. */
32891 num = 1;
32892 /* Instructions storing the memory. */
32893 num += (length + nelt - 1) / nelt;
32894 /* Instructions adjusting the address expression. Only need to
32895 adjust address expression if it's 4 bytes aligned and bytes
32896 leftover can only be stored by mis-aligned store instruction. */
32897 if (!unaligned_p && (length & 3) != 0)
32898 num++;
32900 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
32901 if (!unaligned_p && mode == V16QImode)
32902 num--;
32904 return (num <= arm_block_set_max_insns ());
32907 /* Set a block of memory using vectorization instructions for the
32908 unaligned case. We fill the first LENGTH bytes of the memory
32909 area starting from DSTBASE with byte constant VALUE. ALIGN is
32910 the alignment requirement of memory. Return TRUE if succeeded. */
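/* For illustration, LENGTH == 20 with a misaligned destination emits a
   misaligned v16qi store for bytes 0-15, then narrows the value to
   v8qi and emits a second misaligned store at offset 12, so the last
   store overlaps the first and ends exactly at byte 19.  */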
32911 static bool
32912 arm_block_set_unaligned_vect (rtx dstbase,
32913 unsigned HOST_WIDE_INT length,
32914 unsigned HOST_WIDE_INT value,
32915 unsigned HOST_WIDE_INT align)
32917 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
32918 rtx dst, mem;
32919 rtx val_vec, reg;
32920 rtx (*gen_func) (rtx, rtx);
32921 machine_mode mode;
32922 unsigned HOST_WIDE_INT v = value;
32923 unsigned int offset = 0;
32924 gcc_assert ((align & 0x3) != 0);
32925 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32926 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32927 if (length >= nelt_v16)
32929 mode = V16QImode;
32930 gen_func = gen_movmisalignv16qi;
32932 else
32934 mode = V8QImode;
32935 gen_func = gen_movmisalignv8qi;
32937 nelt_mode = GET_MODE_NUNITS (mode);
32938 gcc_assert (length >= nelt_mode);
32939 /* Skip if it isn't profitable. */
32940 if (!arm_block_set_vect_profit_p (length, align, mode))
32941 return false;
32943 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32944 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32946 v = sext_hwi (v, BITS_PER_WORD);
32948 reg = gen_reg_rtx (mode);
32949 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
32950 /* Emit instruction loading the constant value. */
32951 emit_move_insn (reg, val_vec);
32953 /* Handle nelt_mode bytes in a vector. */
32954 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
32956 emit_insn ((*gen_func) (mem, reg));
32957 if (i + 2 * nelt_mode <= length)
32959 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
32960 offset += nelt_mode;
32961 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32965 /* If at least nelt_v8 bytes are left over, we must be using
32966 V16QImode. */
32967 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
32969 /* Handle (8, 16) bytes leftover. */
32970 if (i + nelt_v8 < length)
32972 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
32973 offset += length - i;
32974 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32976 /* We are shifting bytes back, set the alignment accordingly. */
32977 if ((length & 1) != 0 && align >= 2)
32978 set_mem_align (mem, BITS_PER_UNIT);
32980 emit_insn (gen_movmisalignv16qi (mem, reg));
32982 /* Handle (0, 8] bytes leftover. */
32983 else if (i < length && i + nelt_v8 >= length)
32985 if (mode == V16QImode)
32986 reg = gen_lowpart (V8QImode, reg);
32988 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
32989 + (nelt_mode - nelt_v8))));
32990 offset += (length - i) + (nelt_mode - nelt_v8);
32991 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
32993 /* We are shifting bytes back, set the alignment accordingly. */
32994 if ((length & 1) != 0 && align >= 2)
32995 set_mem_align (mem, BITS_PER_UNIT);
32997 emit_insn (gen_movmisalignv8qi (mem, reg));
33000 return true;
33003 /* Set a block of memory using vectorization instructions for the
33004 aligned case. We fill the first LENGTH bytes of the memory area
33005 starting from DSTBASE with byte constant VALUE. ALIGN is the
33006 alignment requirement of memory. Return TRUE if succeeded. */
33007 static bool
33008 arm_block_set_aligned_vect (rtx dstbase,
33009 unsigned HOST_WIDE_INT length,
33010 unsigned HOST_WIDE_INT value,
33011 unsigned HOST_WIDE_INT align)
33013 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
33014 rtx dst, addr, mem;
33015 rtx val_vec, reg;
33016 machine_mode mode;
33017 unsigned int offset = 0;
33019 gcc_assert ((align & 0x3) == 0);
33020 nelt_v8 = GET_MODE_NUNITS (V8QImode);
33021 nelt_v16 = GET_MODE_NUNITS (V16QImode);
33022 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
33023 mode = V16QImode;
33024 else
33025 mode = V8QImode;
33027 nelt_mode = GET_MODE_NUNITS (mode);
33028 gcc_assert (length >= nelt_mode);
33029 /* Skip if it isn't profitable. */
33030 if (!arm_block_set_vect_profit_p (length, align, mode))
33031 return false;
33033 dst = copy_addr_to_reg (XEXP (dstbase, 0));
33035 reg = gen_reg_rtx (mode);
33036 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
33037 /* Emit instruction loading the constant value. */
33038 emit_move_insn (reg, val_vec);
33040 i = 0;
33041 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
33042 if (mode == V16QImode)
33044 mem = adjust_automodify_address (dstbase, mode, dst, offset);
33045 emit_insn (gen_movmisalignv16qi (mem, reg));
33046 i += nelt_mode;
33047 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
33048 if (i + nelt_v8 < length && i + nelt_v16 > length)
33050 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
33051 offset += length - nelt_mode;
33052 mem = adjust_automodify_address (dstbase, mode, dst, offset);
33053 /* We are shifting bytes back, set the alignment accordingly. */
33054 if ((length & 0x3) == 0)
33055 set_mem_align (mem, BITS_PER_UNIT * 4);
33056 else if ((length & 0x1) == 0)
33057 set_mem_align (mem, BITS_PER_UNIT * 2);
33058 else
33059 set_mem_align (mem, BITS_PER_UNIT);
33061 emit_insn (gen_movmisalignv16qi (mem, reg));
33062 return true;
33064 /* Fall through for bytes leftover. */
33065 mode = V8QImode;
33066 nelt_mode = GET_MODE_NUNITS (mode);
33067 reg = gen_lowpart (V8QImode, reg);
33070 /* Handle 8 bytes in a vector. */
33071 for (; (i + nelt_mode <= length); i += nelt_mode)
33073 addr = plus_constant (Pmode, dst, i);
33074 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
33075 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
33076 emit_move_insn (mem, reg);
33077 else
33078 emit_insn (gen_unaligned_storev8qi (mem, reg));
33081 /* Handle single word leftover by shifting 4 bytes back. We can
33082 use aligned access for this case. */
33083 if (i + UNITS_PER_WORD == length)
33085 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
33086 offset += i - UNITS_PER_WORD;
33087 mem = adjust_automodify_address (dstbase, mode, addr, offset);
33088 /* We are shifting 4 bytes back, set the alignment accordingly. */
33089 if (align > UNITS_PER_WORD)
33090 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
33092 emit_insn (gen_unaligned_storev8qi (mem, reg));
33094 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
33095 We have to use unaligned access for this case. */
33096 else if (i < length)
33098 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
33099 offset += length - nelt_mode;
33100 mem = adjust_automodify_address (dstbase, mode, dst, offset);
33101 /* We are shifting bytes back, set the alignment accordingly. */
33102 if ((length & 1) == 0)
33103 set_mem_align (mem, BITS_PER_UNIT * 2);
33104 else
33105 set_mem_align (mem, BITS_PER_UNIT);
33107 emit_insn (gen_movmisalignv8qi (mem, reg));
33110 return true;
33113 /* Set a block of memory using plain strh/strb instructions, only
33114 using instructions allowed by ALIGN on the processor. We fill the
33115 first LENGTH bytes of the memory area starting from DSTBASE
33116 with byte constant VALUE. ALIGN is the alignment requirement
33117 of memory. */
33118 static bool
33119 arm_block_set_unaligned_non_vect (rtx dstbase,
33120 unsigned HOST_WIDE_INT length,
33121 unsigned HOST_WIDE_INT value,
33122 unsigned HOST_WIDE_INT align)
33124 unsigned int i;
33125 rtx dst, addr, mem;
33126 rtx val_exp, val_reg, reg;
33127 machine_mode mode;
33128 HOST_WIDE_INT v = value;
33130 gcc_assert (align == 1 || align == 2);
33132 if (align == 2)
33133 v |= (value << BITS_PER_UNIT);
33135 v = sext_hwi (v, BITS_PER_WORD);
33136 val_exp = GEN_INT (v);
33137 /* Skip if it isn't profitable. */
33138 if (!arm_block_set_non_vect_profit_p (val_exp, length,
33139 align, true, false))
33140 return false;
33142 dst = copy_addr_to_reg (XEXP (dstbase, 0));
33143 mode = (align == 2 ? HImode : QImode);
33144 val_reg = force_reg (SImode, val_exp);
33145 reg = gen_lowpart (mode, val_reg);
33147 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
33149 addr = plus_constant (Pmode, dst, i);
33150 mem = adjust_automodify_address (dstbase, mode, addr, i);
33151 emit_move_insn (mem, reg);
33154 /* Handle single byte leftover. */
33155 if (i + 1 == length)
33157 reg = gen_lowpart (QImode, val_reg);
33158 addr = plus_constant (Pmode, dst, i);
33159 mem = adjust_automodify_address (dstbase, QImode, addr, i);
33160 emit_move_insn (mem, reg);
33161 i++;
33164 gcc_assert (i == length);
33165 return true;
33168 /* Set a block of memory using plain strd/str/strh/strb instructions,
33169 to permit unaligned copies on processors which support unaligned
33170 semantics for those instructions. We fill the first LENGTH bytes
33171 of the memory area starting from DSTBASE with byte constant VALUE.
33172 ALIGN is the alignment requirement of memory. */
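/* For illustration (VALUE == 0x41 chosen arbitrarily), the byte is
   replicated to 0x41414141 for str, or to the corresponding
   double-word constant when strd is used; a trailing 3-byte leftover
   can be merged into one str placed a byte back when unaligned access
   is available.  */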
33173 static bool
33174 arm_block_set_aligned_non_vect (rtx dstbase,
33175 unsigned HOST_WIDE_INT length,
33176 unsigned HOST_WIDE_INT value,
33177 unsigned HOST_WIDE_INT align)
33179 unsigned int i;
33180 rtx dst, addr, mem;
33181 rtx val_exp, val_reg, reg;
33182 unsigned HOST_WIDE_INT v;
33183 bool use_strd_p;
33185 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
33186 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
33188 v = (value | (value << 8) | (value << 16) | (value << 24));
33189 if (length < UNITS_PER_WORD)
33190 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
33192 if (use_strd_p)
33193 v |= (v << BITS_PER_WORD);
33194 else
33195 v = sext_hwi (v, BITS_PER_WORD);
33197 val_exp = GEN_INT (v);
33198 /* Skip if it isn't profitable. */
33199 if (!arm_block_set_non_vect_profit_p (val_exp, length,
33200 align, false, use_strd_p))
33202 if (!use_strd_p)
33203 return false;
33205 /* Try without strd. */
33206 v = (v >> BITS_PER_WORD);
33207 v = sext_hwi (v, BITS_PER_WORD);
33208 val_exp = GEN_INT (v);
33209 use_strd_p = false;
33210 if (!arm_block_set_non_vect_profit_p (val_exp, length,
33211 align, false, use_strd_p))
33212 return false;
33215 i = 0;
33216 dst = copy_addr_to_reg (XEXP (dstbase, 0));
33217 /* Handle double words using strd if possible. */
33218 if (use_strd_p)
33220 val_reg = force_reg (DImode, val_exp);
33221 reg = val_reg;
33222 for (; (i + 8 <= length); i += 8)
33224 addr = plus_constant (Pmode, dst, i);
33225 mem = adjust_automodify_address (dstbase, DImode, addr, i);
33226 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
33227 emit_move_insn (mem, reg);
33228 else
33229 emit_insn (gen_unaligned_storedi (mem, reg));
33232 else
33233 val_reg = force_reg (SImode, val_exp);
33235 /* Handle words. */
33236 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
33237 for (; (i + 4 <= length); i += 4)
33239 addr = plus_constant (Pmode, dst, i);
33240 mem = adjust_automodify_address (dstbase, SImode, addr, i);
33241 if ((align & 3) == 0)
33242 emit_move_insn (mem, reg);
33243 else
33244 emit_insn (gen_unaligned_storesi (mem, reg));
33247 /* Merge last pair of STRH and STRB into a STR if possible. */
33248 if (unaligned_access && i > 0 && (i + 3) == length)
33250 addr = plus_constant (Pmode, dst, i - 1);
33251 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
33252 /* We are shifting one byte back, set the alignment accordingly. */
33253 if ((align & 1) == 0)
33254 set_mem_align (mem, BITS_PER_UNIT);
33256 /* Most likely this is an unaligned access, and we can't tell at
33257 compilation time. */
33258 emit_insn (gen_unaligned_storesi (mem, reg));
33259 return true;
33262 /* Handle half word leftover. */
33263 if (i + 2 <= length)
33265 reg = gen_lowpart (HImode, val_reg);
33266 addr = plus_constant (Pmode, dst, i);
33267 mem = adjust_automodify_address (dstbase, HImode, addr, i);
33268 if ((align & 1) == 0)
33269 emit_move_insn (mem, reg);
33270 else
33271 emit_insn (gen_unaligned_storehi (mem, reg));
33273 i += 2;
33276 /* Handle single byte leftover. */
33277 if (i + 1 == length)
33279 reg = gen_lowpart (QImode, val_reg);
33280 addr = plus_constant (Pmode, dst, i);
33281 mem = adjust_automodify_address (dstbase, QImode, addr, i);
33282 emit_move_insn (mem, reg);
33285 return true;
33288 /* Set a block of memory using vectorization instructions for both
33289 aligned and unaligned cases. We fill the first LENGTH bytes of
33290 the memory area starting from DSTBASE with byte constant VALUE.
33291 ALIGN is the alignment requirement of memory. */
33292 static bool
33293 arm_block_set_vect (rtx dstbase,
33294 unsigned HOST_WIDE_INT length,
33295 unsigned HOST_WIDE_INT value,
33296 unsigned HOST_WIDE_INT align)
33298 /* Check whether we need to use unaligned store instruction. */
33299 if (((align & 3) != 0 || (length & 3) != 0)
33300 /* Check whether unaligned store instruction is available. */
33301 && (!unaligned_access || BYTES_BIG_ENDIAN))
33302 return false;
33304 if ((align & 3) == 0)
33305 return arm_block_set_aligned_vect (dstbase, length, value, align);
33306 else
33307 return arm_block_set_unaligned_vect (dstbase, length, value, align);
33310 /* Expand a string store (setmem) operation.  First we try to do it
33311 using vectorization instructions, then fall back to ARM unaligned
33312 access and double-word stores if profitable.  OPERANDS[0] is the
33313 destination, OPERANDS[1] is the number of bytes, OPERANDS[2] is the
33314 value used to initialize the memory, and OPERANDS[3] is the known
33315 alignment of the destination. */
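/* For illustration (length and value chosen arbitrarily), a call such
   as memset (p, 0x55, 15) with a known 4-byte alignment is first
   offered to the NEON path when LENGTH >= 8 and the tuning prefers
   NEON string ops, and otherwise falls through to
   arm_block_set_aligned_non_vect; a destination with smaller known
   alignment and no unaligned access support goes to
   arm_block_set_unaligned_non_vect instead.  */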
33316 bool
33317 arm_gen_setmem (rtx *operands)
33319 rtx dstbase = operands[0];
33320 unsigned HOST_WIDE_INT length;
33321 unsigned HOST_WIDE_INT value;
33322 unsigned HOST_WIDE_INT align;
33324 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
33325 return false;
33327 length = UINTVAL (operands[1]);
33328 if (length > 64)
33329 return false;
33331 value = (UINTVAL (operands[2]) & 0xFF);
33332 align = UINTVAL (operands[3]);
33333 if (TARGET_NEON && length >= 8
33334 && current_tune->string_ops_prefer_neon
33335 && arm_block_set_vect (dstbase, length, value, align))
33336 return true;
33338 if (!unaligned_access && (align & 3) != 0)
33339 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
33341 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
33345 static bool
33346 arm_macro_fusion_p (void)
33348 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
33351 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
33352 for MOVW / MOVT macro fusion. */
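/* For illustration, the pair
     movw r0, #:lower16:sym
     movt r0, #:upper16:sym
   (or the equivalent immediate forms building a 32-bit constant) is the
   kind of sequence this hook asks the scheduler to keep adjacent.  */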
33354 static bool
33355 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
33357 /* We are trying to fuse
33358 movw imm / movt imm
33359 instructions as a group that gets scheduled together. */
33361 rtx set_dest = SET_DEST (curr_set);
33363 if (GET_MODE (set_dest) != SImode)
33364 return false;
33366 /* We are trying to match:
33367 prev (movw) == (set (reg r0) (const_int imm16))
33368 curr (movt) == (set (zero_extract (reg r0)
33369 (const_int 16)
33370 (const_int 16))
33371 (const_int imm16_1))
33373 prev (movw) == (set (reg r1)
33374 (high (symbol_ref ("SYM"))))
33375 curr (movt) == (set (reg r0)
33376 (lo_sum (reg r1)
33377 (symbol_ref ("SYM")))) */
33379 if (GET_CODE (set_dest) == ZERO_EXTRACT)
33381 if (CONST_INT_P (SET_SRC (curr_set))
33382 && CONST_INT_P (SET_SRC (prev_set))
33383 && REG_P (XEXP (set_dest, 0))
33384 && REG_P (SET_DEST (prev_set))
33385 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
33386 return true;
33389 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
33390 && REG_P (SET_DEST (curr_set))
33391 && REG_P (SET_DEST (prev_set))
33392 && GET_CODE (SET_SRC (prev_set)) == HIGH
33393 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
33394 return true;
33396 return false;
33399 static bool
33400 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
33402 rtx prev_set = single_set (prev);
33403 rtx curr_set = single_set (curr);
33405 if (!prev_set
33406 || !curr_set)
33407 return false;
33409 if (any_condjump_p (curr))
33410 return false;
33412 if (!arm_macro_fusion_p ())
33413 return false;
33415 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
33416 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
33417 return true;
33419 return false;
33422 /* Return true iff the instruction fusion described by OP is enabled. */
33423 bool
33424 arm_fusion_enabled_p (tune_params::fuse_ops op)
33426 return current_tune->fusible_ops & op;
33429 /* Return TRUE if return address signing mechanism is enabled. */
33430 bool
33431 arm_current_function_pac_enabled_p (void)
33433 return (aarch_ra_sign_scope == AARCH_FUNCTION_ALL
33434 || (aarch_ra_sign_scope == AARCH_FUNCTION_NON_LEAF
33435 && !crtl->is_leaf));
33438 /* Raise an error if the current target arch is not bti compatible. */
33439 void aarch_bti_arch_check (void)
33441 if (!arm_arch8m_main)
33442 error ("This architecture does not support branch protection instructions");
33445 /* Return TRUE if Branch Target Identification Mechanism is enabled. */
33446 bool
33447 aarch_bti_enabled (void)
33449 return aarch_enable_bti != 0;
33452 /* Check if INSN is a BTI J insn. */
33453 bool
33454 aarch_bti_j_insn_p (rtx_insn *insn)
33456 if (!insn || !INSN_P (insn))
33457 return false;
33459 rtx pat = PATTERN (insn);
33460 return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == VUNSPEC_BTI_NOP;
33463 /* Check if X (or any sub-rtx of X) is a PACIASP/PACIBSP instruction. */
33464 bool
33465 aarch_pac_insn_p (rtx x)
33467 if (!x || !INSN_P (x))
33468 return false;
33470 rtx pat = PATTERN (x);
33472 if (GET_CODE (pat) == SET)
33474 rtx tmp = XEXP (pat, 1);
33475 if (tmp
33476 && ((GET_CODE (tmp) == UNSPEC
33477 && XINT (tmp, 1) == UNSPEC_PAC_NOP)
33478 || (GET_CODE (tmp) == UNSPEC_VOLATILE
33479 && XINT (tmp, 1) == VUNSPEC_PACBTI_NOP)))
33480 return true;
33483 return false;
33486 /* Target specific mapping for aarch_gen_bti_c and aarch_gen_bti_j.
33487 For Arm, both of these map to a simple BTI instruction. */
33490 aarch_gen_bti_c (void)
33492 return gen_bti_nop ();
33496 aarch_gen_bti_j (void)
33498 return gen_bti_nop ();
33501 /* For AArch32, we always return false because the indirect_return attribute
33502 is only supported on AArch64 targets. */
33504 bool
33505 aarch_fun_is_indirect_return (rtx_insn *)
33507 return false;
33510 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
33511 scheduled for speculative execution. Reject the long-running division
33512 and square-root instructions. */
33514 static bool
33515 arm_sched_can_speculate_insn (rtx_insn *insn)
33517 switch (get_attr_type (insn))
33519 case TYPE_SDIV:
33520 case TYPE_UDIV:
33521 case TYPE_FDIVS:
33522 case TYPE_FDIVD:
33523 case TYPE_FSQRTS:
33524 case TYPE_FSQRTD:
33525 case TYPE_NEON_FP_SQRT_S:
33526 case TYPE_NEON_FP_SQRT_D:
33527 case TYPE_NEON_FP_SQRT_S_Q:
33528 case TYPE_NEON_FP_SQRT_D_Q:
33529 case TYPE_NEON_FP_DIV_S:
33530 case TYPE_NEON_FP_DIV_D:
33531 case TYPE_NEON_FP_DIV_S_Q:
33532 case TYPE_NEON_FP_DIV_D_Q:
33533 return false;
33534 default:
33535 return true;
33539 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
33541 static unsigned HOST_WIDE_INT
33542 arm_asan_shadow_offset (void)
33544 return HOST_WIDE_INT_1U << 29;
33548 /* This is a temporary fix for PR60655. Ideally we need
33549 to handle most of these cases in the generic part but
33550 currently we reject minus (..) (sym_ref). We try to
33551 ameliorate the case with minus (sym_ref1) (sym_ref2)
33552 where they are in the same section. */
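/* For illustration, (minus (symbol_ref "a") (symbol_ref "b")) is kept
   for debug output only when both symbols resolve to decls placed in
   the same section (or to labels in the same function); otherwise it
   is rejected here.  */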
33554 static bool
33555 arm_const_not_ok_for_debug_p (rtx p)
33557 tree decl_op0 = NULL;
33558 tree decl_op1 = NULL;
33560 if (GET_CODE (p) == UNSPEC)
33561 return true;
33562 if (GET_CODE (p) == MINUS)
33564 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
33566 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
33567 if (decl_op1
33568 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
33569 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
33571 if ((VAR_P (decl_op1)
33572 || TREE_CODE (decl_op1) == CONST_DECL)
33573 && (VAR_P (decl_op0)
33574 || TREE_CODE (decl_op0) == CONST_DECL))
33575 return (get_variable_section (decl_op1, false)
33576 != get_variable_section (decl_op0, false));
33578 if (TREE_CODE (decl_op1) == LABEL_DECL
33579 && TREE_CODE (decl_op0) == LABEL_DECL)
33580 return (DECL_CONTEXT (decl_op1)
33581 != DECL_CONTEXT (decl_op0));
33584 return true;
33588 return false;
33591 /* Return TRUE if X is a reference to a value in a constant pool. */
33592 extern bool
33593 arm_is_constant_pool_ref (rtx x)
33595 return (MEM_P (x)
33596 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
33597 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
33600 /* Remember the last target of arm_set_current_function. */
33601 static GTY(()) tree arm_previous_fndecl;
33603 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
33605 void
33606 save_restore_target_globals (tree new_tree)
33608 /* If we have a previous state, use it. */
33609 if (TREE_TARGET_GLOBALS (new_tree))
33610 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
33611 else if (new_tree == target_option_default_node)
33612 restore_target_globals (&default_target_globals);
33613 else
33615 /* Call target_reinit and save the state for TARGET_GLOBALS. */
33616 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
33619 arm_option_params_internal ();
33622 /* Invalidate arm_previous_fndecl. */
33624 void
33625 arm_reset_previous_fndecl (void)
33627 arm_previous_fndecl = NULL_TREE;
33630 /* Establish appropriate back-end context for processing the function
33631 FNDECL. The argument might be NULL to indicate processing at top
33632 level, outside of any function scope. */
33634 static void
33635 arm_set_current_function (tree fndecl)
33637 if (!fndecl || fndecl == arm_previous_fndecl)
33638 return;
33640 tree old_tree = (arm_previous_fndecl
33641 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
33642 : NULL_TREE);
33644 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33646 /* If current function has no attributes but previous one did,
33647 use the default node. */
33648 if (! new_tree && old_tree)
33649 new_tree = target_option_default_node;
33651 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
33652 the default have been handled by save_restore_target_globals from
33653 arm_pragma_target_parse. */
33654 if (old_tree == new_tree)
33655 return;
33657 arm_previous_fndecl = fndecl;
33659 /* First set the target options. */
33660 cl_target_option_restore (&global_options, &global_options_set,
33661 TREE_TARGET_OPTION (new_tree));
33663 save_restore_target_globals (new_tree);
33665 arm_override_options_after_change_1 (&global_options, &global_options_set);
33668 /* Implement TARGET_OPTION_PRINT. */
33670 static void
33671 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
33673 int flags = ptr->x_target_flags;
33674 const char *fpu_name;
33676 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
33677 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
33679 fprintf (file, "%*sselected isa %s\n", indent, "",
33680 TARGET_THUMB2_P (flags) ? "thumb2" :
33681 TARGET_THUMB_P (flags) ? "thumb1" :
33682 "arm");
33684 if (ptr->x_arm_arch_string)
33685 fprintf (file, "%*sselected architecture %s\n", indent, "",
33686 ptr->x_arm_arch_string);
33688 if (ptr->x_arm_cpu_string)
33689 fprintf (file, "%*sselected CPU %s\n", indent, "",
33690 ptr->x_arm_cpu_string);
33692 if (ptr->x_arm_tune_string)
33693 fprintf (file, "%*sselected tune %s\n", indent, "",
33694 ptr->x_arm_tune_string);
33696 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
33699 /* Hook to determine if one function can safely inline another. */
33701 static bool
33702 arm_can_inline_p (tree caller, tree callee)
33704 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
33705 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
33706 bool can_inline = true;
33708 struct cl_target_option *caller_opts
33709 = TREE_TARGET_OPTION (caller_tree ? caller_tree
33710 : target_option_default_node);
33712 struct cl_target_option *callee_opts
33713 = TREE_TARGET_OPTION (callee_tree ? callee_tree
33714 : target_option_default_node);
33716 if (callee_opts == caller_opts)
33717 return true;
33719 /* Callee's ISA features should be a subset of the caller's. */
33720 struct arm_build_target caller_target;
33721 struct arm_build_target callee_target;
33722 caller_target.isa = sbitmap_alloc (isa_num_bits);
33723 callee_target.isa = sbitmap_alloc (isa_num_bits);
33725 arm_configure_build_target (&caller_target, caller_opts, false);
33726 arm_configure_build_target (&callee_target, callee_opts, false);
33727 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
33728 can_inline = false;
33730 sbitmap_free (caller_target.isa);
33731 sbitmap_free (callee_target.isa);
33733 /* OK to inline between different modes.
33734 Functions with mode-specific instructions, e.g. using asm,
33735 must be explicitly protected with noinline. */
33736 return can_inline;
33739 /* Hook to fix function's alignment affected by target attribute. */
33741 static void
33742 arm_relayout_function (tree fndecl)
33744 if (DECL_USER_ALIGN (fndecl))
33745 return;
33747 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33749 if (!callee_tree)
33750 callee_tree = target_option_default_node;
33752 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
33753 SET_DECL_ALIGN
33754 (fndecl,
33755 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
33758 /* Inner function to process the attribute((target(...))), take an argument and
33759 set the current options from the argument. If we have a list, recursively
33760 go over the list. */
33762 static bool
33763 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
33765 if (TREE_CODE (args) == TREE_LIST)
33767 bool ret = true;
33769 for (; args; args = TREE_CHAIN (args))
33770 if (TREE_VALUE (args)
33771 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
33772 ret = false;
33773 return ret;
33776 else if (TREE_CODE (args) != STRING_CST)
33778 error ("attribute %<target%> argument not a string");
33779 return false;
33782 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
33783 char *q;
33785 while ((q = strtok (argstr, ",")) != NULL)
33787 argstr = NULL;
33788 if (!strcmp (q, "thumb"))
33790 opts->x_target_flags |= MASK_THUMB;
33791 if (TARGET_FDPIC && !arm_arch_thumb2)
33792 sorry ("FDPIC mode is not supported in Thumb-1 mode");
33795 else if (!strcmp (q, "arm"))
33796 opts->x_target_flags &= ~MASK_THUMB;
33798 else if (!strcmp (q, "general-regs-only"))
33799 opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
33801 else if (startswith (q, "fpu="))
33803 int fpu_index;
33804 if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
33805 &fpu_index, CL_TARGET))
33807 error ("invalid fpu for target attribute or pragma %qs", q);
33808 return false;
33810 if (fpu_index == TARGET_FPU_auto)
33812 /* This doesn't really make sense until we support
33813 general dynamic selection of the architecture and all
33814 sub-features. */
33815 sorry ("auto fpu selection not currently permitted here");
33816 return false;
33818 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
33820 else if (startswith (q, "arch="))
33822 char *arch = q + 5;
33823 const arch_option *arm_selected_arch
33824 = arm_parse_arch_option_name (all_architectures, "arch", arch);
33826 if (!arm_selected_arch)
33828 error ("invalid architecture for target attribute or pragma %qs",
33830 return false;
33833 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
33835 else if (q[0] == '+')
33837 opts->x_arm_arch_string
33838 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
33840 else
33842 error ("unknown target attribute or pragma %qs", q);
33843 return false;
33847 return true;
33850 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
33852 tree
33853 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
33854 struct gcc_options *opts_set)
33856 struct cl_target_option cl_opts;
33858 if (!arm_valid_target_attribute_rec (args, opts))
33859 return NULL_TREE;
33861 cl_target_option_save (&cl_opts, opts, opts_set);
33862 arm_configure_build_target (&arm_active_target, &cl_opts, false);
33863 arm_option_check_internal (opts);
33864 /* Do any overrides, such as global options arch=xxx.
33865 We do this since arm_active_target was overridden. */
33866 arm_option_reconfigure_globals ();
33867 arm_options_perform_arch_sanity_checks ();
33868 arm_option_override_internal (opts, opts_set);
33870 return build_target_option_node (opts, opts_set);
33873 static void
33874 add_attribute (const char * mode, tree *attributes)
33876 size_t len = strlen (mode);
33877 tree value = build_string (len, mode);
33879 TREE_TYPE (value) = build_array_type (char_type_node,
33880 build_index_type (size_int (len)));
33882 *attributes = tree_cons (get_identifier ("target"),
33883 build_tree_list (NULL_TREE, value),
33884 *attributes);
33887 /* For testing. Insert thumb or arm modes alternately on functions. */
33889 static void
33890 arm_insert_attributes (tree fndecl, tree * attributes)
33892 const char *mode;
33894 if (! TARGET_FLIP_THUMB)
33895 return;
33897 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
33898 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
33899 return;
33901 /* Nested definitions must inherit mode. */
33902 if (current_function_decl)
33904 mode = TARGET_THUMB ? "thumb" : "arm";
33905 add_attribute (mode, attributes);
33906 return;
33909 /* If there is already a setting don't change it. */
33910 if (lookup_attribute ("target", *attributes) != NULL)
33911 return;
33913 mode = thumb_flipper ? "thumb" : "arm";
33914 add_attribute (mode, attributes);
33916 thumb_flipper = !thumb_flipper;
33919 /* Hook to validate attribute((target("string"))). */
33921 static bool
33922 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
33923 tree args, int ARG_UNUSED (flags))
33925 bool ret = true;
33926 struct gcc_options func_options, func_options_set;
33927 tree cur_tree, new_optimize;
33928 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
33930 /* Get the optimization options of the current function. */
33931 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
33933 /* If the function changed the optimization levels as well as setting target
33934 options, start with the optimizations specified. */
33935 if (!func_optimize)
33936 func_optimize = optimization_default_node;
33938 /* Init func_options. */
33939 memset (&func_options, 0, sizeof (func_options));
33940 init_options_struct (&func_options, NULL);
33941 lang_hooks.init_options_struct (&func_options);
33942 memset (&func_options_set, 0, sizeof (func_options_set));
33944 /* Initialize func_options to the defaults. */
33945 cl_optimization_restore (&func_options, &func_options_set,
33946 TREE_OPTIMIZATION (func_optimize));
33948 cl_target_option_restore (&func_options, &func_options_set,
33949 TREE_TARGET_OPTION (target_option_default_node));
33951 /* Set func_options flags with new target mode. */
33952 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
33953 &func_options_set);
33955 if (cur_tree == NULL_TREE)
33956 ret = false;
33958 new_optimize = build_optimization_node (&func_options, &func_options_set);
33960 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
33962 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
33964 return ret;
33967 /* Match an ISA feature bitmap to a named FPU. We always use the
33968 first entry that exactly matches the feature set, so that we
33969 effectively canonicalize the FPU name for the assembler. */
33970 static const char*
33971 arm_identify_fpu_from_isa (sbitmap isa)
33973 auto_sbitmap fpubits (isa_num_bits);
33974 auto_sbitmap cand_fpubits (isa_num_bits);
33976 bitmap_and (fpubits, isa, isa_all_fpubits_internal);
33978 /* If there are no ISA feature bits relating to the FPU, we must be
33979 doing soft-float. */
33980 if (bitmap_empty_p (fpubits))
33981 return "softvfp";
33983 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
33985 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
33986 if (bitmap_equal_p (fpubits, cand_fpubits))
33987 return all_fpus[i].name;
33989 /* We must find an entry, or things have gone wrong. */
33990 gcc_unreachable ();
33993 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
33994 by the function fndecl. */
33995 void
33996 arm_declare_function_name (FILE *stream, const char *name, tree decl)
33998 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
34000 struct cl_target_option *targ_options;
34001 if (target_parts)
34002 targ_options = TREE_TARGET_OPTION (target_parts);
34003 else
34004 targ_options = TREE_TARGET_OPTION (target_option_current_node);
34005 gcc_assert (targ_options);
34007 arm_print_asm_arch_directives (stream, targ_options);
34009 fprintf (stream, "\t.syntax unified\n");
34011 if (TARGET_THUMB)
34013 if (is_called_in_ARM_mode (decl)
34014 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
34015 && cfun->is_thunk))
34016 fprintf (stream, "\t.code 32\n");
34017 else if (TARGET_THUMB1)
34018 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
34019 else
34020 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
34022 else
34023 fprintf (stream, "\t.arm\n");
34025 if (TARGET_POKE_FUNCTION_NAME)
34026 arm_poke_function_name (stream, (const char *) name);
34029 /* If MEM is in the form of [base+offset], extract the two parts
34030 of the address and store them in BASE and OFFSET, otherwise return false
34031 after clearing BASE and OFFSET. */
34033 static bool
34034 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
34036 rtx addr;
34038 gcc_assert (MEM_P (mem));
34040 addr = XEXP (mem, 0);
34042 /* Strip off const from addresses like (const (addr)). */
34043 if (GET_CODE (addr) == CONST)
34044 addr = XEXP (addr, 0);
34046 if (REG_P (addr))
34048 *base = addr;
34049 *offset = const0_rtx;
34050 return true;
34053 if (GET_CODE (addr) == PLUS
34054 && GET_CODE (XEXP (addr, 0)) == REG
34055 && CONST_INT_P (XEXP (addr, 1)))
34057 *base = XEXP (addr, 0);
34058 *offset = XEXP (addr, 1);
34059 return true;
34062 *base = NULL_RTX;
34063 *offset = NULL_RTX;
34065 return false;
34068 /* If INSN is a load or store whose address is in the form [base+offset],
34069 extract the two parts into BASE and OFFSET.  IS_LOAD is set
34070 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
34071 otherwise return FALSE. */
34073 static bool
34074 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
34076 rtx x, dest, src;
34078 gcc_assert (INSN_P (insn));
34079 x = PATTERN (insn);
34080 if (GET_CODE (x) != SET)
34081 return false;
34083 src = SET_SRC (x);
34084 dest = SET_DEST (x);
34085 if (REG_P (src) && MEM_P (dest))
34087 *is_load = false;
34088 extract_base_offset_in_addr (dest, base, offset);
34090 else if (MEM_P (src) && REG_P (dest))
34092 *is_load = true;
34093 extract_base_offset_in_addr (src, base, offset);
34095 else
34096 return false;
34098 return (*base != NULL_RTX && *offset != NULL_RTX);
34101 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
34103 Currently we only support fusing ldr or str instructions, so FUSION_PRI
34104 and PRI are only calculated for these instructions.  For other instructions,
34105 FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds of
34106 instruction fusion can be supported by returning different priorities.
34108 It's important that irrelevant instructions get the largest FUSION_PRI. */
34110 static void
34111 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
34112 int *fusion_pri, int *pri)
34114 int tmp, off_val;
34115 bool is_load;
34116 rtx base, offset;
34118 gcc_assert (INSN_P (insn));
34120 tmp = max_pri - 1;
34121 if (!fusion_load_store (insn, &base, &offset, &is_load))
34123 *pri = tmp;
34124 *fusion_pri = tmp;
34125 return;
34128 /* Load goes first. */
34129 if (is_load)
34130 *fusion_pri = tmp - 1;
34131 else
34132 *fusion_pri = tmp - 2;
34134 tmp /= 2;
34136 /* INSN with smaller base register goes first. */
34137 tmp -= ((REGNO (base) & 0xff) << 20);
34139 /* INSN with smaller offset goes first. */
34140 off_val = (int)(INTVAL (offset));
34141 if (off_val >= 0)
34142 tmp -= (off_val & 0xfffff);
34143 else
34144 tmp += ((- off_val) & 0xfffff);
34146 *pri = tmp;
34147 return;
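/* Illustrative example (not real scheduler output): two loads from
   [r2, #4] and [r2, #8] receive the same FUSION_PRI (all loads share one
   value, all stores another, and everything else gets MAX_PRI), while the
   smaller offset gives the larger PRI, so the [r2, #4] access is ordered
   first and the two end up adjacent, where later passes may merge them,
   e.g. into an LDRD or LDM form.  */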
34151 /* Construct and return a PARALLEL RTX vector with elements numbering the
34152 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
34153 the vector - from the perspective of the architecture. This does not
34154 line up with GCC's perspective on lane numbers, so we end up with
34155 different masks depending on our target endian-ness. The diagram
34156 below may help. We must draw the distinction when building masks
34157 which select one half of the vector. An instruction selecting
34158 architectural low-lanes for a big-endian target must be described using
34159 a mask selecting GCC high-lanes.
34161 Big-Endian Little-Endian
34163 GCC 0 1 2 3 3 2 1 0
34164 | x | x | x | x | | x | x | x | x |
34165 Architecture 3 2 1 0 3 2 1 0
34167 Low Mask: { 2, 3 } { 0, 1 }
34168 High Mask: { 0, 1 } { 2, 3 }
34172 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
34174 int nunits = GET_MODE_NUNITS (mode);
34175 rtvec v = rtvec_alloc (nunits / 2);
34176 int high_base = nunits / 2;
34177 int low_base = 0;
34178 int base;
34179 rtx t1;
34180 int i;
34182 if (BYTES_BIG_ENDIAN)
34183 base = high ? low_base : high_base;
34184 else
34185 base = high ? high_base : low_base;
34187 for (i = 0; i < nunits / 2; i++)
34188 RTVEC_ELT (v, i) = GEN_INT (base + i);
34190 t1 = gen_rtx_PARALLEL (mode, v);
34191 return t1;
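/* Worked example (illustrative): for V4SImode, NUNITS is 4, so the result
   has two elements.  On little-endian, HIGH == true gives
   (parallel [(const_int 2) (const_int 3)]) and HIGH == false gives
   (parallel [(const_int 0) (const_int 1)]); on big-endian the two masks are
   swapped, matching the diagram above.  */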
34194 /* Check OP for validity as a PARALLEL RTX vector with elements
34195 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
34196 from the perspective of the architecture. See the diagram above
34197 arm_simd_vect_par_cnst_half_p for more details. */
34199 bool
34200 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
34201 bool high)
34203 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
34204 HOST_WIDE_INT count_op = XVECLEN (op, 0);
34205 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
34206 int i = 0;
34208 if (!VECTOR_MODE_P (mode))
34209 return false;
34211 if (count_op != count_ideal)
34212 return false;
34214 for (i = 0; i < count_ideal; i++)
34216 rtx elt_op = XVECEXP (op, 0, i);
34217 rtx elt_ideal = XVECEXP (ideal, 0, i);
34219 if (!CONST_INT_P (elt_op)
34220 || INTVAL (elt_ideal) != INTVAL (elt_op))
34221 return false;
34223 return true;
34226 /* Can output mi_thunk for all cases except for non-zero vcall_offset
34227 in Thumb1. */
34228 static bool
34229 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
34230 const_tree)
34232 /* For now, we punt and do not handle this for TARGET_THUMB1.  */
34233 if (vcall_offset && TARGET_THUMB1)
34234 return false;
34236 /* Otherwise ok. */
34237 return true;
34240 /* Generate RTL for a conditional branch with rtx comparison CODE in
34241 mode CC_MODE. The destination of the unlikely conditional branch
34242 is LABEL_REF. */
34244 void
34245 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
34246 rtx label_ref)
34248 rtx x;
34249 x = gen_rtx_fmt_ee (code, VOIDmode,
34250 gen_rtx_REG (cc_mode, CC_REGNUM),
34251 const0_rtx);
34253 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
34254 gen_rtx_LABEL_REF (VOIDmode, label_ref),
34255 pc_rtx);
34256 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
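/* The emitted jump has the usual conditional-branch shape, roughly
     (set (pc) (if_then_else (<code> (reg CC_REGNUM) (const_int 0))
                             (label_ref <label>)
                             (pc)))
   with the branch marked as unlikely to be taken.  */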
34259 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
34261 For pure-code sections there is no letter code for this attribute, so
34262 output all the section flags numerically when this is needed. */
34264 static bool
34265 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
34268 if (flags & SECTION_ARM_PURECODE)
34270 *num = 0x20000000;
34272 if (!(flags & SECTION_DEBUG))
34273 *num |= 0x2;
34274 if (flags & SECTION_EXCLUDE)
34275 *num |= 0x80000000;
34276 if (flags & SECTION_WRITE)
34277 *num |= 0x1;
34278 if (flags & SECTION_CODE)
34279 *num |= 0x4;
34280 if (flags & SECTION_MERGE)
34281 *num |= 0x10;
34282 if (flags & SECTION_STRINGS)
34283 *num |= 0x20;
34284 if (flags & SECTION_TLS)
34285 *num |= 0x400;
34286 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
34287 *num |= 0x200;
34289 return true;
34292 return false;
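/* For example, a typical pure-code text section (allocated, executable, not
   writable, not a debug section) comes out as
   0x20000000 | 0x2 | 0x4 == 0x20000006, i.e. SHF_ARM_PURECODE together with
   the usual SHF_ALLOC and SHF_EXECINSTR bits, printed numerically.  */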
34295 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
34297 If pure-code is passed as an option, make sure all functions are in
34298 sections that have the SHF_ARM_PURECODE attribute. */
34300 static section *
34301 arm_function_section (tree decl, enum node_frequency freq,
34302 bool startup, bool exit)
34304 const char * section_name;
34305 section * sec;
34307 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
34308 return default_function_section (decl, freq, startup, exit);
34310 if (!target_pure_code)
34311 return default_function_section (decl, freq, startup, exit);
34314 section_name = DECL_SECTION_NAME (decl);
34316 /* If a function is not in a named section then it falls under the 'default'
34317 text section, also known as '.text'. We can preserve previous behavior as
34318 the default text section already has the SHF_ARM_PURECODE section
34319 attribute. */
34320 if (!section_name)
34322 section *default_sec = default_function_section (decl, freq, startup,
34323 exit);
34325 /* If default_sec is not null, then it must be a special section like for
34326 example .text.startup. We set the pure-code attribute and return the
34327 same section to preserve existing behavior. */
34328 if (default_sec)
34329 default_sec->common.flags |= SECTION_ARM_PURECODE;
34330 return default_sec;
34333 /* Otherwise look whether a section has already been created with
34334 'section_name'. */
34335 sec = get_named_section (decl, section_name, 0);
34336 if (!sec)
34337 /* If that is not the case, passing NULL as the section's name to
34338 'get_named_section' will create a section with the declaration's
34339 section name. */
34340 sec = get_named_section (decl, NULL, 0);
34342 /* Set the SHF_ARM_PURECODE attribute. */
34343 sec->common.flags |= SECTION_ARM_PURECODE;
34345 return sec;
34348 /* Implements the TARGET_SECTION_FLAGS hook.
34350 If DECL is a function declaration and pure-code is passed as an option
34351 then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
34352 section's name and RELOC indicates whether the declaration's initializer may
34353 contain runtime relocations. */
34355 static unsigned int
34356 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
34358 unsigned int flags = default_section_type_flags (decl, name, reloc);
34360 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
34361 flags |= SECTION_ARM_PURECODE;
34363 return flags;
34366 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
34368 static void
34369 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
34370 rtx op0, rtx op1,
34371 rtx *quot_p, rtx *rem_p)
34373 if (mode == SImode)
34374 gcc_assert (!TARGET_IDIV);
34376 scalar_int_mode libval_mode
34377 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode)).require ();
34379 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
34380 libval_mode, op0, mode, op1, mode);
34382 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
34383 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
34384 GET_MODE_SIZE (mode));
34386 gcc_assert (quotient);
34387 gcc_assert (remainder);
34389 *quot_p = quotient;
34390 *rem_p = remainder;
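/* For example, for SImode operands the call returns a DImode value in which
   the quotient occupies the first SImode word and the remainder the word at
   byte offset GET_MODE_SIZE (SImode); the two subregs above simply pick
   those halves apart, mirroring the {quotient, remainder} return convention
   of the __aeabi_*divmod helpers.  */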
34393 /* This function checks for the availability of the coprocessor builtin passed
34394 in BUILTIN for the current target. Returns true if it is available and
34395 false otherwise. If a BUILTIN is passed for which this function has not
34396 been implemented it will cause an exception. */
34398 bool
34399 arm_coproc_builtin_available (enum unspecv builtin)
34401 /* None of these builtins are available in Thumb mode if the target only
34402 supports Thumb-1. */
34403 if (TARGET_THUMB1)
34404 return false;
34406 switch (builtin)
34408 case VUNSPEC_CDP:
34409 case VUNSPEC_LDC:
34410 case VUNSPEC_LDCL:
34411 case VUNSPEC_STC:
34412 case VUNSPEC_STCL:
34413 case VUNSPEC_MCR:
34414 case VUNSPEC_MRC:
34415 if (arm_arch4)
34416 return true;
34417 break;
34418 case VUNSPEC_CDP2:
34419 case VUNSPEC_LDC2:
34420 case VUNSPEC_LDC2L:
34421 case VUNSPEC_STC2:
34422 case VUNSPEC_STC2L:
34423 case VUNSPEC_MCR2:
34424 case VUNSPEC_MRC2:
34425 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
34426 ARMv8-{A,M}. */
34427 if (arm_arch5t)
34428 return true;
34429 break;
34430 case VUNSPEC_MCRR:
34431 case VUNSPEC_MRRC:
34432 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
34433 ARMv8-{A,M}. */
34434 if (arm_arch6 || arm_arch5te)
34435 return true;
34436 break;
34437 case VUNSPEC_MCRR2:
34438 case VUNSPEC_MRRC2:
34439 if (arm_arch6)
34440 return true;
34441 break;
34442 default:
34443 gcc_unreachable ();
34445 return false;
34448 /* This function returns true if OP is a valid memory operand for the ldc and
34449 stc coprocessor instructions and false otherwise. */
34451 bool
34452 arm_coproc_ldc_stc_legitimate_address (rtx op)
34454 HOST_WIDE_INT range;
34455 /* Has to be a memory operand. */
34456 if (!MEM_P (op))
34457 return false;
34459 op = XEXP (op, 0);
34461 /* We accept registers. */
34462 if (REG_P (op))
34463 return true;
34465 switch (GET_CODE (op))
34467 case PLUS:
34469 /* Or registers with an offset. */
34470 if (!REG_P (XEXP (op, 0)))
34471 return false;
34473 op = XEXP (op, 1);
34475 /* The offset must be an immediate though. */
34476 if (!CONST_INT_P (op))
34477 return false;
34479 range = INTVAL (op);
34481 /* Within the range of [-1020,1020]. */
34482 if (!IN_RANGE (range, -1020, 1020))
34483 return false;
34485 /* And a multiple of 4. */
34486 return (range % 4) == 0;
34488 case PRE_INC:
34489 case POST_INC:
34490 case PRE_DEC:
34491 case POST_DEC:
34492 return REG_P (XEXP (op, 0));
34493 default:
34494 gcc_unreachable ();
34496 return false;
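/* A few illustrative cases: [r0], [r1, #8] and [r1, #-1020] are accepted,
   while [r1, #6] (not a multiple of 4), [r1, #1024] (outside +/-1020) and
   [r1, r2] (register offset) are not.  Pre/post increment and decrement
   addresses are accepted whenever their base is a plain register.  */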
34499 /* Return true if OP is a valid memory operand for LDRD/STRD without any
34500 register overlap restrictions. Allow [base] and [base, imm] for now. */
34501 bool
34502 arm_ldrd_legitimate_address (rtx op)
34504 if (!MEM_P (op))
34505 return false;
34507 op = XEXP (op, 0);
34508 if (REG_P (op))
34509 return true;
34511 if (GET_CODE (op) != PLUS)
34512 return false;
34513 if (!REG_P (XEXP (op, 0)) || !CONST_INT_P (XEXP (op, 1)))
34514 return false;
34516 HOST_WIDE_INT val = INTVAL (XEXP (op, 1));
34518 if (TARGET_ARM)
34519 return IN_RANGE (val, -255, 255);
34520 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
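/* E.g. in ARM state an offset of 200 is accepted but 260 is outside the
   +/-255 range; otherwise (Thumb-2) 260 is fine, 258 is rejected for not
   being a multiple of 4, and 1024 exceeds the +/-1020 range.  (Illustrative
   values only.)  */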
34523 /* Return the diagnostic message string if conversion from FROMTYPE to
34524 TOTYPE is not allowed, NULL otherwise. */
34526 static const char *
34527 arm_invalid_conversion (const_tree fromtype, const_tree totype)
34529 if (element_mode (fromtype) != element_mode (totype))
34531 /* Do not allow conversions to/from BFmode scalar types.  */
34532 if (TYPE_MODE (fromtype) == BFmode)
34533 return N_("invalid conversion from type %<bfloat16_t%>");
34534 if (TYPE_MODE (totype) == BFmode)
34535 return N_("invalid conversion to type %<bfloat16_t%>");
34538 /* Conversion allowed. */
34539 return NULL;
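/* For instance, given a bfloat16_t value b, an implicit conversion such as
   "float f = b;" is rejected with "invalid conversion from type bfloat16_t",
   and the reverse assignment with the corresponding "conversion to" message,
   whereas conversions whose element modes already match are allowed.  */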
34542 /* Return the diagnostic message string if the unary operation OP is
34543 not permitted on TYPE, NULL otherwise. */
34545 static const char *
34546 arm_invalid_unary_op (int op, const_tree type)
34548 /* Reject all single-operand operations on BFmode except for &. */
34549 if (element_mode (type) == BFmode && op != ADDR_EXPR)
34550 return N_("operation not permitted on type %<bfloat16_t%>");
34552 /* Operation allowed. */
34553 return NULL;
34556 /* Return the diagnostic message string if the binary operation OP is
34557 not permitted on TYPE1 and TYPE2, NULL otherwise. */
34559 static const char *
34560 arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
34561 const_tree type2)
34563 /* Reject all 2-operand operations on BFmode. */
34564 if (element_mode (type1) == BFmode
34565 || element_mode (type2) == BFmode)
34566 return N_("operation not permitted on type %<bfloat16_t%>");
34568 /* Operation allowed. */
34569 return NULL;
34572 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
34574 In VFPv1, VFP registers could only be accessed in the mode they were
34575 set, so subregs would be invalid there. However, we don't support
34576 VFPv1 at the moment, and the restriction was lifted in VFPv2.
34578 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
34579 VFP registers in little-endian order. We can't describe that accurately to
34580 GCC, so avoid taking subregs of such values.
34582 The only exception is going from a 128-bit to a 64-bit type. In that
34583 case the data layout happens to be consistent for big-endian, so we
34584 explicitly allow that case. */
34586 static bool
34587 arm_can_change_mode_class (machine_mode from, machine_mode to,
34588 reg_class_t rclass)
34590 if (TARGET_BIG_END
34591 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
34592 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
34593 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
34594 && reg_classes_intersect_p (VFP_REGS, rclass))
34595 return false;
34596 return true;
34599 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
34600 strcpy from constants will be faster. */
34602 static HOST_WIDE_INT
34603 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
34605 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
34606 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
34607 return MAX (align, BITS_PER_WORD * factor);
34608 return align;
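/* E.g. when not optimising for size, a string constant that would otherwise
   be byte-aligned is raised to at least 32-bit (BITS_PER_WORD) alignment
   here, and to 64-bit when tuning for XScale in ARM state, so strcpy-style
   copies from it can use wider aligned loads.  */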
34611 /* Emit a speculation barrier on target architectures that do not have
34612 DSB/ISB directly. Such systems probably don't need a barrier
34613 themselves, but if the code is ever run on a later architecture, it
34614 might become a problem. */
34615 void
34616 arm_emit_speculation_barrier_function ()
34618 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
34621 /* Have we recorded an explicit access to the Q bit of APSR?  */
34622 bool
34623 arm_q_bit_access (void)
34625 if (cfun && cfun->decl)
34626 return lookup_attribute ("acle qbit",
34627 DECL_ATTRIBUTES (cfun->decl));
34628 return true;
34631 /* Have we recorded an explicit access to the GE bits of PSTATE?  */
34632 bool
34633 arm_ge_bits_access (void)
34635 if (cfun && cfun->decl)
34636 return lookup_attribute ("acle gebits",
34637 DECL_ATTRIBUTES (cfun->decl));
34638 return true;
34641 /* NULL if insn INSN is valid within a low-overhead loop.
34642 Otherwise return why doloop cannot be applied. */
34644 static const char *
34645 arm_invalid_within_doloop (const rtx_insn *insn)
34647 if (!TARGET_HAVE_LOB)
34648 return default_invalid_within_doloop (insn);
34650 if (CALL_P (insn))
34651 return "Function call in the loop.";
34653 if (reg_mentioned_p (gen_rtx_REG (SImode, LR_REGNUM), insn))
34654 return "LR is used inside loop.";
34656 return NULL;
34659 bool
34660 arm_target_bb_ok_for_lob (basic_block bb)
34662 /* Make sure the basic block is a simple latch whose single predecessor
34663 and single successor are the body of the loop itself.
34664 Only simple loops with a single basic block as the body are supported
34665 for low-overhead loops; this makes sure that the LE target is above the
34666 LE instruction in the generated code. */
34667 return (single_succ_p (bb)
34668 && single_pred_p (bb)
34669 && single_succ_edge (bb)->dest == single_pred_edge (bb)->src);
34672 /* Utility function: Given a VCTP or a VCTP_M insn, return the number of MVE
34673 lanes based on the machine mode being used. */
34675 static int
34676 arm_mve_get_vctp_lanes (rtx_insn *insn)
34678 rtx insn_set = single_set (insn);
34679 if (insn_set
34680 && GET_CODE (SET_SRC (insn_set)) == UNSPEC
34681 && (XINT (SET_SRC (insn_set), 1) == VCTP
34682 || XINT (SET_SRC (insn_set), 1) == VCTP_M))
34684 machine_mode mode = GET_MODE (SET_SRC (insn_set));
34685 return ((VECTOR_MODE_P (mode) && VALID_MVE_PRED_MODE (mode))
34686 ? GET_MODE_NUNITS (mode) : 0);
34688 return 0;
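/* For instance, a vctp32 whose predicate result has V4BImode reports 4
   lanes and a vctp8 producing a V16BImode predicate reports 16, while any
   insn that is not a single_set VCTP/VCTP_M unspec reports 0.  */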
34691 enum arm_dl_usage_type { DL_USAGE_ANY = 0,
34692 DL_USAGE_READ = 1,
34693 DL_USAGE_WRITE = 2 };
34695 /* Check if INSN requires the use of the VPR reg; if it does, return the
34696 sub-rtx of the VPR reg. The TYPE argument controls whether
34697 this function should:
34698 * For TYPE == DL_USAGE_ANY, check all operands, including the OUT operands,
34699 and return the first occurrence of the VPR reg.
34700 * For TYPE == DL_USAGE_READ, only check the input operands.
34701 * For TYPE == DL_USAGE_WRITE, only check the output operands.
34702 (INOUT operands are considered both as input and output operands)
34704 static rtx
34705 arm_get_required_vpr_reg (rtx_insn *insn,
34706 arm_dl_usage_type type = DL_USAGE_ANY)
34708 gcc_assert (type < 3);
34709 if (!NONJUMP_INSN_P (insn))
34710 return NULL_RTX;
34712 bool requires_vpr;
34713 extract_constrain_insn (insn);
34714 int n_operands = recog_data.n_operands;
34715 if (recog_data.n_alternatives == 0)
34716 return NULL_RTX;
34718 /* Fill in recog_op_alt with information about the constraints of
34719 this insn. */
34720 preprocess_constraints (insn);
34722 for (int op = 0; op < n_operands; op++)
34724 requires_vpr = true;
34725 if (type == DL_USAGE_READ
34726 && recog_data.operand_type[op] == OP_OUT)
34727 continue;
34728 else if (type == DL_USAGE_WRITE
34729 && recog_data.operand_type[op] == OP_IN)
34730 continue;
34732 /* Iterate through alternatives of operand "op" in recog_op_alt and
34733 identify if the operand is required to be the VPR. */
34734 for (int alt = 0; alt < recog_data.n_alternatives; alt++)
34736 const operand_alternative *op_alt
34737 = &recog_op_alt[alt * n_operands];
34738 /* Fetch the reg_class for each entry and check it against the
34739 VPR_REG reg_class. */
34740 if (alternative_class (op_alt, op) != VPR_REG)
34741 requires_vpr = false;
34743 /* If all alternatives of the insn require the VPR reg for this operand,
34744 it means that either this is a VPR-generating instruction, like a vctp,
34745 vcmp, etc., or it is a VPT-predicated instruction.  Return the subrtx
34746 of the VPR reg operand. */
34747 if (requires_vpr)
34748 return recog_data.operand[op];
34750 return NULL_RTX;
34753 /* Wrapper function of arm_get_required_vpr_reg with TYPE == DL_USAGE_READ,
34754 so return the VPR only if it is an input operand to the insn. */
34756 static rtx
34757 arm_get_required_vpr_reg_param (rtx_insn *insn)
34759 return arm_get_required_vpr_reg (insn, DL_USAGE_READ);
34762 /* Wrapper function of arm_get_required_vpr_reg with TYPE == DL_USAGE_WRITE,
34763 so return the VPR only if it is the return value, an output of, or is
34764 clobbered by the insn. */
34766 static rtx
34767 arm_get_required_vpr_reg_ret_val (rtx_insn *insn)
34769 return arm_get_required_vpr_reg (insn, DL_USAGE_WRITE);
34772 /* Return the first VCTP instruction in BB, if it exists, or NULL otherwise. */
34774 static rtx_insn *
34775 arm_mve_get_loop_vctp (basic_block bb)
34777 rtx_insn *insn = BB_HEAD (bb);
34779 /* Now scan through all the instruction patterns and pick out the VCTP
34780 instruction. We require arm_get_required_vpr_reg_param to be false
34781 to make sure we pick up a VCTP, rather than a VCTP_M. */
34782 FOR_BB_INSNS (bb, insn)
34783 if (NONDEBUG_INSN_P (insn))
34784 if (arm_get_required_vpr_reg_ret_val (insn)
34785 && (arm_mve_get_vctp_lanes (insn) != 0)
34786 && !arm_get_required_vpr_reg_param (insn))
34787 return insn;
34788 return NULL;
34791 /* Return true if INSN is a MVE instruction that is VPT-predicable and is
34792 predicated on VPR_REG. */
34794 static bool
34795 arm_mve_insn_predicated_by (rtx_insn *insn, rtx vpr_reg)
34797 rtx insn_vpr_reg_operand = (MVE_VPT_PREDICATED_INSN_P (insn)
34798 ? arm_get_required_vpr_reg_param (insn)
34799 : NULL_RTX);
34800 return (insn_vpr_reg_operand
34801 && rtx_equal_p (vpr_reg, insn_vpr_reg_operand));
34804 /* Utility function to identify if INSN is an MVE instruction that performs
34805 some across lane operation (and as a result does not align with normal
34806 lane predication rules).  All such instructions give only one scalar
34807 output, except for vshlcq which gives a PARALLEL of a vector and a scalar
34808 (one vector result and one carry output). */
34810 static bool
34811 arm_mve_across_lane_insn_p (rtx_insn* insn)
34813 df_ref insn_defs = NULL;
34814 if (!MVE_VPT_PREDICABLE_INSN_P (insn))
34815 return false;
34817 FOR_EACH_INSN_DEF (insn_defs, insn)
34818 if (!VALID_MVE_MODE (GET_MODE (DF_REF_REG (insn_defs)))
34819 && !arm_get_required_vpr_reg_ret_val (insn))
34820 return true;
34822 return false;
34825 /* Utility function to identify if INSN is an MVE load or store instruction.
34826 * For TYPE == DL_USAGE_ANY, check all operands. If the function returns
34827 true, INSN is a load or a store insn.
34828 * For TYPE == DL_USAGE_READ, only check the input operands. If the
34829 function returns true, INSN is a load insn.
34830 * For TYPE == DL_USAGE_WRITE, only check the output operands. If the
34831 function returns true, INSN is a store insn. */
34833 static bool
34834 arm_mve_load_store_insn_p (rtx_insn* insn,
34835 arm_dl_usage_type type = DL_USAGE_ANY)
34837 gcc_assert (type < 3);
34838 int n_operands = recog_data.n_operands;
34839 extract_insn (insn);
34841 for (int op = 0; op < n_operands; op++)
34843 if (type == DL_USAGE_READ && recog_data.operand_type[op] == OP_OUT)
34844 continue;
34845 else if (type == DL_USAGE_WRITE && recog_data.operand_type[op] == OP_IN)
34846 continue;
34847 if (mve_memory_operand (recog_data.operand[op],
34848 GET_MODE (recog_data.operand[op])))
34849 return true;
34851 return false;
34854 /* Return TRUE if INSN is validated for implicit predication by how its outputs
34855 are used.
34857 If INSN is an MVE operation across lanes that is not predicated by
34858 VCTP_VPR_GENERATED, it cannot be validated by the use of its outputs.
34860 Any other INSN is safe to implicit predicate if we don't use its outputs
34861 outside the loop. The instructions that use this INSN's outputs will be
34862 validated as we go through the analysis. */
34864 static bool
34865 arm_mve_impl_pred_on_outputs_p (rtx_insn *insn, rtx vctp_vpr_generated)
34867 /* Reject any unpredicated across lane operation. */
34868 if (!arm_mve_insn_predicated_by (insn, vctp_vpr_generated)
34869 && arm_mve_across_lane_insn_p (insn))
34870 return false;
34872 /* Next, scan forward to the various USEs of the DEFs in this insn. */
34873 df_ref insn_def = NULL;
34874 basic_block insn_bb = BLOCK_FOR_INSN (insn);
34875 FOR_EACH_INSN_DEF (insn_def, insn)
34877 for (df_ref use = DF_REG_USE_CHAIN (DF_REF_REGNO (insn_def));
34878 use;
34879 use = DF_REF_NEXT_REG (use))
34881 rtx_insn *next_use_insn = DF_REF_INSN (use);
34882 if (!INSN_P (next_use_insn) || DEBUG_INSN_P (next_use_insn))
34883 continue;
34885 if (insn_bb != BLOCK_FOR_INSN (next_use_insn))
34886 return false;
34889 return true;
34893 /* Return the prevailing definition of OP before CUR_INSN in the same
34894 basic block as CUR_INSN if one exists; return NULL otherwise. */
34896 static rtx_insn*
34897 arm_last_vect_def_insn (rtx op, rtx_insn *cur_insn)
34899 if (!REG_P (op)
34900 || !BLOCK_FOR_INSN (cur_insn))
34901 return NULL;
34903 df_ref def_insns;
34904 rtx_insn *last_def = NULL;
34905 for (def_insns = DF_REG_DEF_CHAIN (REGNO (op));
34906 def_insns;
34907 def_insns = DF_REF_NEXT_REG (def_insns))
34909 rtx_insn *def_insn = DF_REF_INSN (def_insns);
34910 /* Definition not in the loop body or after the current insn. */
34911 if (DF_REF_BB (def_insns) != BLOCK_FOR_INSN (cur_insn)
34912 || INSN_UID (def_insn) >= INSN_UID (cur_insn))
34913 continue;
34915 if (!last_def || INSN_UID (def_insn) > INSN_UID (last_def))
34916 last_def = def_insn;
34918 return last_def;
34922 /* This function returns TRUE if we can validate the implicit predication of
34923 INSN_IN with VCTP_VPR_GENERATED based on the definition of the instruction's
34924 input operands.
34926 If INSN_IN is an MVE operation across lanes, then all of its MVE vector
34927 operands must have their tail-predicated lanes zeroed.  We keep track of any
34928 instructions that define vector operands for which this is true in
34929 PROPS_ZERO_SET.
34931 For any other INSN_IN, all of its operands must be defined inside the loop
34932 body by an instruction that comes before INSN_IN and that is not
34933 an MVE load predicated by a different VPR.  These instructions have all been
34934 validated for explicit or implicit predication.
34937 static bool
34938 arm_mve_impl_pred_on_inputs_p (vec <rtx_insn *> *props_zero_set,
34939 rtx_insn *insn_in, rtx vctp_vpr_generated)
34941 /* If all inputs come from instructions that are explicitly or
34942 implicitly predicated by the same predicate then it is safe to
34943 implicitly predicate this instruction. */
34944 df_ref insn_uses = NULL;
34945 bool across_lane = arm_mve_across_lane_insn_p (insn_in);
34946 FOR_EACH_INSN_USE (insn_uses, insn_in)
34948 rtx op = DF_REF_REG (insn_uses);
34949 rtx_insn *def_insn = arm_last_vect_def_insn (op, insn_in);
34950 if (across_lane)
34952 if (!VALID_MVE_MODE (GET_MODE (op)))
34953 continue;
34954 if (!def_insn || !props_zero_set->contains (def_insn))
34955 return false;
34957 continue;
34960 if (!def_insn
34961 || (!arm_mve_insn_predicated_by (def_insn, vctp_vpr_generated)
34962 && arm_mve_load_store_insn_p (def_insn, DL_USAGE_READ)))
34963 return false;
34966 return true;
34970 /* Determine whether INSN_IN is safe to implicitly predicate based on the type
34971 of instruction and where needed the definition of its inputs and the uses of
34972 its outputs.
34973 Return TRUE if it is safe to implicitly predicate and FALSE otherwise.
34975 * If INSN_IN is a store, then it is always unsafe to implicitly predicate it.
34976 * If INSN_IN is a load, only reject implicit predication if its uses
34977 directly invalidate it.
34978 * If INSN_IN operates across vector lanes and does not have the
34979 "mve_safe_imp_xlane_pred" attribute, then it is always unsafe to implicitly
34980 predicate.
34981 * If INSN_IN operates on Floating Point elements and we are not compiling
34982 with -Ofast, then it is unsafe to implicitly predicate it as we may be
34983 changing exception and cumulative bits behaviour.
34984 * If INSN_IN is a VCTP instruction, then it is safe to implicitly predicate,
34985 but instructions that use this predicate will need to be checked
34986 just like any other UNPREDICATED MVE instruction.
34987 * Otherwise check if INSN_IN's inputs or uses of outputs can validate its
34988 implicit predication.
34990 * If all inputs come from instructions that are explicitly or implicitly
34991 predicated by the same predicate then it is safe to implicitly predicate
34992 this instruction.
34993 * If INSN_IN is an operation across lanes with the "mve_safe_imp_xlane_pred"
34994 attribute, then all its operands must have zeroed falsely predicated tail
34995 lanes.
34997 * Otherwise, check if the implicit predication of INSN_IN can be validated
34998 based on its inputs, and if not check whether it can be validated based on
34999 how its outputs are used. */
35001 static bool
35002 arm_mve_impl_predicated_p (vec <rtx_insn *> *props_zero_set,
35003 rtx_insn *insn_in, rtx vctp_vpr_generated)
35006 /* If INSN_IN is a store, then it is always unsafe to implicitly
35007 predicate it. */
35008 if (arm_mve_load_store_insn_p (insn_in, DL_USAGE_WRITE))
35009 return false;
35011 /* If INSN_IN is a load, only reject implicit predication if its uses
35012 directly invalidate it. */
35013 if (arm_mve_load_store_insn_p (insn_in, DL_USAGE_READ))
35015 if (!arm_mve_impl_pred_on_outputs_p (insn_in, vctp_vpr_generated))
35016 return false;
35017 return true;
35020 /* If INSN_IN operates across vector lanes and does not have the
35021 "mve_safe_imp_xlane_pred" attribute, then it is always unsafe to implicitly
35022 predicate. */
35023 if (arm_mve_across_lane_insn_p (insn_in)
35024 && (get_attr_mve_safe_imp_xlane_pred (insn_in)
35025 != MVE_SAFE_IMP_XLANE_PRED_YES))
35026 return false;
35028 /* If INSN_IN operates on Floating Point elements and we are not compiling
35029 with -Ofast, then it is unsafe to implicitly predicate it as we may be
35030 changing exception and cumulative bits behaviour. */
35031 if (!flag_unsafe_math_optimizations
35032 && flag_trapping_math
35033 && MVE_VPT_UNPREDICATED_INSN_P (insn_in))
35035 df_ref def;
35036 FOR_EACH_INSN_DEF (def, insn_in)
35037 if (DF_REF_TYPE (def) == DF_REF_REG_DEF
35038 && FLOAT_MODE_P (GET_MODE (DF_REF_REG (def))))
35039 return false;
35040 FOR_EACH_INSN_USE (def, insn_in)
35041 if (DF_REF_TYPE (def) == DF_REF_REG_DEF
35042 && FLOAT_MODE_P (GET_MODE (DF_REF_REG (def))))
35043 return false;
35046 /* If INSN_IN is a VCTP instruction, then it is safe to implicitly predicate,
35047 but instructions that use this predicate will need to be checked
35048 just like any other UNPREDICATED MVE instruction. */
35049 if (arm_get_required_vpr_reg_ret_val (insn_in)
35050 && (arm_mve_get_vctp_lanes (insn_in) != 0))
35051 return true;
35053 /* Otherwise, check if the implicit predication of INSN_IN can be validated
35054 based on its inputs, and if not check whether it can be validated based on
35055 how its outputs are used. */
35056 return (arm_mve_impl_pred_on_inputs_p (props_zero_set, insn_in, vctp_vpr_generated)
35057 || arm_mve_impl_pred_on_outputs_p (insn_in, vctp_vpr_generated));
35060 /* Helper function to `arm_mve_dlstp_check_inc_counter` and to
35061 `arm_mve_dlstp_check_dec_counter`. In the situations where the loop counter
35062 is incrementing by 1 or decrementing by 1 in each iteration, ensure that the
35063 number of iterations, the value of REG, going into the loop, was calculated as
35065 REG = (N + [1, VCTP_STEP - 1]) / VCTP_STEP
35067 where N is equivalent to the VCTP_REG.
35070 static bool
35071 arm_mve_check_reg_origin_is_num_elems (loop *loop, rtx reg, rtx vctp_step,
35072 rtx vctp_reg)
35074 df_ref counter_max_last_def = NULL;
35076 /* More than one reaching definition. */
35077 if (DF_REG_DEF_COUNT (REGNO (reg)) > 2)
35078 return false;
35080 /* Look for a single definition of REG going into the loop.  The DEF_CHAIN will
35081 have at least two values, as this is a loop induction variable that is
35082 defined outside the loop. */
35083 for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg));
35084 def;
35085 def = DF_REF_NEXT_REG (def))
35087 /* Skip the update inside the loop, this has already been checked by the
35088 iv_analyze call earlier. */
35089 if (DF_REF_BB (def) == loop->header)
35090 continue;
35092 counter_max_last_def = def;
35093 break;
35096 if (!counter_max_last_def)
35097 return false;
35099 rtx counter_max_last_set = single_set (DF_REF_INSN (counter_max_last_def));
35101 if (!counter_max_last_set)
35102 return false;
35104 /* If we encounter a simple SET from a REG, follow it through. */
35105 if (REG_P (SET_SRC (counter_max_last_set)))
35107 if (DF_REG_DEF_COUNT (REGNO (SET_SRC (counter_max_last_set))) != 1)
35108 return false;
35110 counter_max_last_def
35111 = DF_REG_DEF_CHAIN (REGNO (SET_SRC (counter_max_last_set)));
35112 counter_max_last_set
35113 = single_set (DF_REF_INSN (counter_max_last_def));
35115 if (!counter_max_last_set)
35116 return false;
35119 /* We are looking for:
35120 COUNTER_MAX_LAST_SET = (N + VCTP_STEP - 1) / VCTP_STEP.
35121 We currently only support the unsigned VCTP_OP case. */
35122 rtx division = SET_SRC (counter_max_last_set);
35123 if (GET_CODE (division) != LSHIFTRT)
35124 return false;
35126 /* Now check that we are dividing by VCTP_STEP, i.e. the number of lanes. */
35127 rtx divisor = XEXP (division, 1);
35128 unsigned vctp_step_cst = abs_hwi (INTVAL (vctp_step));
35129 if (!CONST_INT_P (divisor)
35130 || (1U << INTVAL (divisor) != vctp_step_cst))
35131 return false;
35133 rtx dividend = XEXP (division, 0);
35134 if (!REG_P (dividend))
35135 /* Subreg? */
35136 return false;
35138 /* For now, only support the simple case: this only works for unsigned N;
35139 any signed N will have further computations to deal with overflow. */
35140 if (DF_REG_DEF_COUNT (REGNO (dividend)) != 1)
35141 return false;
35143 rtx_insn *dividend_insn = DF_REF_INSN (DF_REG_DEF_CHAIN (REGNO (dividend)));
35144 rtx dividend_op = single_set (dividend_insn);
35145 if (!dividend_op
35146 && GET_CODE (SET_SRC (dividend_op)) != PLUS)
35147 return false;
35149 /* Check if PLUS_OP is (VCTP_OP + VAL), where VAL = [1, VCTP_STEP - 1]. */
35150 rtx plus_op = SET_SRC (dividend_op);
35151 if (!REG_P (XEXP (plus_op, 0))
35152 || !CONST_INT_P (XEXP (plus_op, 1))
35153 || !IN_RANGE (INTVAL (XEXP (plus_op, 1)), 1, vctp_step_cst - 1))
35154 return false;
35156 /* VCTP_REG may have been copied before entering the loop; let's see if we can
35157 trace such a copy back. If we have more than one reaching definition then
35158 bail out as analysis will be too difficult. */
35159 if (DF_REG_DEF_COUNT (REGNO (vctp_reg)) > 2)
35160 return false;
35162 /* Look for the definition of N. */
35163 for (df_ref def = DF_REG_DEF_CHAIN (REGNO (vctp_reg));
35164 def;
35165 def = DF_REF_NEXT_REG (def))
35167 if (DF_REF_BB (def) == loop->header)
35168 continue;
35169 rtx set = single_set (DF_REF_INSN (def));
35170 if (set
35171 && REG_P (SET_SRC (set))
35172 && !HARD_REGISTER_P (SET_SRC (set)))
35173 vctp_reg = SET_SRC (set);
35176 return rtx_equal_p (vctp_reg, XEXP (plus_op, 0));
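/* Worked example (purely illustrative): for a vctp32 loop, VCTP_STEP is 4,
   so for N = 10 elements the iteration count fed into the loop is expected
   to be computed as (10 + 3) >> 2 == 3, i.e. a PLUS of a value in [1, 3]
   followed by an LSHIFTRT by 2, which is exactly the shape matched above.  */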
35179 /* If we have identified the loop to have an incrementing counter, we need to
35180 make sure that it increments by 1 and that the loop is structured correctly:
35181 * The counter starts from 0
35182 * The counter terminates at (num_of_elem + num_of_lanes - 1) / num_of_lanes
35183 * The vctp insn uses a reg that decrements appropriately in each iteration.
35186 static rtx_insn*
35187 arm_mve_dlstp_check_inc_counter (loop *loop, rtx_insn* vctp_insn,
35188 rtx condconst, rtx condcount)
35190 rtx vctp_reg = XVECEXP (XEXP (PATTERN (vctp_insn), 1), 0, 0);
35191 /* The loop latch has to be empty. When compiling all the known MVE LoLs in
35192 user applications, none of those with incrementing counters had any real
35193 insns in the loop latch. As such, this function has only been tested with
35194 an empty latch and may misbehave or ICE if we somehow get here with an
35195 increment in the latch, so, for correctness, error out early. */
35196 if (!empty_block_p (loop->latch))
35197 return NULL;
35199 class rtx_iv vctp_reg_iv;
35200 /* For loops of DLSTP_TYPE_B, the loop counter is independent of the decrement
35201 of the reg used in the vctp_insn. So run iv analysis on that reg. This
35202 has to succeed for such loops to be supported. */
35203 if (!iv_analyze (vctp_insn, as_a<scalar_int_mode> (GET_MODE (vctp_reg)),
35204 vctp_reg, &vctp_reg_iv))
35205 return NULL;
35207 /* Extract the decrementnum of the vctp reg from the iv.  This decrementnum
35208 is the number of lanes/elements it decrements from the remaining number of
35209 lanes/elements to process in the loop.  For this reason it is always a
35210 negative number, but to simplify later checks we use its absolute value. */
35211 HOST_WIDE_INT decrementnum = INTVAL (vctp_reg_iv.step);
35212 if (decrementnum >= 0)
35213 return NULL;
35214 decrementnum = abs_hwi (decrementnum);
35216 /* Find where both of those are modified in the loop header bb. */
35217 df_ref condcount_reg_set_df = df_bb_regno_only_def_find (loop->header,
35218 REGNO (condcount));
35219 df_ref vctp_reg_set_df = df_bb_regno_only_def_find (loop->header,
35220 REGNO (vctp_reg));
35221 if (!condcount_reg_set_df || !vctp_reg_set_df)
35222 return NULL;
35223 rtx condcount_reg_set = single_set (DF_REF_INSN (condcount_reg_set_df));
35224 rtx vctp_reg_set = single_set (DF_REF_INSN (vctp_reg_set_df));
35225 if (!condcount_reg_set || !vctp_reg_set)
35226 return NULL;
35228 /* Ensure the modification of the vctp reg from df is consistent with
35229 the iv and the number of lanes on the vctp insn. */
35230 if (GET_CODE (SET_SRC (vctp_reg_set)) != PLUS
35231 || !REG_P (SET_DEST (vctp_reg_set))
35232 || !REG_P (XEXP (SET_SRC (vctp_reg_set), 0))
35233 || REGNO (SET_DEST (vctp_reg_set))
35234 != REGNO (XEXP (SET_SRC (vctp_reg_set), 0))
35235 || !CONST_INT_P (XEXP (SET_SRC (vctp_reg_set), 1))
35236 || INTVAL (XEXP (SET_SRC (vctp_reg_set), 1)) >= 0
35237 || decrementnum != abs_hwi (INTVAL (XEXP (SET_SRC (vctp_reg_set), 1)))
35238 || decrementnum != arm_mve_get_vctp_lanes (vctp_insn))
35239 return NULL;
35241 if (REG_P (condcount) && REG_P (condconst))
35243 /* First we need to prove that the loop is going 0..condconst with an
35244 inc of 1 in each iteration. */
35245 if (GET_CODE (SET_SRC (condcount_reg_set)) == PLUS
35246 && CONST_INT_P (XEXP (SET_SRC (condcount_reg_set), 1))
35247 && INTVAL (XEXP (SET_SRC (condcount_reg_set), 1)) == 1)
35249 rtx counter_reg = SET_DEST (condcount_reg_set);
35250 /* Check that the counter did indeed start from zero. */
35251 df_ref this_set = DF_REG_DEF_CHAIN (REGNO (counter_reg));
35252 if (!this_set)
35253 return NULL;
35254 df_ref last_set_def = DF_REF_NEXT_REG (this_set);
35255 if (!last_set_def)
35256 return NULL;
35257 rtx_insn* last_set_insn = DF_REF_INSN (last_set_def);
35258 rtx last_set = single_set (last_set_insn);
35259 if (!last_set)
35260 return NULL;
35261 rtx counter_orig_set;
35262 counter_orig_set = SET_SRC (last_set);
35263 if (!CONST_INT_P (counter_orig_set)
35264 || (INTVAL (counter_orig_set) != 0))
35265 return NULL;
35266 /* And finally check that the target value of the counter,
35267 condconst, is of the correct shape. */
35268 if (!arm_mve_check_reg_origin_is_num_elems (loop, condconst,
35269 vctp_reg_iv.step,
35270 vctp_reg))
35271 return NULL;
35273 else
35274 return NULL;
35276 else
35277 return NULL;
35279 /* Everything looks valid. */
35280 return vctp_insn;
35283 /* Helper function to 'arm_mve_dlstp_check_dec_counter' to make sure DEC_INSN
35284 is of the expected form:
35285 (set (reg a) (plus (reg a) (const_int)))
35286 where (reg a) is the same as CONDCOUNT.
35287 Return a rtx with the set if it is in the right format or NULL_RTX
35288 otherwise. */
35290 static rtx
35291 check_dec_insn (rtx_insn *dec_insn, rtx condcount)
35293 if (!NONDEBUG_INSN_P (dec_insn))
35294 return NULL_RTX;
35295 rtx dec_set = single_set (dec_insn);
35296 if (!dec_set
35297 || !REG_P (SET_DEST (dec_set))
35298 || GET_CODE (SET_SRC (dec_set)) != PLUS
35299 || !REG_P (XEXP (SET_SRC (dec_set), 0))
35300 || !CONST_INT_P (XEXP (SET_SRC (dec_set), 1))
35301 || REGNO (SET_DEST (dec_set))
35302 != REGNO (XEXP (SET_SRC (dec_set), 0))
35303 || REGNO (SET_DEST (dec_set)) != REGNO (condcount))
35304 return NULL_RTX;
35306 return dec_set;
35309 /* Helper function to `arm_mve_loop_valid_for_dlstp`. In the case of a
35310 counter that is decrementing, ensure that it is decrementing by the
35311 right amount in each iteration and that the target condition is what
35312 we expect. */
35314 static rtx_insn*
35315 arm_mve_dlstp_check_dec_counter (loop *loop, rtx_insn* vctp_insn,
35316 rtx condconst, rtx condcount)
35318 rtx vctp_reg = XVECEXP (XEXP (PATTERN (vctp_insn), 1), 0, 0);
35319 class rtx_iv vctp_reg_iv;
35320 HOST_WIDE_INT decrementnum;
35321 /* For decrementing loops of DLSTP_TYPE_A, the counter is usually present in the
35322 loop latch. Here we simply need to verify that this counter is the same
35323 reg that is also used in the vctp_insn and that it is not otherwise
35324 modified. */
35325 rtx dec_set = check_dec_insn (BB_END (loop->latch), condcount);
35326 /* If not in the loop latch, try to find the decrement in the loop header. */
35327 if (dec_set == NULL_RTX)
35329 df_ref temp = df_bb_regno_only_def_find (loop->header, REGNO (condcount));
35330 /* If we haven't been able to find the decrement, bail out. */
35331 if (!temp)
35332 return NULL;
35333 dec_set = check_dec_insn (DF_REF_INSN (temp), condcount);
35335 if (dec_set == NULL_RTX)
35336 return NULL;
35339 decrementnum = INTVAL (XEXP (SET_SRC (dec_set), 1));
35341 /* This decrementnum is the number of lanes/elements it decrements from the
35342 remaining number of lanes/elements to process in the loop; for this reason
35343 it is always a negative number, but to simplify later checks we use its
35344 absolute value. */
35345 if (decrementnum >= 0)
35346 return NULL;
35347 decrementnum = -decrementnum;
35349 /* If the decrementnum is a 1, then we need to look at the loop vctp_reg and
35350 verify that it also decrements correctly.
35351 Then, we need to establish that the starting value of the loop decrement
35352 originates from the starting value of the vctp decrement. */
35353 if (decrementnum == 1)
35355 class rtx_iv vctp_reg_iv, condcount_reg_iv;
35356 /* The loop counter is found to be independent of the decrement
35357 of the reg used in the vctp_insn, again. Ensure that IV analysis
35358 succeeds and check the step. */
35359 if (!iv_analyze (vctp_insn, as_a<scalar_int_mode> (GET_MODE (vctp_reg)),
35360 vctp_reg, &vctp_reg_iv))
35361 return NULL;
35362 /* Ensure it matches the number of lanes of the vctp instruction. */
35363 if (abs (INTVAL (vctp_reg_iv.step))
35364 != arm_mve_get_vctp_lanes (vctp_insn))
35365 return NULL;
35367 if (!arm_mve_check_reg_origin_is_num_elems (loop, condcount,
35368 vctp_reg_iv.step,
35369 vctp_reg))
35370 return NULL;
35372 /* If the decrements are the same, then the situation is simple: either they
35373 are also the same reg, which is safe, or they are different registers, in
35374 which case make sure that there is only a simple SET from one to the
35375 other inside the loop.  */
35376 else if (decrementnum == arm_mve_get_vctp_lanes (vctp_insn))
35378 if (REGNO (condcount) != REGNO (vctp_reg))
35380 /* It wasn't the same reg, but it could be behind a
35381 (set (vctp_reg) (condcount)), so instead find where
35382 the VCTP insn is DEF'd inside the loop. */
35383 rtx_insn *vctp_reg_insn
35384 = DF_REF_INSN (df_bb_regno_only_def_find (loop->header,
35385 REGNO (vctp_reg)));
35386 rtx vctp_reg_set = single_set (vctp_reg_insn);
35387 /* This must just be a simple SET from the condcount. */
35388 if (!vctp_reg_set
35389 || !REG_P (SET_DEST (vctp_reg_set))
35390 || !REG_P (SET_SRC (vctp_reg_set))
35391 || REGNO (SET_SRC (vctp_reg_set)) != REGNO (condcount))
35392 return NULL;
35395 else
35396 return NULL;
35398 /* We now only need to find out that the loop terminates with a LE
35399 zero condition. If condconst is a const_int, then this is easy.
35400 If it's a REG, look at the last condition+jump in a bb before
35401 the loop, because that usually will have a branch jumping over
35402 the loop header. */
35403 rtx_insn *jump_insn = BB_END (loop->header);
35404 if (CONST_INT_P (condconst)
35405 && !(INTVAL (condconst) == 0 && JUMP_P (jump_insn)
35406 && GET_CODE (XEXP (PATTERN (jump_insn), 1)) == IF_THEN_ELSE
35407 && (GET_CODE (XEXP (XEXP (PATTERN (jump_insn), 1), 0)) == NE
35408 || GET_CODE (XEXP (XEXP (PATTERN (jump_insn), 1), 0)) == GT)))
35409 return NULL;
35410 else if (REG_P (condconst))
35412 basic_block preheader_b = loop_preheader_edge (loop)->src;
35413 if (!single_pred_p (preheader_b))
35414 return NULL;
35415 basic_block pre_loop_bb = single_pred (preheader_b);
35417 rtx initial_compare = NULL_RTX;
35418 if (!(prev_nonnote_nondebug_insn_bb (BB_END (pre_loop_bb))
35419 && INSN_P (prev_nonnote_nondebug_insn_bb (BB_END (pre_loop_bb)))))
35420 return NULL;
35421 else
35422 initial_compare
35423 = single_set (prev_nonnote_nondebug_insn_bb (BB_END (pre_loop_bb)));
35424 if (!(initial_compare
35425 && cc_register (SET_DEST (initial_compare), VOIDmode)
35426 && GET_CODE (SET_SRC (initial_compare)) == COMPARE
35427 && CONST_INT_P (XEXP (SET_SRC (initial_compare), 1))
35428 && INTVAL (XEXP (SET_SRC (initial_compare), 1)) == 0))
35429 return NULL;
35431 /* Usually this is a LE condition, but it can also just be a GT or an EQ
35432 condition (if the value is unsigned or the compiler knows it's not negative).  */
35433 rtx_insn *loop_jumpover = BB_END (pre_loop_bb);
35434 if (!(JUMP_P (loop_jumpover)
35435 && GET_CODE (XEXP (PATTERN (loop_jumpover), 1)) == IF_THEN_ELSE
35436 && (GET_CODE (XEXP (XEXP (PATTERN (loop_jumpover), 1), 0)) == LE
35437 || GET_CODE (XEXP (XEXP (PATTERN (loop_jumpover), 1), 0)) == GT
35438 || GET_CODE (XEXP (XEXP (PATTERN (loop_jumpover), 1), 0)) == EQ)))
35439 return NULL;
35442 /* Everything looks valid. */
35443 return vctp_insn;
35446 /* Function to check a loop's structure to see if it is a valid candidate for
35447 an MVE Tail Predicated Low-Overhead Loop. Returns the loop's VCTP_INSN if
35448 it is valid, or NULL if it isn't. */
35450 static rtx_insn*
35451 arm_mve_loop_valid_for_dlstp (loop *loop)
35453 /* Doloop can only be done "elementwise" with predicated dlstp/letp if it
35454 contains a VCTP on the number of elements processed by the loop.
35455 Find the VCTP predicate generation inside the loop body BB. */
35456 rtx_insn *vctp_insn = arm_mve_get_loop_vctp (loop->header);
35457 if (!vctp_insn)
35458 return NULL;
35460 /* We only support two loop forms for tail predication:
35461 DLSTP_TYPE_A) Loops of the form:
35462 int num_of_lanes = 128 / elem_size;
35463 while (num_of_elem > 0)
35465 p = vctp<size> (num_of_elem);
35466 num_of_elem -= num_of_lanes;
35468 DLSTP_TYPE_B) Loops of the form:
35469 int num_of_lanes = 128 / elem_size;
35470 int num_of_iters = (num_of_elem + num_of_lanes - 1) / num_of_lanes;
35471 for (i = 0; i < num_of_iters; i++)
35473 p = vctp<size> (num_of_elem);
35474 num_of_elem -= num_of_lanes;
35477 Then, depending on the type of loop above, we will need to do
35478 different sets of checks. */
35479 iv_analysis_loop_init (loop);
35481 /* In order to find out if the loop is of DLSTP_TYPE_A or DLSTP_TYPE_B above
35482 look for the loop counter: it will either be incrementing by one per
35483 iteration or it will be decrementing by num_of_lanes. We can find the
35484 loop counter in the condition at the end of the loop. */
35485 rtx_insn *loop_cond = prev_nonnote_nondebug_insn_bb (BB_END (loop->header));
35486 if (!(cc_register (XEXP (PATTERN (loop_cond), 0), VOIDmode)
35487 && GET_CODE (XEXP (PATTERN (loop_cond), 1)) == COMPARE))
35488 return NULL;
35490 /* The operands in the condition: Try to identify which one is the
35491 constant and which is the counter and run IV analysis on the latter. */
35492 rtx cond_arg_1 = XEXP (XEXP (PATTERN (loop_cond), 1), 0);
35493 rtx cond_arg_2 = XEXP (XEXP (PATTERN (loop_cond), 1), 1);
35495 rtx loop_cond_constant;
35496 rtx loop_counter;
35497 class rtx_iv cond_counter_iv, cond_temp_iv;
35499 if (CONST_INT_P (cond_arg_1))
35501 /* cond_arg_1 is the constant and cond_arg_2 is the counter. */
35502 loop_cond_constant = cond_arg_1;
35503 loop_counter = cond_arg_2;
35504 iv_analyze (loop_cond, as_a<scalar_int_mode> (GET_MODE (cond_arg_2)),
35505 cond_arg_2, &cond_counter_iv);
35507 else if (CONST_INT_P (cond_arg_2))
35509 /* cond_arg_2 is the constant and cond_arg_1 is the counter. */
35510 loop_cond_constant = cond_arg_2;
35511 loop_counter = cond_arg_1;
35512 iv_analyze (loop_cond, as_a<scalar_int_mode> (GET_MODE (cond_arg_1)),
35513 cond_arg_1, &cond_counter_iv);
35515 else if (REG_P (cond_arg_1) && REG_P (cond_arg_2))
35517 /* If both operands to the compare are REGs, we can safely
35518 run IV analysis on both and then determine which is the
35519 constant by looking at the step.
35520 First assume cond_arg_1 is the counter. */
35521 loop_counter = cond_arg_1;
35522 loop_cond_constant = cond_arg_2;
35523 iv_analyze (loop_cond, as_a<scalar_int_mode> (GET_MODE (cond_arg_1)),
35524 cond_arg_1, &cond_counter_iv);
35525 iv_analyze (loop_cond, as_a<scalar_int_mode> (GET_MODE (cond_arg_2)),
35526 cond_arg_2, &cond_temp_iv);
35528 /* Look at the steps and swap around the rtx's if needed. Error out if
35529 one of them cannot be identified as constant. */
35530 if (!CONST_INT_P (cond_counter_iv.step) || !CONST_INT_P (cond_temp_iv.step))
35531 return NULL;
35532 if (INTVAL (cond_counter_iv.step) != 0 && INTVAL (cond_temp_iv.step) != 0)
35533 return NULL;
35534 if (INTVAL (cond_counter_iv.step) == 0 && INTVAL (cond_temp_iv.step) != 0)
35536 loop_counter = cond_arg_2;
35537 loop_cond_constant = cond_arg_1;
35538 cond_counter_iv = cond_temp_iv;
35541 else
35542 return NULL;
35544 if (!REG_P (loop_counter))
35545 return NULL;
35546 if (!(REG_P (loop_cond_constant) || CONST_INT_P (loop_cond_constant)))
35547 return NULL;
35549 /* Now we have extracted the IV step of the loop counter, call the
35550 appropriate checking function. */
35551 if (INTVAL (cond_counter_iv.step) > 0)
35552 return arm_mve_dlstp_check_inc_counter (loop, vctp_insn,
35553 loop_cond_constant, loop_counter);
35554 else if (INTVAL (cond_counter_iv.step) < 0)
35555 return arm_mve_dlstp_check_dec_counter (loop, vctp_insn,
35556 loop_cond_constant, loop_counter);
35557 else
35558 return NULL;
35561 /* Predict whether the given loop in gimple will be transformed in the RTL
35562 doloop_optimize pass. It could be argued that turning large enough loops
35563 into low-overhead loops would not show a significant performance boost.
35564 However, in the case of tail predication we would still avoid using VPT/VPST
35565 instructions inside the loop, and in either case using low-overhead loops
35566 would not be detrimental, so we decided to not consider size, avoiding the
35567 need of a heuristic to determine what an appropriate size boundary is. */
35569 static bool
35570 arm_predict_doloop_p (struct loop *loop)
35572 gcc_assert (loop);
35573 /* On arm, targetm.can_use_doloop_p is actually
35574 can_use_doloop_if_innermost.  Ensure the loop is innermost,
35575 that it is valid as per arm_target_bb_ok_for_lob, and that the
35576 correct architecture flags are enabled. */
35577 if (!(TARGET_HAVE_LOB && optimize > 0))
35579 if (dump_file && (dump_flags & TDF_DETAILS))
35580 fprintf (dump_file, "Predict doloop failure due to"
35581 " target architecture or optimisation flags.\n");
35582 return false;
35584 else if (loop->inner != NULL)
35586 if (dump_file && (dump_flags & TDF_DETAILS))
35587 fprintf (dump_file, "Predict doloop failure due to"
35588 " loop nesting.\n");
35589 return false;
35591 else if (!arm_target_bb_ok_for_lob (loop->header->next_bb))
35593 if (dump_file && (dump_flags & TDF_DETAILS))
35594 fprintf (dump_file, "Predict doloop failure due to"
35595 " loop bb complexity.\n");
35596 return false;
35598 else
35600 gimple_stmt_iterator gsi = gsi_after_labels (loop->header);
35601 while (!gsi_end_p (gsi))
35603 if (is_gimple_call (gsi_stmt (gsi))
35604 && !gimple_call_builtin_p (gsi_stmt (gsi)))
35606 if (dump_file && (dump_flags & TDF_DETAILS))
35607 fprintf (dump_file, "Predict doloop failure due to"
35608 " call in loop.\n");
35609 return false;
35611 gsi_next (&gsi);
35615 return true;
35618 /* Implement targetm.loop_unroll_adjust. Use this to block unrolling of loops
35619 that may later be turned into MVE Tail Predicated Low Overhead Loops. The
35620 performance benefit of an MVE LoL is likely to be much higher than that of
35621 the unrolling. */
35623 unsigned
35624 arm_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
35626 if (TARGET_HAVE_MVE
35627 && arm_target_bb_ok_for_lob (loop->latch)
35628 && arm_mve_loop_valid_for_dlstp (loop))
35629 return 0;
35630 else
35631 return nunroll;
35634 /* Function to handle emitting a VPT-unpredicated version of a VPT-predicated
35635 insn to a sequence. */
35637 static bool
35638 arm_emit_mve_unpredicated_insn_to_seq (rtx_insn* insn)
35640 rtx insn_vpr_reg_operand = arm_get_required_vpr_reg_param (insn);
35641 int new_icode = get_attr_mve_unpredicated_insn (insn);
35642 if (!in_sequence_p ()
35643 || !MVE_VPT_PREDICATED_INSN_P (insn)
35644 || (!insn_vpr_reg_operand)
35645 || (!new_icode))
35646 return false;
35648 extract_insn (insn);
35649 rtx arr[8];
35650 int j = 0;
35652 /* When transforming a VPT-predicated instruction into its unpredicated
35653 equivalent we need to drop the VPR operand and we may need to also drop a
35654 merge "vuninit" input operand, depending on the instruction pattern. Here
35655 ensure that we have at most a two-operand difference between the two
35656 instructions. */
35657 int n_operands_diff
35658 = recog_data.n_operands - insn_data[new_icode].n_operands;
35659 if (!(n_operands_diff > 0 && n_operands_diff <= 2))
35660 return false;
35662 rtx move = NULL_RTX;
35663 /* Then, loop through the operands of the predicated
35664 instruction, and retain the ones that map to the
35665 unpredicated instruction. */
35666 for (int i = 0; i < recog_data.n_operands; i++)
35668 /* Ignore the VPR and, if needed, the vuninit
35669 operand. */
35670 if (insn_vpr_reg_operand == recog_data.operand[i])
35671 continue;
35672 if (n_operands_diff == 2
35673 && !strcmp (recog_data.constraints[i], "0"))
35675 move = gen_rtx_SET (arr[0], recog_data.operand[i]);
35676 arr[0] = recog_data.operand[i];
35678 else
35679 arr[j++] = recog_data.operand[i];
35682 /* Finally, emit the unpredicated instruction. */
35683 rtx_insn *new_insn;
35684 switch (j)
35686 case 1:
35687 new_insn = emit_insn (GEN_FCN (new_icode) (arr[0]));
35688 break;
35689 case 2:
35690 new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1]));
35691 break;
35692 case 3:
35693 new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1], arr[2]));
35694 break;
35695 case 4:
35696 new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1], arr[2],
35697 arr[3]));
35698 break;
35699 case 5:
35700 new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1], arr[2],
35701 arr[3], arr[4]));
35702 break;
35703 case 6:
35704 new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1], arr[2],
35705 arr[3], arr[4], arr[5]));
35706 break;
35707 case 7:
35708 new_insn = emit_insn (GEN_FCN (new_icode) (arr[0], arr[1], arr[2],
35709 arr[3], arr[4], arr[5],
35710 arr[6]));
35711 break;
35712 default:
35713 gcc_unreachable ();
35715 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
35716 if (move)
35718 new_insn = emit_insn (move);
35719 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
35721 return true;
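/* Sketch of the effect (illustrative): a VPT-predicated insn such as the
   pattern behind "vaddt.i32 q0, q1, q2" is re-emitted through its
   unpredicated icode (a plain "vadd.i32"), dropping the VPR operand and,
   where present, handling the merge operand tied by the "0" constraint with
   an extra move.  Inside a dlstp/letp loop the tail predication is then
   provided by the loop itself rather than by each individual insn.  */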
35724 /* Return TRUE if INSN defines an MVE vector operand that has zeroed
35725 tail-predicated lanes.  This is true if either:
35726 * INSN is predicated by VCTP_VPR_GENERATED and the 'invalid lanes' operand
35727 is in the PROPS_ZERO_SET,
35728 * all MVE vector operands are in the PROPS_ZERO_SET
35731 static bool
35732 arm_mve_propagate_zero_pred_p (vec <rtx_insn *> *props_zero_set,
35733 rtx_insn *insn, rtx vctp_vpr_generated)
35735 if (arm_mve_load_store_insn_p (insn, DL_USAGE_READ))
35736 return true;
35737 if (arm_mve_load_store_insn_p (insn, DL_USAGE_WRITE))
35738 return false;
35740 int inactive_idx = -1;
35742 extract_insn (insn);
35743 /* If INSN is predicated by VCTP_VPR_GENERATED, then all tail-predicated
35744 lanes will keep the value that is in the 'invalid lanes' register which we
35745 identify by the "0" constraint, to ensure it is the same as the 'result'
35746 register of this instruction. */
35747 if (arm_mve_insn_predicated_by (insn, vctp_vpr_generated))
35749 for (int i = 0; i < recog_data.n_operands; i++)
35751 if (strcmp (recog_data.constraints[i], "0") == 0
35752 && VALID_MVE_MODE (GET_MODE (recog_data.operand[i])))
35754 inactive_idx = i;
35755 break;
35760 if (inactive_idx > 0)
35762 rtx op = recog_data.operand[inactive_idx];
35763 rtx_insn *def_insn = arm_last_vect_def_insn (op, insn);
35764 return def_insn != NULL_RTX && props_zero_set->contains (def_insn);
35767 /* If this instruction is not predicated by VCTP_VPR_GENERATED, then we must
35768 check that all vector operands have zeroed tail-predicated lanes, and that
35769 it has at least one vector operand. */
35770 bool at_least_one_vector = false;
35771 df_ref insn_uses;
35772 FOR_EACH_INSN_USE (insn_uses, insn)
35774 rtx reg = DF_REF_REG (insn_uses);
35775 if (!VALID_MVE_MODE (GET_MODE (reg)))
35776 continue;
35778 rtx_insn *def_insn = arm_last_vect_def_insn (reg, insn);
35779 if (def_insn && props_zero_set->contains (def_insn))
35780 at_least_one_vector |= true;
35781 else
35782 return false;
35785 return at_least_one_vector;
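/* For example (purely illustrative assembly), given

	vpst
	vldrwt.u32	q0, [r0]	@ inactive lanes of q0 are zeroed
	vpst
	vaddt.i32	q1, q0, q0	@ inactive lanes keep the old q1

   the predicated load always qualifies (which is why DL_USAGE_READ insns are
   accepted unconditionally above), while the predicated add qualifies only if
   the previous definition of its "0"-tied operand q1 is itself in
   PROPS_ZERO_SET.  */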
35789 /* Attempt to transform the contents of the loop basic block from VPT
35790 predicated insns into unpredicated insns for a dlstp/letp loop. Returns
35791 the number to decrement from the total number of elements each iteration.
35792 Returns 1 if tail predication cannot be performed, falling back to scalar
35793 low-overhead loops. */
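/* As a sketch of the intended transformation (illustrative assembly only,
   loop set-up omitted), a VPT-predicated low-overhead loop of the form

	loop:
	  vctp.32	r3
	  vpst
	  vldrwt.u32	q0, [r0], #16
	  vpst
	  vaddt.i32	q1, q1, q0
	  subs		r3, r3, #4
	  le		lr, loop

   becomes a tail-predicated dlstp/letp loop in which the element count is
   decremented implicitly and the MVE insns are emitted unpredicated:

	  dlstp.32	lr, r3
	loop:
	  vldrw.u32	q0, [r0], #16
	  vadd.i32	q1, q1, q0
	  letp		lr, loop  */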
35796 arm_attempt_dlstp_transform (rtx label)
35798 if (!dlstp_enabled)
35799 return 1;
35801 basic_block body = single_succ (BLOCK_FOR_INSN (label));
35803 /* Ensure that the bb is within a loop that has all required metadata. */
35804 if (!body->loop_father || !body->loop_father->header
35805 || !body->loop_father->simple_loop_desc)
35806 return 1;
35808 loop *loop = body->loop_father;
35809 /* Instruction that sets the predicate mask depending on how many elements
35810 are left to process. */
35811 rtx_insn *vctp_insn = arm_mve_loop_valid_for_dlstp (loop);
35812 if (!vctp_insn)
35813 return 1;
35815 gcc_assert (single_set (vctp_insn));
35817 rtx vctp_vpr_generated = single_set (vctp_insn);
35818 if (!vctp_vpr_generated)
35819 return 1;
35821 vctp_vpr_generated = SET_DEST (vctp_vpr_generated);
35823 if (!vctp_vpr_generated || !REG_P (vctp_vpr_generated)
35824 || !VALID_MVE_PRED_MODE (GET_MODE (vctp_vpr_generated)))
35825 return 1;
35827 /* decrementnum is already known to be valid at this point. */
35828 int decrementnum = arm_mve_get_vctp_lanes (vctp_insn);
35830 rtx_insn *insn = 0;
35831 rtx_insn *cur_insn = 0;
35832 rtx_insn *seq;
35833 auto_vec <rtx_insn *> props_zero_set;
35835 /* Scan through the insns in the loop bb and emit the transformed bb
35836 insns to a sequence. */
35837 start_sequence ();
35838 FOR_BB_INSNS (body, insn)
35840 if (GET_CODE (insn) == CODE_LABEL || NOTE_INSN_BASIC_BLOCK_P (insn))
35841 continue;
35842 else if (NOTE_P (insn))
35843 emit_note ((enum insn_note)NOTE_KIND (insn));
35844 else if (DEBUG_INSN_P (insn))
35845 emit_debug_insn (PATTERN (insn));
35846 else if (!INSN_P (insn))
35848 end_sequence ();
35849 return 1;
35851 /* If the transformation is successful we no longer need the vctp
35852 instruction. */
35853 else if (insn == vctp_insn)
35854 continue;
35855 /* If the insn pattern requires the use of the VPR value from the
35856 vctp as an input parameter for predication. */
35857 else if (arm_mve_insn_predicated_by (insn, vctp_vpr_generated))
35859 /* Check whether this INSN propagates the zeroed tail-predication
35860 lanes. */
35861 if (arm_mve_propagate_zero_pred_p (&props_zero_set, insn,
35862 vctp_vpr_generated))
35863 props_zero_set.safe_push (insn);
35864 bool success = arm_emit_mve_unpredicated_insn_to_seq (insn);
35865 if (!success)
35867 end_sequence ();
35868 return 1;
35871 /* If the insn isn't VPT predicated on vctp_vpr_generated, we need to
35872 make sure that it is still valid within the dlstp/letp loop. */
35873 else
35875 /* If this instruction USE-s the vctp_vpr_generated other than for
35876 predication, this blocks the transformation as we are not allowed
35877 to optimise the VPR value away. */
35878 df_ref insn_uses = NULL;
35879 FOR_EACH_INSN_USE (insn_uses, insn)
35881 if (reg_overlap_mentioned_p (vctp_vpr_generated,
35882 DF_REF_REG (insn_uses)))
35884 end_sequence ();
35885 return 1;
35888 /* If within the loop we have an MVE vector instruction that is
35889 unpredicated, the dlstp/letp looping will add implicit
35890 predication to it. This will result in a change in behaviour
35891 of the instruction, so we need to find out if any instructions
35892 that feed into the current instruction were implicitly
35893 predicated. */
35894 if (MVE_VPT_PREDICABLE_INSN_P (insn)
35895 && !arm_mve_impl_predicated_p (&props_zero_set, insn,
35896 vctp_vpr_generated))
35898 end_sequence ();
35899 return 1;
35901 emit_insn (PATTERN (insn));
35904 seq = get_insns ();
35905 end_sequence ();
35907 /* Re-write the entire BB contents with the transformed
35908 sequence. */
35909 FOR_BB_INSNS_SAFE (body, insn, cur_insn)
35910 if (!(GET_CODE (insn) == CODE_LABEL || NOTE_INSN_BASIC_BLOCK_P (insn)))
35911 delete_insn (insn);
35913 emit_insn_after (seq, BB_END (body));
35915 /* The transformation has succeeded, so now modify the "count"
35916 (a.k.a. niter_expr) for the middle-end. Also set noloop_assumptions
35917 to NULL to stop the middle-end from making assumptions about the
35918 number of iterations. */
35919 simple_loop_desc (body->loop_father)->niter_expr
35920 = XVECEXP (SET_SRC (PATTERN (vctp_insn)), 0, 0);
35921 simple_loop_desc (body->loop_father)->noloop_assumptions = NULL_RTX;
35922 return decrementnum;
35925 #if CHECKING_P
35926 namespace selftest {
35928 /* Scan the static data tables generated by parsecpu.awk looking for
35929 potential issues with the data. We primarily check for
35930 inconsistencies in the option extensions at present (extensions
35931 that duplicate others but aren't marked as aliases). Furthermore,
35932 for correct canonicalization later options must never be a subset
35933 of an earlier option. Any extension should also only specify other
35934 feature bits and never an architecture bit. The architecture is inferred
35935 from the declaration of the extension. */
35936 static void
35937 arm_test_cpu_arch_data (void)
35939 const arch_option *arch;
35940 const cpu_option *cpu;
35941 auto_sbitmap target_isa (isa_num_bits);
35942 auto_sbitmap isa1 (isa_num_bits);
35943 auto_sbitmap isa2 (isa_num_bits);
35945 for (arch = all_architectures; arch->common.name != NULL; ++arch)
35947 const cpu_arch_extension *ext1, *ext2;
35949 if (arch->common.extensions == NULL)
35950 continue;
35952 arm_initialize_isa (target_isa, arch->common.isa_bits);
35954 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
35956 if (ext1->alias)
35957 continue;
35959 arm_initialize_isa (isa1, ext1->isa_bits);
35960 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
35962 if (ext2->alias || ext1->remove != ext2->remove)
35963 continue;
35965 arm_initialize_isa (isa2, ext2->isa_bits);
35966 /* If the option is a subset of the parent option, it doesn't
35967 add anything and so isn't useful. */
35968 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
35970 /* If the extension specifies any architectural bits then
35971 disallow it. Extensions should only specify feature bits. */
35972 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
35977 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
35979 const cpu_arch_extension *ext1, *ext2;
35981 if (cpu->common.extensions == NULL)
35982 continue;
35984 arm_initialize_isa (target_isa, arch->common.isa_bits);
35986 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
35988 if (ext1->alias)
35989 continue;
35991 arm_initialize_isa (isa1, ext1->isa_bits);
35992 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
35994 if (ext2->alias || ext1->remove != ext2->remove)
35995 continue;
35997 arm_initialize_isa (isa2, ext2->isa_bits);
35998 /* If the option is a subset of the parent option, it doesn't
35999 add anything and so isn't useful. */
36000 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
36002 /* If the extension specifies any architectural bits then
36003 disallow it. Extensions should only specify feature bits. */
36004 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
36010 /* Scan the static data tables generated by parsecpu.awk looking for
36011 potential issues with the data. Here we check for consistency between the
36012 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
36013 a feature bit that is not defined by any FPU flag. */
36014 static void
36015 arm_test_fpu_data (void)
36017 auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
36018 auto_sbitmap fpubits (isa_num_bits);
36019 auto_sbitmap tmpset (isa_num_bits);
36021 static const enum isa_feature fpu_bitlist_internal[]
36022 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
36023 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
36025 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
36027 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
36028 bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
36029 bitmap_clear (isa_all_fpubits_internal);
36030 bitmap_copy (isa_all_fpubits_internal, tmpset);
36033 if (!bitmap_empty_p (isa_all_fpubits_internal))
36035 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
36036 " group that are not defined by any FPU.\n"
36037 " Check your arm-cpus.in.\n");
36038 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
36042 static void
36043 arm_run_selftests (void)
36045 arm_test_cpu_arch_data ();
36046 arm_test_fpu_data ();
36048 } /* Namespace selftest. */
36050 #undef TARGET_RUN_TARGET_SELFTESTS
36051 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
36052 #endif /* CHECKING_P */
36054 /* Implement TARGET_STACK_PROTECT_GUARD. In the case of a
36055 global-variable-based guard, use the default implementation;
36056 otherwise return a null tree. */
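/* A minimal illustration (option spellings assumed from arm.opt):

     -mstack-protector-guard=global
	selects SSP_GLOBAL, so the default global-variable guard is used;
     -mstack-protector-guard=tls -mstack-protector-guard-offset=<off>
	makes this hook return a null tree, and the guard is instead loaded
	relative to the thread pointer by the stack-protector patterns.  */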
36057 static tree
36058 arm_stack_protect_guard (void)
36060 if (arm_stack_protector_guard == SSP_GLOBAL)
36061 return default_stack_protect_guard ();
36063 return NULL_TREE;
36066 /* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
36067 Unlike the arm version, we do NOT implement asm flag outputs. */
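/* For example (illustrative source), the flag-output constraint syntax that
   the arm version accepts, e.g.

     int eq;
     asm ("cmp %1, %2" : "=@cceq" (eq) : "r" (a), "r" (b));

   is diagnosed with a "sorry" here rather than being silently mishandled.  */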
36069 rtx_insn *
36070 thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
36071 vec<machine_mode> & /*input_modes*/,
36072 vec<const char *> &constraints,
36073 vec<rtx> &, vec<rtx> & /*clobbers*/,
36074 HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
36076 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
36077 if (startswith (constraints[i], "=@cc"))
36079 sorry ("%<asm%> flags not supported in thumb1 mode");
36080 break;
36082 return NULL;
36085 /* Generate code to enable conditional branches in functions over 1 MiB.
36086 Parameters are:
36087 operands: the operands list of the asm insn (see arm_cond_branch or
36088 arm_cond_branch_reversed).
36089 pos_label: an index into the operands array; operands[pos_label] is
36090 the asm label of the final jump destination.
36091 dest: a string used to generate the asm label of the intermediate
36092 destination.
36093 branch_format: a string denoting the intermediate branch format, e.g.
36094 "beq", "bne", etc. */
36096 const char *
36097 arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
36098 const char * branch_format)
36100 rtx_code_label * tmp_label = gen_label_rtx ();
36101 char label_buf[256];
36102 char buffer[128];
36103 ASM_GENERATE_INTERNAL_LABEL (label_buf, dest , \
36104 CODE_LABEL_NUMBER (tmp_label));
36105 const char *label_ptr = arm_strip_name_encoding (label_buf);
36106 rtx dest_label = operands[pos_label];
36107 operands[pos_label] = tmp_label;
36109 snprintf (buffer, sizeof (buffer), "%s%s", branch_format , label_ptr);
36110 output_asm_insn (buffer, operands);
36112 snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
36113 operands[pos_label] = dest_label;
36114 output_asm_insn (buffer, operands);
36115 return "";
36118 /* If the given mode matches, load from memory to LO_REGS
36119 (i.e. [Rn], Rn <= LO_REGS). */
36120 enum reg_class
36121 arm_mode_base_reg_class (machine_mode mode)
36123 if (TARGET_HAVE_MVE
36124 && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
36125 return LO_REGS;
36127 return MODE_BASE_REG_REG_CLASS (mode);
36130 #undef TARGET_DOCUMENTATION_NAME
36131 #define TARGET_DOCUMENTATION_NAME "ARM"
36133 struct gcc_target targetm = TARGET_INITIALIZER;
36135 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
36137 opt_machine_mode
36138 arm_get_mask_mode (machine_mode mode)
36140 if (TARGET_HAVE_MVE)
36141 return arm_mode_to_pred_mode (mode);
36143 return default_get_mask_mode (mode);
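/* For example (illustrative), with MVE enabled the mask mode chosen for
   V4SImode is the predicate mode V4BImode; without MVE the generic
   default_get_mask_mode result is used.  */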
36146 /* Helper function to determine whether SEQ represents a sequence of
36147 instructions corresponding to the vsel<cond> floating point instructions.
36148 This is a heuristic to check whether the proposed optimisation is desired;
36149 the choice has no consequence for correctness. */
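/* A rough sketch (assumed RTL shape, not taken from a dump) of a sequence
   this accepts:

     (set (reg:SF d) (reg:SF s))                   ;; optional register copies
     (set (reg:CCFP cc) (compare:CCFP ...))        ;; the FP compare
     (set (reg:SF r)
	  (if_then_else:SF (cond) (...) (...)))    ;; the final selection  */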
36150 static bool
36151 arm_is_vsel_fp_insn (rtx_insn *seq)
36153 rtx_insn *curr_insn = seq;
36154 rtx set = NULL_RTX;
36155 /* The pattern may start with a simple set with register operands. Skip
36156 through any of those. */
36157 while (curr_insn)
36159 set = single_set (curr_insn);
36160 if (!set
36161 || !REG_P (SET_DEST (set)))
36162 return false;
36164 if (!REG_P (SET_SRC (set)))
36165 break;
36166 curr_insn = NEXT_INSN (curr_insn);
36169 if (!set)
36170 return false;
36172 /* The next instruction should be a compare. */
36173 if (!REG_P (SET_DEST (set))
36174 || GET_CODE (SET_SRC (set)) != COMPARE)
36175 return false;
36177 curr_insn = NEXT_INSN (curr_insn);
36178 if (!curr_insn)
36179 return false;
36181 /* And the last instruction should be an IF_THEN_ELSE. */
36182 set = single_set (curr_insn);
36183 if (!set
36184 || !REG_P (SET_DEST (set))
36185 || GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
36186 return false;
36188 return !NEXT_INSN (curr_insn);
36192 /* Helper function to determine whether SEQ represents a sequence of
36193 instructions corresponding to the Armv8.1-M Mainline conditional arithmetic
36194 instructions: csinc, csneg and csinv. The cinc instruction is generated
36195 using a different mechanism.
36196 This is a heuristic to check whether the proposed optimisation is desired;
36197 the choice has no consequence for correctness. */
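/* For example (illustrative), source such as

     x = (a == b) ? y : z + 1;

   is if-converted by "noce" into a (plus ...) set, a compare and an
   if_then_else, which can later match the csinc pattern; csneg and csinv
   arise analogously when one arm is a negation or a bitwise-not.  */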
36199 static bool
36200 arm_is_v81m_cond_insn (rtx_insn *seq)
36202 rtx_insn *curr_insn = seq;
36203 rtx set = NULL_RTX;
36204 /* The pattern may start with a simple set with register operands. Skip
36205 through any of those. */
36206 while (curr_insn)
36208 set = single_set (curr_insn);
36209 if (!set
36210 || !REG_P (SET_DEST (set)))
36211 return false;
36213 if (!REG_P (SET_SRC (set)))
36214 break;
36215 curr_insn = NEXT_INSN (curr_insn);
36218 if (!set)
36219 return false;
36221 /* The next instruction should be one of:
36222 NEG: for csneg,
36223 PLUS: for csinc,
36224 NOT: for csinv. */
36225 if (GET_CODE (SET_SRC (set)) != NEG
36226 && GET_CODE (SET_SRC (set)) != PLUS
36227 && GET_CODE (SET_SRC (set)) != NOT)
36228 return false;
36230 curr_insn = NEXT_INSN (curr_insn);
36231 if (!curr_insn)
36232 return false;
36234 /* The next instruction should be a COMPARE. */
36235 set = single_set (curr_insn);
36236 if (!set
36237 || !REG_P (SET_DEST (set))
36238 || GET_CODE (SET_SRC (set)) != COMPARE)
36239 return false;
36241 curr_insn = NEXT_INSN (curr_insn);
36242 if (!curr_insn)
36243 return false;
36245 /* And the last instruction should be an IF_THEN_ELSE. */
36246 set = single_set (curr_insn);
36247 if (!set
36248 || !REG_P (SET_DEST (set))
36249 || GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
36250 return false;
36252 return !NEXT_INSN (curr_insn);
36255 /* For Armv8.1-M Mainline we have both conditional execution through IT blocks,
36256 as well as conditional arithmetic instructions controlled by
36257 TARGET_COND_ARITH. To generate the latter we rely on a special part of the
36258 "ce" pass that generates code for targets that don't support conditional
36259 execution of general instructions known as "noce". These transformations
36260 happen before 'reload_completed'. However, "noce" also triggers for some
36261 unwanted patterns [PR 116444] that prevent "ce" optimisations after reload.
36262 To make sure we can get both we use the TARGET_NOCE_CONVERSION_PROFITABLE_P
36263 hook to only allow "noce" to generate the patterns that are profitable. */
36265 bool
36266 arm_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
36268 if (!TARGET_COND_ARITH
36269 || reload_completed)
36270 return default_noce_conversion_profitable_p (seq, if_info);
36272 if (arm_is_v81m_cond_insn (seq))
36273 return true;
36275 /* Look for vsel<cond> opportunities as we still want to codegen these for
36276 Armv8.1-M Mainline targets. */
36277 if (arm_is_vsel_fp_insn (seq))
36278 return true;
36280 return false;
36283 /* Output assembly to read the thread pointer from the appropriate TPIDR
36284 register into DEST. If PRED_P, also emit the %? that can be used to
36285 output the predication code. */
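/* For example (illustrative, assuming the -mtp=tpidruro spelling), reading
   TPIDRURO into r0 would emit

	mrc	p15, 0, r0, c13, c0, 3	@ load_tp_hard

   with "%?" inserted after the mnemonic when PRED_P is set.  */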
36287 const char *
36288 arm_output_load_tpidr (rtx dst, bool pred_p)
36290 char buf[64];
36291 int tpidr_coproc_num = -1;
36292 switch (target_thread_pointer)
36294 case TP_TPIDRURW:
36295 tpidr_coproc_num = 2;
36296 break;
36297 case TP_TPIDRURO:
36298 tpidr_coproc_num = 3;
36299 break;
36300 case TP_TPIDRPRW:
36301 tpidr_coproc_num = 4;
36302 break;
36303 default:
36304 gcc_unreachable ();
36306 snprintf (buf, sizeof (buf),
36307 "mrc%s\tp15, 0, %%0, c13, c0, %d\t@ load_tp_hard",
36308 pred_p ? "%?" : "", tpidr_coproc_num);
36309 output_asm_insn (buf, &dst);
36310 return "";
36313 /* Return the MVE vector mode that has NUNITS elements of mode INNER_MODE. */
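/* For example (illustrative), arm_mve_data_mode (SImode, 4) yields V4SImode
   and arm_mve_data_mode (HFmode, 8) yields V8HFmode.  */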
36314 opt_machine_mode
36315 arm_mve_data_mode (scalar_mode inner_mode, poly_uint64 nunits)
36317 enum mode_class mclass
36318 = (SCALAR_FLOAT_MODE_P (inner_mode) ? MODE_VECTOR_FLOAT : MODE_VECTOR_INT);
36319 machine_mode mode;
36320 FOR_EACH_MODE_IN_CLASS (mode, mclass)
36321 if (inner_mode == GET_MODE_INNER (mode)
36322 && known_eq (nunits, GET_MODE_NUNITS (mode)))
36323 return mode;
36324 return opt_machine_mode ();
36327 #include "gt-arm.h"