1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2024 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include <sstream>
24 #define IN_TARGET_CODE 1
26 #include "config.h"
27 #define INCLUDE_VECTOR
28 #include "system.h"
29 #include "coretypes.h"
30 #include "backend.h"
31 #include "target.h"
32 #include "rtl.h"
33 #include "tree.h"
34 #include "gimple.h"
35 #include "cfghooks.h"
36 #include "df.h"
37 #include "memmodel.h"
38 #include "tm_p.h"
39 #include "stringpool.h"
40 #include "attribs.h"
41 #include "optabs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "flags.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "reload.h"
54 #include "output.h"
55 #include "insn-attr.h"
56 #include "dwarf2.h"
57 #include "langhooks.h"
58 #include "cfgrtl.h"
59 #include "intl.h"
60 #include "sched-int.h"
61 #include "gimplify.h"
62 #include "tm-constrs.h"
63 #include "opts.h"
64 #include "tree-pass.h"
65 #include "context.h"
66 #include "builtins.h"
67 #include "rtl-iter.h"
68 #include "regs.h"
69 #include "toplev.h"
71 /* This file should be included last. */
72 #include "target-def.h"
74 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
76 #define CONST_OK_FOR_ADD(size) CONST_OK_FOR_I08 (size)
77 #define GEN_MOV (*(gen_movsi))
78 #define GEN_ADD3 (*(gen_addsi3))
79 #define GEN_SUB3 (*(gen_subsi3))
81 /* Used to simplify the logic below. Find the attributes wherever
82 they may be. */
83 #define SH_ATTRIBUTES(decl) \
84 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
85 : DECL_ATTRIBUTES (decl) \
86 ? (DECL_ATTRIBUTES (decl)) \
87 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
89 /* Set to true by expand_prologue() when the function is an
90 interrupt handler. */
91 bool current_function_interrupt;
93 tree sh_deferred_function_attributes;
94 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
96 /* Global variables for machine-dependent things. */
98 /* Which cpu are we scheduling for. */
99 enum processor_type sh_cpu;
101 /* Definitions used in ready queue reordering for first scheduling pass. */
103 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
104 static short *regmode_weight[2];
106 /* Total SFmode and SImode weights of scheduled insns. */
107 static int curr_regmode_pressure[2];
109 /* Number of r0 life regions. */
110 static int r0_life_regions;
112 /* If true, skip cycles for Q -> R movement. */
113 static int skip_cycles = 0;
115 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
116 and returned from sh_reorder2. */
117 static short cached_can_issue_more;
119 /* Unique number for UNSPEC_BBR pattern. */
120 static unsigned int unspec_bbr_uid = 1;
122 /* Provides the class number of the smallest class containing
123 reg number. */
124 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
126 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
159 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
160 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
161 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
162 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
163 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
164 GENERAL_REGS, GENERAL_REGS,
167 char sh_register_names[FIRST_PSEUDO_REGISTER] \
168 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
170 char sh_additional_register_names[ADDREGNAMES_SIZE] \
171 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
172 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
174 int assembler_dialect;
176 static void split_branches (rtx_insn *);
177 static int branch_dest (rtx);
178 static void print_slot (rtx_sequence *);
179 static rtx_code_label *add_constant (rtx, machine_mode, rtx);
180 static void dump_table (rtx_insn *, rtx_insn *);
181 static bool broken_move (rtx_insn *);
182 static bool mova_p (rtx_insn *);
183 static rtx_insn *find_barrier (int, rtx_insn *, rtx_insn *);
184 static bool noncall_uses_reg (rtx, rtx_insn *, rtx *);
185 static rtx_insn *gen_block_redirect (rtx_insn *, int, int);
186 static void sh_reorg (void);
187 static void sh_option_override (void);
188 static void sh_override_options_after_change (void);
189 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
190 static rtx_insn* emit_frame_insn (rtx);
191 static rtx push (int);
192 static void pop (int);
193 static void push_regs (HARD_REG_SET* mask, bool interrupt_handler);
194 static int calc_live_regs (HARD_REG_SET *);
195 static HOST_WIDE_INT rounded_frame_size (int);
196 static bool sh_frame_pointer_required (void);
197 static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
198 static int sh_mode_needed (int, rtx_insn *, HARD_REG_SET);
199 static int sh_mode_after (int, int, rtx_insn *, HARD_REG_SET);
200 static int sh_mode_entry (int);
201 static int sh_mode_exit (int);
202 static int sh_mode_priority (int entity, int n);
204 static rtx mark_constant_pool_use (rtx);
205 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
206 int, bool *);
207 static tree sh_handle_resbank_handler_attribute (tree *, tree,
208 tree, int, bool *);
209 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
210 tree, int, bool *);
211 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
212 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
213 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
214 static void sh_print_operand (FILE *, rtx, int);
215 static void sh_print_operand_address (FILE *, machine_mode, rtx);
216 static bool sh_print_operand_punct_valid_p (unsigned char code);
217 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
218 static void sh_output_function_epilogue (FILE *);
219 static void sh_insert_attributes (tree, tree *);
220 static const char *sh_check_pch_target_flags (int);
221 static int sh_register_move_cost (machine_mode, reg_class_t, reg_class_t);
222 static int sh_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
223 static int sh_issue_rate (void);
224 static int sh_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *sort_p);
225 static short find_set_regmode_weight (rtx, machine_mode);
226 static short find_insn_regmode_weight (rtx, machine_mode);
227 static void find_regmode_weight (basic_block, machine_mode);
228 static int find_r0_life_regions (basic_block);
229 static void sh_md_init_global (FILE *, int, int);
230 static void sh_md_finish_global (FILE *, int);
231 static int rank_for_reorder (const void *, const void *);
232 static void swap_reorder (rtx_insn **, int);
233 static void ready_reorder (rtx_insn **, int);
234 static bool high_pressure (machine_mode);
235 static int sh_reorder (FILE *, int, rtx_insn **, int *, int);
236 static int sh_reorder2 (FILE *, int, rtx_insn **, int *, int);
237 static void sh_md_init (FILE *, int, int);
238 static int sh_variable_issue (FILE *, int, rtx_insn *, int);
240 static bool sh_function_ok_for_sibcall (tree, tree);
242 static bool sh_can_follow_jump (const rtx_insn *, const rtx_insn *);
243 static bool sh_ms_bitfield_layout_p (const_tree);
245 static void sh_init_builtins (void);
246 static tree sh_builtin_decl (unsigned, bool);
247 static rtx sh_expand_builtin (tree, rtx, rtx, machine_mode, int);
248 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
249 HOST_WIDE_INT, tree);
250 static void sh_file_start (void);
251 static bool sh_assemble_integer (rtx, unsigned int, int);
252 static bool flow_dependent_p (rtx_insn *, rtx_insn *);
253 static void flow_dependent_p_1 (rtx, const_rtx, void *);
254 static int shiftcosts (rtx);
255 static int and_xor_ior_costs (rtx, int);
256 static int addsubcosts (rtx);
257 static int multcosts (rtx);
258 static bool unspec_caller_rtx_p (rtx);
259 static bool sh_cannot_copy_insn_p (rtx_insn *);
260 static bool sh_cannot_force_const_mem_p (machine_mode, rtx);
261 static bool sh_rtx_costs (rtx, machine_mode, int, int, int *, bool);
262 static int sh_address_cost (rtx, machine_mode, addr_space_t, bool);
263 static int sh_pr_n_sets (void);
264 static rtx sh_allocate_initial_value (rtx);
265 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
266 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
267 machine_mode,
268 struct secondary_reload_info *);
269 static bool sh_legitimate_address_p (machine_mode, rtx, bool,
270 code_helper = ERROR_MARK);
271 static rtx sh_legitimize_address (rtx, rtx, machine_mode);
272 static rtx sh_delegitimize_address (rtx);
273 static bool sh_cannot_substitute_mem_equiv_p (rtx);
274 static bool sh_legitimize_address_displacement (rtx *, rtx *,
275 poly_int64, machine_mode);
276 static int scavenge_reg (HARD_REG_SET *s);
278 static rtx sh_struct_value_rtx (tree, int);
279 static rtx sh_function_value (const_tree, const_tree, bool);
280 static bool sh_function_value_regno_p (const unsigned int);
281 static rtx sh_libcall_value (machine_mode, const_rtx);
282 static bool sh_return_in_memory (const_tree, const_tree);
283 static rtx sh_builtin_saveregs (void);
284 static void sh_setup_incoming_varargs (cumulative_args_t,
285 const function_arg_info &, int *, int);
286 static bool sh_strict_argument_naming (cumulative_args_t);
287 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
288 static void sh_atomic_assign_expand_fenv (tree *, tree *, tree *);
289 static tree sh_build_builtin_va_list (void);
290 static void sh_va_start (tree, rtx);
291 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
292 static bool sh_promote_prototypes (const_tree);
293 static machine_mode sh_promote_function_mode (const_tree type,
294 machine_mode,
295 int *punsignedp,
296 const_tree funtype,
297 int for_return);
298 static bool sh_pass_by_reference (cumulative_args_t,
299 const function_arg_info &);
300 static bool sh_callee_copies (cumulative_args_t, const function_arg_info &);
301 static int sh_arg_partial_bytes (cumulative_args_t, const function_arg_info &);
302 static void sh_function_arg_advance (cumulative_args_t,
303 const function_arg_info &);
304 static rtx sh_function_arg (cumulative_args_t, const function_arg_info &);
305 static int sh_dwarf_calling_convention (const_tree);
306 static void sh_encode_section_info (tree, rtx, int);
307 static bool sh2a_function_vector_p (tree);
308 static void sh_trampoline_init (rtx, tree, rtx);
309 static rtx sh_trampoline_adjust_address (rtx);
310 static void sh_conditional_register_usage (void);
311 static bool sh_legitimate_constant_p (machine_mode, rtx);
312 static int mov_insn_size (machine_mode, bool);
313 static int mov_insn_alignment_mask (machine_mode, bool);
314 static bool sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
315 unsigned int,
316 enum by_pieces_operation,
317 bool);
318 static bool sequence_insn_p (rtx_insn *);
319 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
320 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
321 machine_mode, bool);
322 static bool sh_legitimate_combined_insn (rtx_insn* insn);
324 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
326 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
327 static unsigned int sh_hard_regno_nregs (unsigned int, machine_mode);
328 static bool sh_hard_regno_mode_ok (unsigned int, machine_mode);
329 static bool sh_modes_tieable_p (machine_mode, machine_mode);
330 static bool sh_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
331 static machine_mode sh_c_mode_for_floating_type (enum tree_index);
333 TARGET_GNU_ATTRIBUTES (sh_attribute_table,
335 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
336 affects_type_identity, handler, exclude } */
337 { "interrupt_handler", 0, 0, true, false, false, false,
338 sh_handle_interrupt_handler_attribute, NULL },
339 { "sp_switch", 1, 1, true, false, false, false,
340 sh_handle_sp_switch_attribute, NULL },
341 { "trap_exit", 1, 1, true, false, false, false,
342 sh_handle_trap_exit_attribute, NULL },
343 { "renesas", 0, 0, false, true, false, false,
344 sh_handle_renesas_attribute, NULL },
345 { "trapa_handler", 0, 0, true, false, false, false,
346 sh_handle_interrupt_handler_attribute, NULL },
347 { "nosave_low_regs", 0, 0, true, false, false, false,
348 sh_handle_interrupt_handler_attribute, NULL },
349 { "resbank", 0, 0, true, false, false, false,
350 sh_handle_resbank_handler_attribute, NULL },
351 { "function_vector", 1, 1, true, false, false, false,
352 sh2a_handle_function_vector_handler_attribute, NULL }
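/* For illustration only -- hypothetical user code, not part of this file:
   these attributes are applied to functions in the usual GCC way, e.g.

     void isr (void) __attribute__ ((interrupt_handler));
     void isr2 (void) __attribute__ ((interrupt_handler,
                                      sp_switch ("alt_stack"),
                                      trap_exit (11)));

   "alt_stack" and the trap number 11 are made-up example values; sp_switch
   takes exactly one string argument and trap_exit exactly one integer
   argument, matching the min_len/max_len fields in the table above.  */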
355 /* Initialize the GCC target structure. */
356 #undef TARGET_ATTRIBUTE_TABLE
357 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
359 /* The next two are used for debug info when compiling with -gdwarf. */
360 #undef TARGET_ASM_UNALIGNED_HI_OP
361 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
362 #undef TARGET_ASM_UNALIGNED_SI_OP
363 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
365 #undef TARGET_OPTION_OVERRIDE
366 #define TARGET_OPTION_OVERRIDE sh_option_override
368 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
369 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
370 sh_override_options_after_change
372 #undef TARGET_PRINT_OPERAND
373 #define TARGET_PRINT_OPERAND sh_print_operand
374 #undef TARGET_PRINT_OPERAND_ADDRESS
375 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
376 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
377 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
378 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
379 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
381 #undef TARGET_ASM_FUNCTION_EPILOGUE
382 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
384 #undef TARGET_ASM_OUTPUT_MI_THUNK
385 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
387 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
388 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
389 hook_bool_const_tree_hwi_hwi_const_tree_true
391 #undef TARGET_ASM_FILE_START
392 #define TARGET_ASM_FILE_START sh_file_start
393 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
394 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
396 #undef TARGET_ASM_INTEGER
397 #define TARGET_ASM_INTEGER sh_assemble_integer
399 #undef TARGET_REGISTER_MOVE_COST
400 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
402 #undef TARGET_INSERT_ATTRIBUTES
403 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
405 #undef TARGET_SCHED_ADJUST_COST
406 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
408 #undef TARGET_SCHED_ISSUE_RATE
409 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
/* The next 5 hooks are implemented to re-enable sched1.  With their help we
   limit the movement of insns in sched1 so as to reduce register pressure.
   The overall idea is to keep count of the SImode and SFmode regs required
   by already scheduled insns.  When these counts cross some threshold
   values, give priority to insns that free registers.  The insn that frees
   registers is most likely to be the insn with the lowest LUID (original
   insn order), but such an insn might sit in the stalled queue (Q) instead
   of the ready queue (R).  To solve this, we skip cycles, up to a maximum
   of 8, so that such insns may move from Q -> R.

   The hooks are described below:

   TARGET_SCHED_INIT_GLOBAL: A target hook added to the generic scheduler;
   it is called inside sched_init just after the find_insn_reg_weights
   call.  It calculates the SImode and SFmode weights of the insns of each
   basic block, much like find_insn_reg_weights does.

   TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.

   TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
   indicated by TARGET_SCHED_REORDER2; doing this may move insns from
   (Q)->(R).

   TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
   high, reorder the ready queue so that the insn with the lowest LUID will
   be issued next.

   TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
   TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.

   TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that
   it can be returned from TARGET_SCHED_REORDER2.

   TARGET_SCHED_INIT: Reset the register pressure counting variables.  */
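/* An informal sketch of how these hooks and the file-scope variables above
   (regmode_weight, curr_regmode_pressure, skip_cycles, cached_can_issue_more)
   fit together, derived from the description; the authoritative behaviour is
   in the hook implementations further down:

     sh_md_init_global  -> compute regmode_weight[] for the insns of each bb
     sh_md_init         -> reset curr_regmode_pressure[]
     sh_variable_issue  -> cache can_issue_more for sh_reorder2
     sh_reorder         -> under high pressure, sort the ready queue so the
                           lowest-LUID insn is issued first
     sh_reorder2        -> under high pressure, request cycle skipping
     sh_dfa_new_cycle   -> skip cycles (at most 8) so stalled insns can move
                           from Q to R.  */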
446 #undef TARGET_SCHED_DFA_NEW_CYCLE
447 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
449 #undef TARGET_SCHED_INIT_GLOBAL
450 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
452 #undef TARGET_SCHED_FINISH_GLOBAL
453 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
455 #undef TARGET_SCHED_VARIABLE_ISSUE
456 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
458 #undef TARGET_SCHED_REORDER
459 #define TARGET_SCHED_REORDER sh_reorder
461 #undef TARGET_SCHED_REORDER2
462 #define TARGET_SCHED_REORDER2 sh_reorder2
464 #undef TARGET_SCHED_INIT
465 #define TARGET_SCHED_INIT sh_md_init
467 #undef TARGET_DELEGITIMIZE_ADDRESS
468 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
470 #undef TARGET_LEGITIMIZE_ADDRESS
471 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
473 #undef TARGET_CAN_FOLLOW_JUMP
474 #define TARGET_CAN_FOLLOW_JUMP sh_can_follow_jump
476 #undef TARGET_MS_BITFIELD_LAYOUT_P
477 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
479 #undef TARGET_INIT_BUILTINS
480 #define TARGET_INIT_BUILTINS sh_init_builtins
481 #undef TARGET_BUILTIN_DECL
482 #define TARGET_BUILTIN_DECL sh_builtin_decl
483 #undef TARGET_EXPAND_BUILTIN
484 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
486 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
487 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
489 #undef TARGET_CANNOT_COPY_INSN_P
490 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
491 #undef TARGET_RTX_COSTS
492 #define TARGET_RTX_COSTS sh_rtx_costs
493 #undef TARGET_ADDRESS_COST
494 #define TARGET_ADDRESS_COST sh_address_cost
495 #undef TARGET_ALLOCATE_INITIAL_VALUE
496 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
498 #undef TARGET_MACHINE_DEPENDENT_REORG
499 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
501 #undef TARGET_DWARF_REGISTER_SPAN
502 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
504 #ifdef HAVE_AS_TLS
505 #undef TARGET_HAVE_TLS
506 #define TARGET_HAVE_TLS true
507 #endif
509 #undef TARGET_PROMOTE_PROTOTYPES
510 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
511 #undef TARGET_PROMOTE_FUNCTION_MODE
512 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
514 #undef TARGET_FUNCTION_VALUE
515 #define TARGET_FUNCTION_VALUE sh_function_value
516 #undef TARGET_FUNCTION_VALUE_REGNO_P
517 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
518 #undef TARGET_LIBCALL_VALUE
519 #define TARGET_LIBCALL_VALUE sh_libcall_value
520 #undef TARGET_STRUCT_VALUE_RTX
521 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
522 #undef TARGET_RETURN_IN_MEMORY
523 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
525 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
526 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
527 #undef TARGET_SETUP_INCOMING_VARARGS
528 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
529 #undef TARGET_STRICT_ARGUMENT_NAMING
530 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
531 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
532 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
533 #undef TARGET_MUST_PASS_IN_STACK
534 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
535 #undef TARGET_PASS_BY_REFERENCE
536 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
537 #undef TARGET_CALLEE_COPIES
538 #define TARGET_CALLEE_COPIES sh_callee_copies
539 #undef TARGET_ARG_PARTIAL_BYTES
540 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
541 #undef TARGET_FUNCTION_ARG
542 #define TARGET_FUNCTION_ARG sh_function_arg
543 #undef TARGET_FUNCTION_ARG_ADVANCE
544 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
546 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
547 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sh_atomic_assign_expand_fenv
549 #undef TARGET_BUILD_BUILTIN_VA_LIST
550 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
551 #undef TARGET_EXPAND_BUILTIN_VA_START
552 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
553 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
554 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
556 #undef TARGET_VECTOR_MODE_SUPPORTED_P
557 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
559 #undef TARGET_CHECK_PCH_TARGET_FLAGS
560 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
562 #undef TARGET_DWARF_CALLING_CONVENTION
563 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
565 #undef TARGET_FRAME_POINTER_REQUIRED
566 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
568 #undef TARGET_MODE_EMIT
569 #define TARGET_MODE_EMIT sh_emit_mode_set
571 #undef TARGET_MODE_NEEDED
572 #define TARGET_MODE_NEEDED sh_mode_needed
574 #undef TARGET_MODE_AFTER
575 #define TARGET_MODE_AFTER sh_mode_after
577 #undef TARGET_MODE_ENTRY
578 #define TARGET_MODE_ENTRY sh_mode_entry
580 #undef TARGET_MODE_EXIT
581 #define TARGET_MODE_EXIT sh_mode_exit
583 #undef TARGET_MODE_PRIORITY
584 #define TARGET_MODE_PRIORITY sh_mode_priority
586 /* Return regmode weight for insn. */
587 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
588 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
590 /* Return current register pressure for regmode. */
591 #define CURR_REGMODE_PRESSURE(MODE)\
592 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
594 #undef TARGET_ENCODE_SECTION_INFO
595 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
597 #undef TARGET_LRA_P
598 #define TARGET_LRA_P sh_lra_p
600 #undef TARGET_SECONDARY_RELOAD
601 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
603 #undef TARGET_PREFERRED_RELOAD_CLASS
604 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
606 #undef TARGET_CONDITIONAL_REGISTER_USAGE
607 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
609 #undef TARGET_LEGITIMATE_ADDRESS_P
610 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
612 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
613 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P sh_cannot_substitute_mem_equiv_p
615 #undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
616 #define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
617 sh_legitimize_address_displacement
619 #undef TARGET_TRAMPOLINE_INIT
620 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
621 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
622 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
624 #undef TARGET_LEGITIMATE_CONSTANT_P
625 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
627 #undef TARGET_CANONICALIZE_COMPARISON
628 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
630 #undef TARGET_LEGITIMATE_COMBINED_INSN
631 #define TARGET_LEGITIMATE_COMBINED_INSN sh_legitimate_combined_insn
633 #undef TARGET_FIXED_CONDITION_CODE_REGS
634 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
636 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
637 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
638 sh_use_by_pieces_infrastructure_p
640 /* Machine-specific symbol_ref flags. */
641 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
643 /* The tas.b instruction sets the 7th bit in the byte, i.e. 0x80. This value
644 is used by optabs.cc atomic op expansion code as well as in sync.md. */
645 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
646 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
648 #undef TARGET_CANNOT_FORCE_CONST_MEM
649 #define TARGET_CANNOT_FORCE_CONST_MEM sh_cannot_force_const_mem_p
651 #undef TARGET_HARD_REGNO_NREGS
652 #define TARGET_HARD_REGNO_NREGS sh_hard_regno_nregs
653 #undef TARGET_HARD_REGNO_MODE_OK
654 #define TARGET_HARD_REGNO_MODE_OK sh_hard_regno_mode_ok
656 #undef TARGET_MODES_TIEABLE_P
657 #define TARGET_MODES_TIEABLE_P sh_modes_tieable_p
659 #undef TARGET_CAN_CHANGE_MODE_CLASS
660 #define TARGET_CAN_CHANGE_MODE_CLASS sh_can_change_mode_class
662 #undef TARGET_CONSTANT_ALIGNMENT
663 #define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
665 #undef TARGET_HAVE_SPECULATION_SAFE_VALUE
666 #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
668 #undef TARGET_C_MODE_FOR_FLOATING_TYPE
669 #define TARGET_C_MODE_FOR_FLOATING_TYPE sh_c_mode_for_floating_type
671 struct gcc_target targetm = TARGET_INITIALIZER;
674 /* Information on the currently selected atomic model.
675 This is initialized in sh_option_override. */
676 static sh_atomic_model selected_atomic_model_;
678 const sh_atomic_model&
679 selected_atomic_model (void)
681 return selected_atomic_model_;
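/* Parse and validate the atomic model option string.  A sketch of the
   accepted syntax, as read off the code below: a comma separated list whose
   first token selects the model ("none", "soft-gusa", "hard-llcs",
   "soft-tcb" or "soft-imask"), optionally followed by "strict" and/or
   "gbr-offset=<n>", e.g. (assuming the usual -matomic-model= spelling of
   the option)

     -matomic-model=soft-tcb,gbr-offset=16

   where 16 is just an example value; soft-tcb requires a gbr-offset that is
   a multiple of 4 in the range 0-1020.  */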
684 static sh_atomic_model
685 parse_validate_atomic_model_option (const char* str)
687 const char* model_names[sh_atomic_model::num_models];
688 model_names[sh_atomic_model::none] = "none";
689 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
690 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
691 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
692 model_names[sh_atomic_model::soft_imask] = "soft-imask";
694 const char* model_cdef_names[sh_atomic_model::num_models];
695 model_cdef_names[sh_atomic_model::none] = "NONE";
696 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
697 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
698 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
699 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
701 sh_atomic_model ret;
702 ret.type = sh_atomic_model::none;
703 ret.name = model_names[sh_atomic_model::none];
704 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
705 ret.strict = false;
706 ret.tcb_gbr_offset = -1;
708 /* Handle empty string as 'none'. */
709 if (str == NULL || *str == '\0')
710 return ret;
712 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
714 std::vector<std::string> tokens;
715 for (std::stringstream ss (str); ss.good (); )
717 tokens.push_back (std::string ());
718 std::getline (ss, tokens.back (), ',');
721 if (tokens.empty ())
722 err_ret ("invalid atomic model option");
724 /* The first token must be the atomic model name. */
726 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
727 if (tokens.front () == model_names[i])
729 ret.type = (sh_atomic_model::enum_type)i;
730 ret.name = model_names[i];
731 ret.cdef_name = model_cdef_names[i];
732 goto got_mode_name;
735 err_ret ("invalid atomic model name %qs", tokens.front ().c_str ());
736 got_mode_name:;
739 /* Go through the remaining tokens. */
740 for (size_t i = 1; i < tokens.size (); ++i)
742 if (tokens[i] == "strict")
743 ret.strict = true;
744 else if (!tokens[i].compare (0, strlen ("gbr-offset="), "gbr-offset="))
746 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
747 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
748 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
749 err_ret ("could not parse gbr-offset value %qs in atomic model "
750 "option", offset_str.c_str ());
752 else
753 err_ret ("unknown parameter %qs in atomic model option",
754 tokens[i].c_str ());
757 /* Check that the selection makes sense. */
758 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
759 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
760 ret.name);
762 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
763 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
765 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
766 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
768 if (ret.type == sh_atomic_model::soft_tcb
769 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
770 || (ret.tcb_gbr_offset & 3) != 0))
771 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
772 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
773 ret.name);
775 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
776 err_ret ("cannot use atomic model %s in user mode", ret.name);
778 return ret;
780 #undef err_ret
783 /* Register SH specific RTL passes. */
784 extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
785 const char* name);
786 extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
787 const char* name);
788 static void
789 register_sh_passes (void)
  /* Running the sh_treg_combine pass after ce1 generates better code when
     comparisons are combined and reg-reg moves are introduced, because
     reg-reg moves will be eliminated afterwards.  However, there are quite
     a few cases where combine will be unable to fold comparison related
     insns, thus for now don't do it.
  register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
                 PASS_POS_INSERT_AFTER, "ce1", 1);
  */
800 /* Run sh_treg_combine pass after combine but before register allocation. */
801 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
802 PASS_POS_INSERT_AFTER, "split1", 1);
804 /* Run sh_treg_combine pass after register allocation and basic block
805 reordering as this sometimes creates new opportunities. */
806 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
807 PASS_POS_INSERT_AFTER, "split3", 1);
809 /* Optimize sett and clrt insns, by e.g. removing them if the T bit value
810 is known after a conditional branch.
811 This must be done after basic blocks and branch conditions have
812 stabilized and won't be changed by further passes. */
813 register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
814 PASS_POS_INSERT_BEFORE, "sched2", 1);
817 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
818 various options, and do some machine dependent initialization. */
819 static void
820 sh_option_override (void)
822 int regno;
824 SUBTARGET_OVERRIDE_OPTIONS;
826 sh_cpu = PROCESSOR_SH1;
827 assembler_dialect = 0;
828 if (TARGET_SH2)
829 sh_cpu = PROCESSOR_SH2;
830 if (TARGET_SH2E)
831 sh_cpu = PROCESSOR_SH2E;
832 if (TARGET_SH2A)
833 sh_cpu = PROCESSOR_SH2A;
834 if (TARGET_SH3)
835 sh_cpu = PROCESSOR_SH3;
836 if (TARGET_SH3E)
837 sh_cpu = PROCESSOR_SH3E;
838 if (TARGET_SH4)
840 assembler_dialect = 1;
841 sh_cpu = PROCESSOR_SH4;
843 if (TARGET_SH4A)
845 assembler_dialect = 1;
846 sh_cpu = PROCESSOR_SH4A;
/* User/privileged mode is supported only on SH3* and SH4*.
   Disable it for everything else.  */
851 if (!TARGET_SH3 && TARGET_USERMODE)
852 TARGET_USERMODE = false;
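/* sh_div_str holds the division strategy string (presumably the argument of
   the -mdiv= option; the option definition itself lives in the .opt file,
   not here).  Map the recognized values to a strategy, falling back to a
   per-CPU default below.  */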
854 if (! strcmp (sh_div_str, "call-div1"))
855 sh_div_strategy = SH_DIV_CALL_DIV1;
856 else if (! strcmp (sh_div_str, "call-fp") && TARGET_FPU_ANY)
857 sh_div_strategy = SH_DIV_CALL_FP;
858 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
859 sh_div_strategy = SH_DIV_CALL_TABLE;
860 else
862 /* Pick one that makes most sense for the target in general.
863 It is not much good to use different functions depending on -Os,
864 since then we'll end up with two different functions when some of
865 the code is compiled for size, and some for speed. */
867 /* SH4 tends to emphasize speed. */
868 if (TARGET_HARD_SH4)
869 sh_div_strategy = SH_DIV_CALL_TABLE;
870 /* These have their own way of doing things. */
871 else if (TARGET_SH2A)
872 sh_div_strategy = SH_DIV_INTRINSIC;
873 /* SH1 .. SH3 cores often go into small-footprint systems, so
874 default to the smallest implementation available. */
875 else
876 sh_div_strategy = SH_DIV_CALL_DIV1;
879 if (sh_divsi3_libfunc[0])
880 ; /* User supplied - leave it alone. */
881 else if (TARGET_DIVIDE_CALL_FP)
882 sh_divsi3_libfunc = "__sdivsi3_i4";
883 else if (TARGET_DIVIDE_CALL_TABLE)
884 sh_divsi3_libfunc = "__sdivsi3_i4i";
885 else
886 sh_divsi3_libfunc = "__sdivsi3";
888 if (sh_branch_cost == -1)
890 /* The SH1 does not have delay slots, hence we get a pipeline stall
891 at every branch. The SH4 is superscalar, so the single delay slot
892 is not sufficient to keep both pipelines filled.
893 In any case, set the default branch cost to '2', as it results in
894 slightly overall smaller code and also enables some if conversions
895 that are required for matching special T bit related insns. */
896 sh_branch_cost = 2;
899 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
900 if (! OPTION_SET_P (TARGET_ZDCBRANCH) && TARGET_HARD_SH4)
901 TARGET_ZDCBRANCH = 1;
903 /* FDPIC code is a special form of PIC, and the vast majority of code
904 generation constraints that apply to PIC also apply to FDPIC, so we
905 set flag_pic to avoid the need to check TARGET_FDPIC everywhere
906 flag_pic is checked. */
907 if (TARGET_FDPIC && !flag_pic)
908 flag_pic = 2;
910 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
911 if (! VALID_REGISTER_P (regno))
912 sh_register_names[regno][0] = '\0';
914 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
915 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
916 sh_additional_register_names[regno][0] = '\0';
918 if (flag_pic && ! TARGET_PREFERGOT)
919 flag_no_function_cse = 1;
921 if (targetm.small_register_classes_for_mode_p (VOIDmode))
923 /* Never run scheduling before reload, since that can
924 break global alloc, and generates slower code anyway due
925 to the pressure on R0. */
/* Enable sched1 for SH4 if the user explicitly requests it.
   When sched1 is enabled, the ready queue will be reordered by
   the target hooks if pressure is high.  We cannot do this for
   PIC, SH3 and lower as they give spill failures for R0.  */
930 if (!TARGET_HARD_SH4 || flag_pic)
931 flag_schedule_insns = 0;
932 /* ??? Current exception handling places basic block boundaries
933 after call_insns. It causes the high pressure on R0 and gives
934 spill failures for R0 in reload. See PR 22553 and the thread
935 on gcc-patches
936 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
937 else if (flag_exceptions)
939 if (flag_schedule_insns && OPTION_SET_P (flag_schedule_insns))
940 warning (0, "ignoring %<-fschedule-insns%> because of exception "
941 "handling bug");
942 flag_schedule_insns = 0;
944 else if (flag_schedule_insns
945 && !OPTION_SET_P (flag_schedule_insns))
946 flag_schedule_insns = 0;
949 /* Unwind info is not correct around the CFG unless either a frame
950 pointer is present or M_A_O_A is set. Fixing this requires rewriting
951 unwind info generation to be aware of the CFG and propagating states
952 around edges. */
953 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
954 || flag_exceptions || flag_non_call_exceptions)
955 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
957 warning (0, "unwind tables currently require either a frame pointer "
958 "or %<-maccumulate-outgoing-args%> for correctness");
959 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
962 if (flag_unsafe_math_optimizations)
964 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
965 if (OPTION_SET_P (TARGET_FSCA) == 0
966 && (TARGET_SH4A_FP || TARGET_FPU_SH4_300))
967 TARGET_FSCA = 1;
969 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
970 if (OPTION_SET_P (TARGET_FSRRA) == 0
971 && (TARGET_SH4A_FP || TARGET_FPU_SH4_300))
972 TARGET_FSRRA = 1;
975 /* Allow fsrra insn only if -funsafe-math-optimizations and
976 -ffinite-math-only is enabled. */
977 TARGET_FSRRA = TARGET_FSRRA
978 && flag_unsafe_math_optimizations
979 && flag_finite_math_only;
981 /* If the -mieee option was not explicitly set by the user, turn it on
982 unless -ffinite-math-only was specified. See also PR 33135. */
983 if (! OPTION_SET_P (TARGET_IEEE))
984 TARGET_IEEE = ! flag_finite_math_only;
986 if (sh_fixed_range_str)
987 sh_fix_range (sh_fixed_range_str);
989 /* This target defaults to strict volatile bitfields. */
990 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
991 flag_strict_volatile_bitfields = 1;
993 sh_override_options_after_change ();
995 /* Parse atomic model option and make sure it is valid for the current
996 target CPU. */
997 selected_atomic_model_
998 = parse_validate_atomic_model_option (sh_atomic_model_str);
1000 register_sh_passes ();
1003 /* Implement targetm.override_options_after_change. */
1005 static void
1006 sh_override_options_after_change (void)
1008 /* Adjust loop, jump and function alignment values (in bytes), if those
1009 were not specified by the user using -falign-loops, -falign-jumps
1010 and -falign-functions options.
1011 32 bit alignment is better for speed, because instructions can be
1012 fetched as a pair from a longword boundary. For size use 16 bit
1013 alignment to get more compact code.
1014 Aligning all jumps increases the code size, even if it might
1015 result in slightly faster code. Thus, it is set to the smallest
1016 alignment possible if not specified by the user. */
1017 if (flag_align_loops && !str_align_loops)
1018 str_align_loops = optimize_size ? "2" : "4";
1020 /* Parse values so that we can compare for current value. */
1021 parse_alignment_opts ();
1022 if (flag_align_jumps && !str_align_jumps)
1023 str_align_jumps = "2";
1024 else if (align_jumps.levels[0].get_value () < 2)
1025 str_align_jumps = "2";
1027 if (flag_align_functions && !str_align_functions)
1028 str_align_functions = optimize_size ? "2" : "4";
1030 /* The linker relaxation code breaks when a function contains
1031 alignments that are larger than that at the start of a
1032 compilation unit. */
1033 if (TARGET_RELAX)
1035 /* Parse values so that we can compare for current value. */
1036 parse_alignment_opts ();
1037 int min_align = MAX (align_loops.levels[0].get_value (),
1038 align_jumps.levels[0].get_value ());
1040 /* Also take possible .long constants / mova tables into account. */
1041 if (min_align < 4)
1042 min_align = 4;
1043 if (align_functions.levels[0].get_value () < min_align)
1045 char *r = XNEWVEC (char, 16);
1046 sprintf (r, "%d", min_align);
1047 str_align_functions = r;
1052 /* Print the operand address in x to the stream. */
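/* The address forms emitted here, as can be read off the cases below:
   register indirect "@rN", displacement "@(disp,rN)", indexed "@(r0,rN)"
   (R0 is printed first if present), pre-decrement "@-rN" and post-increment
   "@rN+"; anything else is printed as a constant address.  */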
1053 static void
1054 sh_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x)
1056 switch (GET_CODE (x))
1058 case REG:
1059 case SUBREG:
1060 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1061 break;
1063 case PLUS:
1065 rtx base = XEXP (x, 0);
1066 rtx index = XEXP (x, 1);
1068 switch (GET_CODE (index))
1070 case CONST_INT:
1071 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1072 reg_names[true_regnum (base)]);
1073 break;
1075 case REG:
1076 case SUBREG:
1078 int base_num = true_regnum (base);
1079 int index_num = true_regnum (index);
/* If base or index is R0, make sure that it comes first.
   Usually one of them will be R0, but the order might be wrong.
   If neither base nor index is R0 it's an error and we just
   pass it on to the assembler.  This avoids silent wrong-code
   bugs.  */
1086 if (base_num == 0 && index_num != 0)
1087 std::swap (base_num, index_num);
1089 fprintf (stream, "@(%s,%s)", reg_names[index_num],
1090 reg_names[base_num]);
1091 break;
1094 default:
1095 gcc_unreachable ();
1098 break;
1100 case PRE_DEC:
1101 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1102 break;
1104 case POST_INC:
1105 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1106 break;
1108 default:
1109 x = mark_constant_pool_use (x);
1110 output_addr_const (stream, x);
1111 break;
1115 /* Print operand x (an rtx) in assembler syntax to file stream
1116 according to modifier code.
1118 '.' print a .s if insn needs delay slot
1119 ',' print LOCAL_LABEL_PREFIX
1120 '@' print trap, rte or rts depending upon pragma interruptness
1121 '#' output a nop if there is nothing to put in the delay slot
1122 ''' print likelihood suffix (/u for unlikely).
1123 '>' print branch target if -fverbose-asm
1124 'O' print a constant without the #
1125 'R' print the LSW of a dp value - changes if in little endian
1126 'S' print the MSW of a dp value - changes if in little endian
1127 'T' print the next word of a dp value - same as 'R' in big endian mode.
1128 'M' print .b / .w / .l / .s / .d suffix if operand is a MEM.
1129 'N' print 'r63' if the operand is (const_int 0).
1130 'd' print a V2SF reg as dN instead of fpN.
1131 'm' print a pair `base,offset' or `base,index', for LD and ST.
1132 'U' Likewise for {LD,ST}{HI,LO}.
1133 'V' print the position of a single bit set.
1134 'W' print the position of a single bit cleared.
1135 't' print a memory address which is a register.
1136 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1137 'o' output an operator. */
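/* A hypothetical output template, for illustration only (not taken from
   sh.md), showing a few of the modifiers above on a DImode register pair
   and a MEM operand:

     "mov.l  %R1,%R0"    -- low word of operand 1 to low word of operand 0
     "mov.l  %S1,%S0"    -- likewise for the high words
     "mov%M2 %2,%0"      -- pick the .b/.w/.l/.s/.d suffix from the mode
                            of the MEM operand 2

   The operand numbers are made up; the modifier meanings follow the list
   above.  */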
1138 static void
1139 sh_print_operand (FILE *stream, rtx x, int code)
1141 int regno;
1142 machine_mode mode;
1144 switch (code)
1146 tree trapa_attr;
1148 case '.':
1149 if (final_sequence
1150 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
1151 && get_attr_length (final_sequence->insn (1)))
1152 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1153 break;
1154 case ',':
1155 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1156 break;
1157 case '@':
1158 trapa_attr = lookup_attribute ("trap_exit",
1159 DECL_ATTRIBUTES (current_function_decl));
1160 if (trapa_attr)
1161 fprintf (stream, "trapa #%ld",
1162 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1163 else if (sh_cfun_interrupt_handler_p ())
1165 if (sh_cfun_resbank_handler_p ())
1166 fprintf (stream, "resbank\n");
1167 fprintf (stream, "rte");
1169 else
1170 fprintf (stream, "rts");
1171 break;
1172 case '#':
1173 /* Output a nop if there's nothing in the delay slot. */
1174 if (dbr_sequence_length () == 0)
1175 fprintf (stream, "\n\tnop");
1176 break;
1177 case '\'':
1179 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1181 if (note
1182 && profile_probability::from_reg_br_prob_note (XINT (note, 0))
1183 < profile_probability::even ())
1184 fputs ("/u", stream);
1185 break;
1187 case '>':
1188 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1190 fputs ("\t! target: ", stream);
1191 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1193 break;
1194 case 'O':
1195 x = mark_constant_pool_use (x);
1196 output_addr_const (stream, x);
1197 break;
1198 /* N.B.: %R / %S / %T adjust memory addresses by four.
1199 While they can be used to access 64 bit parts of a larger value
1200 held in general purpose registers, that won't work with memory -
1201 neither for fp registers, since the frxx names are used. */
1202 case 'R':
1203 if (REG_P (x) || GET_CODE (x) == SUBREG)
1205 regno = true_regnum (x);
1206 regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
1207 fputs (reg_names[regno], (stream));
1209 else if (MEM_P (x))
1211 x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
1212 sh_print_operand_address (stream, GET_MODE (x), XEXP (x, 0));
1214 else
1216 rtx sub = NULL_RTX;
1218 mode = GET_MODE (x);
1219 if (mode == VOIDmode)
1220 mode = DImode;
1221 if (GET_MODE_SIZE (mode) >= 8)
1222 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
1223 if (sub)
1224 sh_print_operand (stream, sub, 0);
1225 else
1226 output_operand_lossage ("invalid operand to %%R");
1228 break;
1229 case 'S':
1230 if (REG_P (x) || GET_CODE (x) == SUBREG)
1232 regno = true_regnum (x);
1233 regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
1234 fputs (reg_names[regno], (stream));
1236 else if (MEM_P (x))
1238 x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
1239 sh_print_operand_address (stream, GET_MODE (x), XEXP (x, 0));
1241 else
1243 rtx sub = NULL_RTX;
1245 mode = GET_MODE (x);
1246 if (mode == VOIDmode)
1247 mode = DImode;
1248 if (GET_MODE_SIZE (mode) >= 8)
1249 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
1250 if (sub)
1251 sh_print_operand (stream, sub, 0);
1252 else
1253 output_operand_lossage ("invalid operand to %%S");
1255 break;
1256 case 'T':
1257 /* Next word of a double. */
1258 switch (GET_CODE (x))
1260 case REG:
1261 fputs (reg_names[REGNO (x) + 1], (stream));
1262 break;
1263 case MEM:
1265 machine_mode mode = GET_MODE (x);
1266 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1267 && GET_CODE (XEXP (x, 0)) != POST_INC)
1268 x = adjust_address (x, SImode, 4);
1269 sh_print_operand_address (stream, mode, XEXP (x, 0));
1271 break;
1272 default:
1273 break;
1275 break;
1277 case 't':
1278 gcc_assert (MEM_P (x));
1279 x = XEXP (x, 0);
1280 switch (GET_CODE (x))
1282 case REG:
1283 case SUBREG:
1284 sh_print_operand (stream, x, 0);
1285 break;
1286 default:
1287 break;
1289 break;
1291 case 'o':
1292 switch (GET_CODE (x))
1294 case PLUS: fputs ("add", stream); break;
1295 case MINUS: fputs ("sub", stream); break;
1296 case MULT: fputs ("mul", stream); break;
1297 case DIV: fputs ("div", stream); break;
1298 case EQ: fputs ("eq", stream); break;
1299 case NE: fputs ("ne", stream); break;
1300 case GT: case LT: fputs ("gt", stream); break;
1301 case GE: case LE: fputs ("ge", stream); break;
1302 case GTU: case LTU: fputs ("gtu", stream); break;
1303 case GEU: case LEU: fputs ("geu", stream); break;
1304 default:
1305 break;
1307 break;
1308 case 'M':
1309 if (MEM_P (x))
1311 switch (GET_MODE (x))
1313 case E_QImode: fputs (".b", stream); break;
1314 case E_HImode: fputs (".w", stream); break;
1315 case E_SImode: fputs (".l", stream); break;
1316 case E_SFmode: fputs (".s", stream); break;
1317 case E_DFmode: fputs (".d", stream); break;
1318 default: gcc_unreachable ();
1321 break;
1323 case 'm':
1324 gcc_assert (MEM_P (x));
1325 x = XEXP (x, 0);
1326 /* Fall through. */
1327 case 'U':
1328 switch (GET_CODE (x))
1330 case REG:
1331 case SUBREG:
1332 sh_print_operand (stream, x, 0);
1333 fputs (", 0", stream);
1334 break;
1336 case PLUS:
1337 sh_print_operand (stream, XEXP (x, 0), 0);
1338 fputs (", ", stream);
1339 sh_print_operand (stream, XEXP (x, 1), 0);
1340 break;
1342 default:
1343 gcc_unreachable ();
1345 break;
1347 case 'V':
1349 int num = exact_log2 (INTVAL (x));
1350 gcc_assert (num >= 0);
1351 fprintf (stream, "#%d", num);
1353 break;
1355 case 'W':
1357 int num = exact_log2 (~INTVAL (x));
1358 gcc_assert (num >= 0);
1359 fprintf (stream, "#%d", num);
1361 break;
1363 case 'd':
1364 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1366 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1367 break;
1369 case 'N':
1370 if (x == CONST0_RTX (GET_MODE (x)))
1372 fprintf ((stream), "r63");
1373 break;
1375 goto default_output;
1376 case 'u':
1377 if (CONST_INT_P (x))
1379 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1380 break;
1382 /* Fall through. */
1384 default_output:
1385 default:
1386 regno = 0;
1387 mode = GET_MODE (x);
1389 switch (GET_CODE (x))
1391 case TRUNCATE:
1393 rtx inner = XEXP (x, 0);
1394 int offset = 0;
1395 machine_mode inner_mode;
1397 /* We might see SUBREGs with vector mode registers inside. */
1398 if (GET_CODE (inner) == SUBREG
1399 && (GET_MODE_SIZE (GET_MODE (inner))
1400 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1401 && subreg_lowpart_p (inner))
1402 inner = SUBREG_REG (inner);
1403 if (CONST_INT_P (inner))
1405 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1406 goto default_output;
1408 inner_mode = GET_MODE (inner);
1409 if (GET_CODE (inner) == SUBREG
1410 && (GET_MODE_SIZE (GET_MODE (inner))
1411 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1412 && REG_P (SUBREG_REG (inner)))
1414 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1415 GET_MODE (SUBREG_REG (inner)),
1416 SUBREG_BYTE (inner),
1417 GET_MODE (inner));
1418 inner = SUBREG_REG (inner);
1420 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1421 abort ();
1422 /* Floating point register pairs are always big endian;
1423 general purpose registers are 64 bit wide. */
1424 regno = REGNO (inner);
1425 regno = (hard_regno_nregs (regno, inner_mode)
1426 - hard_regno_nregs (regno, mode))
1427 + offset;
1428 x = inner;
1429 goto reg;
1431 case SIGN_EXTEND:
1432 x = XEXP (x, 0);
1433 goto reg;
1434 case SUBREG:
1435 gcc_assert (SUBREG_BYTE (x) == 0
1436 && REG_P (SUBREG_REG (x)));
1438 x = SUBREG_REG (x);
1439 /* Fall through. */
1441 reg:
1442 case REG:
1443 regno += REGNO (x);
1444 if (FP_REGISTER_P (regno)
1445 && mode == V16SFmode)
1446 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1447 else if (FP_REGISTER_P (REGNO (x))
1448 && mode == V4SFmode)
1449 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1450 else if (REG_P (x)
1451 && mode == V2SFmode)
1452 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1453 else if (FP_REGISTER_P (REGNO (x))
1454 && GET_MODE_SIZE (mode) > 4)
1455 fprintf ((stream), "d%s", reg_names[regno] + 1);
1456 else
1457 fputs (reg_names[regno], (stream));
1458 break;
1460 case MEM:
1461 output_address (GET_MODE (x), XEXP (x, 0));
1462 break;
1464 default:
1465 fputc ('#', stream);
1466 output_addr_const (stream, x);
1467 break;
1469 break;
1473 static bool
1474 sh_print_operand_punct_valid_p (unsigned char code)
1476 return (code == '.' || code == '#' || code == '@' || code == ','
1477 || code == '$' || code == '\'' || code == '>');
1480 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
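/* For example, as can be read off the cases below, an UNSPEC_GOT wrapping a
   symbol foo is printed as "foo@GOT", UNSPEC_GOTOFF as "foo@GOTOFF",
   UNSPEC_PLT as "foo@PLT", and so on for the other PIC/TLS relocation
   operators; a plain UNSPEC_PIC prints the symbol with no suffix.  */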
1481 static bool
1482 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1484 if (GET_CODE (x) == UNSPEC)
1486 switch (XINT (x, 1))
1488 case UNSPEC_PIC:
1489 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1490 output_addr_const (file, XVECEXP (x, 0, 0));
1491 break;
1492 case UNSPEC_GOT:
1493 output_addr_const (file, XVECEXP (x, 0, 0));
1494 fputs ("@GOT", file);
1495 break;
1496 case UNSPEC_GOTOFF:
1497 output_addr_const (file, XVECEXP (x, 0, 0));
1498 fputs ("@GOTOFF", file);
1499 break;
1500 case UNSPEC_PLT:
1501 output_addr_const (file, XVECEXP (x, 0, 0));
1502 fputs ("@PLT", file);
1503 break;
1504 case UNSPEC_GOTPLT:
1505 output_addr_const (file, XVECEXP (x, 0, 0));
1506 fputs ("@GOTPLT", file);
1507 break;
1508 case UNSPEC_PCREL:
1509 output_addr_const (file, XVECEXP (x, 0, 0));
1510 fputs ("@PCREL", file);
1511 break;
1512 case UNSPEC_DTPOFF:
1513 output_addr_const (file, XVECEXP (x, 0, 0));
1514 fputs ("@DTPOFF", file);
1515 break;
1516 case UNSPEC_GOTTPOFF:
1517 output_addr_const (file, XVECEXP (x, 0, 0));
1518 fputs ("@GOTTPOFF", file);
1519 break;
1520 case UNSPEC_TPOFF:
1521 output_addr_const (file, XVECEXP (x, 0, 0));
1522 fputs ("@TPOFF", file);
1523 break;
1524 case UNSPEC_CALLER:
1526 char name[32];
1527 /* LPCS stands for Label for PIC Call Site. */
1528 targetm.asm_out.generate_internal_label (name, "LPCS",
1529 INTVAL (XVECEXP (x, 0, 0)));
1530 assemble_name (file, name);
1532 break;
1533 case UNSPEC_SYMOFF:
1534 output_addr_const (file, XVECEXP (x, 0, 0));
1535 fputc ('-', file);
1536 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1538 fputc ('(', file);
1539 output_addr_const (file, XVECEXP (x, 0, 1));
1540 fputc (')', file);
1542 else
1543 output_addr_const (file, XVECEXP (x, 0, 1));
1544 break;
1545 case UNSPEC_PCREL_SYMOFF:
1546 output_addr_const (file, XVECEXP (x, 0, 0));
1547 fputs ("-(", file);
1548 output_addr_const (file, XVECEXP (x, 0, 1));
1549 fputs ("-.)", file);
1550 break;
1551 case UNSPEC_GOTFUNCDESC:
1552 output_addr_const (file, XVECEXP (x, 0, 0));
1553 fputs ("@GOTFUNCDESC", file);
1554 break;
1555 case UNSPEC_GOTOFFFUNCDESC:
1556 output_addr_const (file, XVECEXP (x, 0, 0));
1557 fputs ("@GOTOFFFUNCDESC", file);
1558 break;
1559 default:
1560 return false;
1562 return true;
1564 else
1565 return false;
1568 /* Encode symbol attributes of a SYMBOL_REF into its
1569 SYMBOL_REF_FLAGS. */
1570 static void
1571 sh_encode_section_info (tree decl, rtx rtl, int first)
1573 default_encode_section_info (decl, rtl, first);
1575 if (TREE_CODE (decl) == FUNCTION_DECL
1576 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1577 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1580 /* Prepare operands for a move define_expand; specifically, one of the
1581 operands must be in a register. */
1582 void
1583 prepare_move_operands (rtx operands[], machine_mode mode)
1585 if ((mode == SImode || mode == DImode)
1586 && flag_pic
1587 && ! ((mode == Pmode || mode == ptr_mode)
1588 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1590 rtx temp;
1591 if (SYMBOLIC_CONST_P (operands[1]))
1593 if (MEM_P (operands[0]))
1594 operands[1] = force_reg (Pmode, operands[1]);
1595 else
1597 temp = (!can_create_pseudo_p ()
1598 ? operands[0]
1599 : gen_reg_rtx (Pmode));
1600 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1603 else if (GET_CODE (operands[1]) == CONST
1604 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1605 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1607 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1608 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1609 mode, temp);
1610 operands[1] = expand_binop (mode, add_optab, temp,
1611 XEXP (XEXP (operands[1], 0), 1),
1612 (!can_create_pseudo_p ()
1613 ? temp
1614 : gen_reg_rtx (Pmode)),
1615 0, OPTAB_LIB_WIDEN);
1619 if (! reload_in_progress && ! reload_completed)
1621 /* Copy the source to a register if both operands aren't registers. */
1622 if (! register_operand (operands[0], mode)
1623 && ! register_operand (operands[1], mode))
1624 operands[1] = copy_to_mode_reg (mode, operands[1]);
1626 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1628 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1629 except that we can't use that function because it is static. */
1630 rtx new_rtx = change_address (operands[0], mode, 0);
1631 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1632 operands[0] = new_rtx;
1635 /* This case can happen while generating code to move the result
1636 of a library call to the target. Reject `st r0,@(rX,rY)' because
1637 reload will fail to find a spill register for rX, since r0 is already
1638 being used for the source. */
1639 else if (refers_to_regno_p (R0_REG, operands[1])
1640 && MEM_P (operands[0])
1641 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1642 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1643 operands[1] = copy_to_mode_reg (mode, operands[1]);
/* When displacement addressing is used, RA will assign r0 to
   the pseudo register operand for the QI/HImode load/store.
   This tends to make a long live range for R0 and might cause
   anomalous register spills in some cases with LRA.  See PR
   target/55212.
   We split such a load/store into two move insns via r0 so as
   to shorten R0's live range.  This makes some code worse but
   wins on average for LRA.
   Also, when base+index addressing is used and the index term
   is a subreg, LRA assumes that more hard registers can be
   available in some situations.  That isn't the case for SH in
   the problematic case.  We can pre-allocate R0 for that index
   term to avoid the issue.  See PR target/66591.  */
1658 else if (sh_lra_p ()
1659 && ! TARGET_SH2A
1660 && ((REG_P (operands[0]) && MEM_P (operands[1]))
1661 || (REG_P (operands[1]) && MEM_P (operands[0]))))
1663 bool load_p = REG_P (operands[0]);
1664 rtx reg = operands[load_p ? 0 : 1];
1665 rtx adr = XEXP (operands[load_p ? 1 : 0], 0);
1667 if ((mode == QImode || mode == HImode)
1668 && REGNO (reg) >= FIRST_PSEUDO_REGISTER
1669 && GET_CODE (adr) == PLUS
1670 && REG_P (XEXP (adr, 0))
1671 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1672 && CONST_INT_P (XEXP (adr, 1))
1673 && INTVAL (XEXP (adr, 1)) != 0
1674 && sh_legitimate_index_p (mode, XEXP (adr, 1), false, true))
1676 rtx r0_rtx = gen_rtx_REG (mode, R0_REG);
1677 emit_move_insn (r0_rtx, operands[1]);
1678 operands[1] = r0_rtx;
1680 if (REGNO (reg) >= FIRST_PSEUDO_REGISTER
1681 && GET_CODE (adr) == PLUS
1682 && REG_P (XEXP (adr, 0))
1683 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1684 && SUBREG_P (XEXP (adr, 1))
1685 && REG_P (SUBREG_REG (XEXP (adr, 1))))
1687 rtx r0_rtx = gen_rtx_REG (GET_MODE (XEXP (adr, 1)), R0_REG);
1688 emit_move_insn (r0_rtx, XEXP (adr, 1));
1689 XEXP (adr, 1) = r0_rtx;
1694 if (mode == Pmode || mode == ptr_mode)
1696 rtx op0 = operands[0];
1697 rtx op1 = operands[1];
1698 rtx opc;
1699 if (GET_CODE (op1) == CONST
1700 && GET_CODE (XEXP (op1, 0)) == PLUS
1701 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1702 != TLS_MODEL_NONE))
1704 opc = XEXP (XEXP (op1, 0), 1);
1705 op1 = XEXP (XEXP (op1, 0), 0);
1707 else
1708 opc = NULL_RTX;
1710 enum tls_model tls_kind;
1712 if (! reload_in_progress && ! reload_completed
1713 && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1715 rtx tga_op1, tga_ret, tmp, tmp2;
1717 if (! flag_pic
1718 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1719 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1720 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1722 static int got_labelno;
1723 		  /* Don't schedule insns for getting the GOT address when
1724 		     the first scheduling pass is enabled, to avoid spill
1725 		     failures for R0.  */
1726 if (flag_schedule_insns)
1727 emit_insn (gen_blockage ());
1728 emit_insn (gen_GOTaddr2picreg (GEN_INT (++got_labelno)));
1729 emit_use (gen_rtx_REG (SImode, PIC_REG));
1730 if (flag_schedule_insns)
1731 emit_insn (gen_blockage ());
1734 switch (tls_kind)
1736 case TLS_MODEL_GLOBAL_DYNAMIC:
1737 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1738 if (TARGET_FDPIC)
1739 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
1740 sh_get_fdpic_reg_initial_val ());
1741 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1742 tmp = gen_reg_rtx (Pmode);
1743 emit_move_insn (tmp, tga_ret);
1744 op1 = tmp;
1745 break;
1747 case TLS_MODEL_LOCAL_DYNAMIC:
1748 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1749 if (TARGET_FDPIC)
1750 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
1751 sh_get_fdpic_reg_initial_val ());
1752 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1754 tmp = gen_reg_rtx (Pmode);
1755 emit_move_insn (tmp, tga_ret);
1757 if (register_operand (op0, Pmode))
1758 tmp2 = op0;
1759 else
1760 tmp2 = gen_reg_rtx (Pmode);
1762 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1763 op1 = tmp2;
1764 break;
1766 case TLS_MODEL_INITIAL_EXEC:
1767 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1768 tmp = gen_sym2GOTTPOFF (op1);
1769 if (TARGET_FDPIC)
1770 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
1771 sh_get_fdpic_reg_initial_val ());
1772 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1773 op1 = tga_op1;
1774 break;
1776 case TLS_MODEL_LOCAL_EXEC:
1777 tmp2 = gen_reg_rtx (Pmode);
1778 emit_insn (gen_store_gbr (tmp2));
1779 tmp = gen_reg_rtx (Pmode);
1780 emit_insn (gen_symTPOFF2reg (tmp, op1));
1782 if (register_operand (op0, Pmode))
1783 op1 = op0;
1784 else
1785 op1 = gen_reg_rtx (Pmode);
1787 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1788 break;
1790 default:
1791 gcc_unreachable ();
1793 if (opc)
1794 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1795 operands[1] = op1;
1799 if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
1801 rtx base, offset;
1802 split_const (operands[1], &base, &offset);
1804 if (GET_CODE (base) == SYMBOL_REF
1805 && !offset_within_block_p (base, INTVAL (offset)))
1807 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx (mode) : operands[0];
1808 emit_move_insn (tmp, base);
1809 if (!arith_operand (offset, mode))
1810 offset = force_reg (mode, offset);
1811 emit_insn (gen_add3_insn (operands[0], tmp, offset));
1816 /* Implement the canonicalize_comparison target hook for the combine
1817 pass. For the target hook this function is invoked via
1818 sh_canonicalize_comparison. This function is also re-used to
1819 canonicalize comparisons in cbranch pattern expanders. */
1820 static void
1821 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1822 machine_mode mode,
1823 bool op0_preserve_value)
1825 /* When invoked from within the combine pass the mode is not specified,
1826 so try to get it from one of the operands. */
1827 if (mode == VOIDmode)
1828 mode = GET_MODE (op0);
1829 if (mode == VOIDmode)
1830 mode = GET_MODE (op1);
1832 // We need to have a mode to do something useful here.
1833 if (mode == VOIDmode)
1834 return;
1836 // Currently, we don't deal with floats here.
1837 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1838 return;
1840 // Make sure that the constant operand is the second operand.
1841 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1843 if (op0_preserve_value)
1844 return;
1846 std::swap (op0, op1);
1847 cmp = swap_condition (cmp);
1850 if (CONST_INT_P (op1))
1852 /* Try to adjust the constant operand in such a way that available
1853 comparison insns can be utilized better and the constant can be
1854 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1855 constant pool. */
1856 const HOST_WIDE_INT val = INTVAL (op1);
1858 /* x > -1 --> x >= 0
1859 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1860 x <= -1 --> x < 0
1861 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1862 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1864 cmp = cmp == GT ? GE : LT;
1865 op1 = gen_int_mode (val + 1, mode);
1868 /* x >= 1 --> x > 0
1869 x >= 0x80 --> x > 0x7F
1870 x < 1 --> x <= 0
1871 x < 0x80 --> x <= 0x7F */
1872 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1874 cmp = cmp == GE ? GT : LE;
1875 op1 = gen_int_mode (val - 1, mode);
1878 /* unsigned x >= 1 --> x != 0
1879 unsigned x < 1 --> x == 0 */
1880 else if (val == 1 && (cmp == GEU || cmp == LTU))
1882 cmp = cmp == GEU ? NE : EQ;
1883 op1 = CONST0_RTX (mode);
1886 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1887 unsigned x < 0x80 --> unsigned x < 0x7F */
1888 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1890 cmp = cmp == GEU ? GTU : LEU;
1891 op1 = gen_int_mode (val - 1, mode);
1894 /* unsigned x > 0 --> x != 0
1895 unsigned x <= 0 --> x == 0 */
1896 else if (val == 0 && (cmp == GTU || cmp == LEU))
1897 cmp = cmp == GTU ? NE : EQ;
1899 /* unsigned x > 0x7FFFFFFF --> signed x < 0
1900 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
1901 else if (mode == SImode && (cmp == GTU || cmp == LEU)
1902 && val == 0x7FFFFFFF)
1904 cmp = cmp == GTU ? LT : GE;
1905 op1 = const0_rtx;
1908 /* unsigned x >= 0x80000000 --> signed x < 0
1909 unsigned x < 0x80000000 --> signed x >= 0 */
1910 else if (mode == SImode && (cmp == GEU || cmp == LTU)
1911 && (unsigned HOST_WIDE_INT)val
1912 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
1914 cmp = cmp == GEU ? LT : GE;
1915 op1 = const0_rtx;
1920 /* This function implements the canonicalize_comparison target hook.
1921 This wrapper around the internally used sh_canonicalize_comparison
1922 function is needed to do the enum rtx_code <-> int conversion.
1923 Target hooks cannot use enum rtx_code in its definition. */
1924 static void
1925 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1926 bool op0_preserve_value)
1928 enum rtx_code tmp_code = (enum rtx_code)*code;
1929 sh_canonicalize_comparison (tmp_code, *op0, *op1,
1930 VOIDmode, op0_preserve_value);
1931 *code = (int)tmp_code;
1934 /* This function implements the legitimate_combined_insn target hook,
1935 which the combine pass uses to early reject combined insns, before
1936 it tries to recog the insn and determine its cost. */
1937 static bool
1938 sh_legitimate_combined_insn (rtx_insn* insn)
1940 /* Reject combinations of memory loads and zero extensions, as these
1941 interfere with other combine patterns such as zero extracts and bit
1942 tests. The SH2A movu.{b|w} insns are formed later in the
1943 'sh_optimize_extu_exts' pass after combine/split1. */
1944 rtx p = PATTERN (insn);
1945 if (GET_CODE (p) == SET
1946 && REG_P (XEXP (p, 0)) && GET_MODE (XEXP (p, 0)) == SImode
1947 && GET_CODE (XEXP (p, 1)) == ZERO_EXTEND
1948 && MEM_P (XEXP (XEXP (p, 1), 0)))
1949 return false;
1951 return true;
1954 bool
1955 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
1957 *p1 = T_REG;
1958 *p2 = INVALID_REGNUM;
1959 return true;
1962 /* Try to calculate the branch distance of a conditional branch in bytes.
1964 FIXME: Because of PR 59189 we can't use the CFG here. Instead just
1965 walk from this insn into the next (fall-through) basic block and see if
1966 we hit the label. */
1967 unsigned int
1968 sh_cbranch_distance (rtx_insn* _cbranch_insn, unsigned int max_dist)
1970 rtx_jump_insn* cbranch_insn = safe_as_a<rtx_jump_insn*> (_cbranch_insn);
1972 if (dump_file)
1974 fprintf (dump_file, "sh_cbranch_distance insn = \n");
1975 print_rtl_single (dump_file, cbranch_insn);
1978 unsigned int dist = 0;
1980 for (rtx_insn* i = next_nonnote_insn (cbranch_insn);
1981 i != NULL && dist < max_dist; i = next_nonnote_insn (i))
1983 const unsigned int i_len = get_attr_length (i);
1984 dist += i_len;
1986 if (dump_file)
1987 fprintf (dump_file, " insn %d length = %u dist = %u\n",
1988 INSN_UID (i), i_len, dist);
1990 if (rtx_code_label* l = dyn_cast<rtx_code_label*> (i))
1992 if (l == cbranch_insn->jump_target ())
1994 if (dump_file)
1995 fprintf (dump_file, " cbranch dist = %u\n", dist);
1996 return dist;
1998 break;
2002 if (dump_file)
2003 fprintf (dump_file, " cbranch dist = unknown\n");
2005 return unknown_cbranch_distance;
2008 enum rtx_code
2009 prepare_cbranch_operands (rtx *operands, machine_mode mode,
2010 enum rtx_code comparison)
2012 gcc_assert (can_create_pseudo_p ());
2014 if (comparison == LAST_AND_UNUSED_RTX_CODE)
2015 comparison = GET_CODE (operands[0]);
2017 sh_canonicalize_comparison (comparison, operands[1], operands[2],
2018 mode, false);
2020 rtx op1 = operands[1];
2021 operands[1] = force_reg (mode, op1);
2023 /* When we are handling DImode comparisons, we want to keep constants so
2024 that we can optimize the component comparisons; however, memory loads
2025 are better issued as a whole so that they can be scheduled well.
2026 SImode equality comparisons allow I08 constants, but only when they
2027 compare r0. Hence, if operands[1] has to be loaded from somewhere else
2028 into a register, that register might as well be r0, and we allow the
2029 constant. If it is already in a register, this is likely to be
2030 allocated to a different hard register, thus we load the constant into
2031 a register unless it is zero. */
2032 if (!REG_P (operands[2])
2033 && (!CONST_INT_P (operands[2])
2034 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
2035 && ((comparison != EQ && comparison != NE)
2036 || (REG_P (op1) && REGNO (op1) != R0_REG)
2037 || !satisfies_constraint_I08 (operands[2])))))
2038 operands[2] = force_reg (mode, operands[2]);
2040 return comparison;
2043 static void
2044 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison,
2045 profile_probability probability)
2047 rtx (*branch_expander) (rtx) = gen_branch_true;
2048 comparison = prepare_cbranch_operands (operands, SImode, comparison);
2049 switch (comparison)
2051 case NE: case LT: case LE: case LTU: case LEU:
2052 comparison = reverse_condition (comparison);
2053 branch_expander = gen_branch_false;
2054 default: ;
2056 emit_insn (gen_rtx_SET (get_t_reg_rtx (),
2057 gen_rtx_fmt_ee (comparison, SImode,
2058 operands[1], operands[2])));
2059 rtx_insn *jump = emit_jump_insn (branch_expander (operands[3]));
2060 if (probability.initialized_p ())
2061 add_reg_br_prob_note (jump, probability);
2064 void
2065 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison)
2067 expand_cbranchsi4 (operands, comparison,
2068 profile_probability::uninitialized ());
2071 /* ??? How should we distribute probabilities when more than one branch
2072    is generated?  So far we only have some ad-hoc observations:
2073 - If the operands are random, they are likely to differ in both parts.
2074 - If comparing items in a hash chain, the operands are random or equal;
2075 operation should be EQ or NE.
2076 - If items are searched in an ordered tree from the root, we can expect
2077 the highpart to be unequal about half of the time; operation should be
2078 an inequality comparison, operands non-constant, and overall probability
2079 about 50%. Likewise for quicksort.
2080    - Range checks will often be made against constants.  Even if we assume for
2081 simplicity an even distribution of the non-constant operand over a
2082 sub-range here, the same probability could be generated with differently
2083 wide sub-ranges - as long as the ratio of the part of the subrange that
2084 is before the threshold to the part that comes after the threshold stays
2085 the same. Thus, we can't really tell anything here;
2086    assuming random distribution is at least simple.  */
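/* Illustrative sketch of the expansion performed below for a signed DImode
   comparison "a > b" (msw = high SImode word, lsw = low word):

       if (msw (a) >  msw (b)) goto taken;    <- msw_taken = GT
       if (msw (a) <  msw (b)) goto skip;     <- msw_skip  = LT
       if (lsw (a) >u lsw (b)) goto taken;    <- lsw_taken = GTU
     skip:

   The exact set of branches depends on the comparison code and on constant
   operands, as computed in the switch statement below.  */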
2088 bool
2089 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2091 enum rtx_code msw_taken, msw_skip, lsw_taken;
2092 rtx_code_label *skip_label = NULL;
2093 rtx op1h, op1l, op2h, op2l;
2094 int num_branches;
2095 profile_probability prob, rev_prob;
2096 profile_probability msw_taken_prob = profile_probability::uninitialized (),
2097 msw_skip_prob = profile_probability::uninitialized (),
2098 lsw_taken_prob = profile_probability::uninitialized ();
2100 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2101 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2102 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2103 op1l = gen_lowpart (SImode, operands[1]);
2104 op2l = gen_lowpart (SImode, operands[2]);
2105 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2106 prob = split_branch_probability;
2107 rev_prob = prob.invert ();
2108 switch (comparison)
2110 case EQ:
2111 msw_skip = NE;
2112 lsw_taken = EQ;
2113 if (prob.initialized_p ())
2115 /* FIXME: This is not optimal. We do not really know the probability
2116 	     that values differ by MSW only, but we should probably distribute
2117 probabilities more evenly. */
2118 msw_skip_prob = rev_prob;
2119 lsw_taken_prob = prob > profile_probability::never ()
2120 ? profile_probability::guessed_always ()
2121 : profile_probability::guessed_never ();
2123 break;
2124 case NE:
2125 msw_taken = NE;
2126 msw_taken_prob = prob;
2127 lsw_taken = NE;
2128 lsw_taken_prob = profile_probability::guessed_never ();
2129 break;
2130 case GTU: case GT:
2131 msw_taken = comparison;
2132 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2133 break;
2134 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2135 msw_skip = swap_condition (msw_taken);
2136 lsw_taken = GTU;
2137 break;
2138 case GEU: case GE:
2139 if (op2l == CONST0_RTX (SImode))
2140 msw_taken = comparison;
2141 else
2143 msw_taken = comparison == GE ? GT : GTU;
2144 msw_skip = swap_condition (msw_taken);
2145 lsw_taken = GEU;
2147 break;
2148 case LTU: case LT:
2149 msw_taken = comparison;
2150 if (op2l == CONST0_RTX (SImode))
2151 break;
2152 msw_skip = swap_condition (msw_taken);
2153 lsw_taken = LTU;
2154 break;
2155 case LEU: case LE:
2156 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2157 msw_taken = comparison;
2158 else
2160 lsw_taken = LEU;
2161 if (comparison == LE)
2162 msw_taken = LT;
2163 else if (op2h != CONST0_RTX (SImode))
2164 msw_taken = LTU;
2165 else
2167 msw_skip = swap_condition (LTU);
2168 break;
2170 msw_skip = swap_condition (msw_taken);
2172 break;
2173 default: return false;
2175 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2176 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2177 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2178 if (comparison != EQ && comparison != NE && num_branches > 1)
2180 if (!CONSTANT_P (operands[2])
2181 && prob.initialized_p ()
2182 && prob.to_reg_br_prob_base () >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2183 && prob.to_reg_br_prob_base () <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2185 msw_taken_prob = prob / 2;
2186 msw_skip_prob = rev_prob.apply_scale (REG_BR_PROB_BASE,
2187 rev_prob.to_reg_br_prob_base ()
2188 + REG_BR_PROB_BASE);
2189 lsw_taken_prob = prob;
2191 else
2193 msw_taken_prob = prob;
2194 msw_skip_prob = profile_probability::guessed_always ();
2195 /* ??? If we have a constant op2h, should we use that when
2196 calculating lsw_taken_prob? */
2197 lsw_taken_prob = prob;
2200 operands[1] = op1h;
2201 operands[2] = op2h;
2203 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2204 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2205 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2207 rtx taken_label = operands[3];
2209 /* Operands were possibly modified, but msw_skip doesn't expect this.
2210 Always use the original ones. */
2211 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2213 operands[1] = op1h;
2214 operands[2] = op2h;
2217 operands[3] = skip_label = gen_label_rtx ();
2218 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2219 operands[3] = taken_label;
2221 operands[1] = op1l;
2222 operands[2] = op2l;
2223 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2224 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2225 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2226 emit_label (skip_label);
2227 return true;
2230 /* Given an operand, return 1 if the evaluated operand plugged into an
2231 if_then_else will result in a branch_true, 0 if branch_false, or
2232    -1 if neither applies.  The truth table goes like this:
2234 op | cmpval | code | result
2235 ---------+--------+---------+--------------------
2236 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2237 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2238 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2239 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2240 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2241 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2242 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2243 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2245 sh_eval_treg_value (rtx op)
2247 if (t_reg_operand (op, GET_MODE (op)))
2248 return 1;
2249 if (negt_reg_operand (op, GET_MODE (op)))
2250 return 0;
2252 rtx_code code = GET_CODE (op);
2253 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2254 return -1;
2256 int cmpop = code == EQ ? 1 : 0;
2257 int cmpval = INTVAL (XEXP (op, 1));
2258 if (cmpval != 0 && cmpval != 1)
2259 return -1;
2261 int t;
2262 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2263 t = 0;
2264 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2265 t = 1;
2266 else
2267 return -1;
2269 return t ^ (cmpval == cmpop);
2272 /* Emit INSN, possibly in a PARALLEL with an USE/CLOBBER of FPSCR bits in case
2273 of floating-point comparisons. */
2274 static void
2275 sh_emit_set_t_insn (rtx insn, machine_mode mode)
2277 if (TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT
2278 && GET_CODE (insn) != PARALLEL)
2280 insn = gen_rtx_PARALLEL (VOIDmode,
2281 gen_rtvec (3, insn,
2282 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, FPSCR_STAT_REG)),
2283 gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, FPSCR_MODES_REG))));
2285 emit_insn (insn);
2288 /* Prepare the operands for an scc instruction; make sure that the
2289 compare has been done and the result is in T_REG. */
2290 void
2291 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2293 rtx t_reg = get_t_reg_rtx ();
2294 enum rtx_code oldcode = code;
2296 /* First need a compare insn. */
2297 switch (code)
2299 case NE:
2300 /* It isn't possible to handle this case. */
2301 gcc_unreachable ();
2302 case LT:
2303 code = GT;
2304 break;
2305 case LE:
2306 code = GE;
2307 break;
2308 case LTU:
2309 code = GTU;
2310 break;
2311 case LEU:
2312 code = GEU;
2313 break;
2314 default:
2315 break;
2317 if (code != oldcode)
2318 std::swap (op0, op1);
2320 machine_mode mode = GET_MODE (op0);
2321 if (mode == VOIDmode)
2322 mode = GET_MODE (op1);
2324 op0 = force_reg (mode, op0);
2325 if ((code != EQ && code != NE
2326 && (op1 != const0_rtx
2327 || code == GTU || code == GEU || code == LTU || code == LEU))
2328 || (mode == DImode && op1 != const0_rtx)
2329 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2330 op1 = force_reg (mode, op1);
2332 sh_emit_set_t_insn (gen_rtx_SET (t_reg,
2333 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2334 mode);
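/* Usage sketch (illustrative): sh_emit_scc_to_t (LT, a, b) has no "less
   than" compare insn available, so the function above rewrites the request
   as GT with the operands swapped and emits a SET of the T register,
   i.e. T = (b > a), which is equivalent to (a < b).  */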
2337 /* Called from the md file, set up the operands of a compare instruction. */
2338 void
2339 sh_emit_compare_and_branch (rtx *operands, machine_mode mode)
2341 enum rtx_code code = GET_CODE (operands[0]);
2342 enum rtx_code branch_code;
2343 rtx op0 = operands[1];
2344 rtx op1 = operands[2];
2345 rtx insn;
2346 bool need_ccmpeq = false;
2348 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2350 op0 = force_reg (mode, op0);
2351 op1 = force_reg (mode, op1);
2353 else
2355 if (code != EQ || mode == DImode)
2357 /* Force args into regs, since we can't use constants here. */
2358 op0 = force_reg (mode, op0);
2359 if (op1 != const0_rtx || code == GTU || code == GEU)
2360 op1 = force_reg (mode, op1);
2364 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2366 if (code == LT
2367 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2368 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2370 std::swap (op0, op1);
2371 code = swap_condition (code);
2374 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2375 if (code == GE)
2377 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2378 need_ccmpeq = true;
2379 code = GT;
2382 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2383 to EQ/GT respectively. */
2384 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2387 switch (code)
2389 case EQ:
2390 case GT:
2391 case GE:
2392 case GTU:
2393 case GEU:
2394 branch_code = code;
2395 break;
2396 case NE:
2397 case LT:
2398 case LE:
2399 case LTU:
2400 case LEU:
2401 branch_code = reverse_condition (code);
2402 break;
2403 default:
2404 gcc_unreachable ();
2407 insn = gen_rtx_SET (get_t_reg_rtx (),
2408 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2410 sh_emit_set_t_insn (insn, mode);
2411 if (need_ccmpeq)
2412 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2414 if (branch_code == code)
2415 emit_jump_insn (gen_branch_true (operands[3]));
2416 else
2417 emit_jump_insn (gen_branch_false (operands[3]));
2420 void
2421 sh_emit_compare_and_set (rtx *operands, machine_mode mode)
2423 enum rtx_code code = GET_CODE (operands[1]);
2424 rtx op0 = operands[2];
2425 rtx op1 = operands[3];
2426 rtx_code_label *lab = NULL;
2427 bool invert = false;
2429 op0 = force_reg (mode, op0);
2430 if ((code != EQ && code != NE
2431 && (op1 != const0_rtx
2432 || code == GTU || code == GEU || code == LTU || code == LEU))
2433 || (mode == DImode && op1 != const0_rtx)
2434 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2435 op1 = force_reg (mode, op1);
2437 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2439 if (code == LT || code == LE)
2441 std::swap (op0, op1);
2442 code = swap_condition (code);
2444 if (code == GE)
2446 if (TARGET_IEEE)
2448 lab = gen_label_rtx ();
2449 sh_emit_scc_to_t (EQ, op0, op1);
2450 emit_jump_insn (gen_branch_true (lab));
2451 code = GT;
2453 else
2455 code = LT;
2456 invert = true;
2461 if (code == NE)
2463 code = EQ;
2464 invert = true;
2467 sh_emit_scc_to_t (code, op0, op1);
2468 if (lab)
2469 emit_label (lab);
2470 if (invert)
2471 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2472 else
2473 emit_move_insn (operands[0], get_t_reg_rtx ());
2476 /* Functions to output assembly code. */
2478 /* Return a sequence of instructions to perform DI or DF move.
2480 Since the SH cannot move a DI or DF in one instruction, we have
2481 to take care when we see overlapping source and dest registers. */
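/* Illustrative: for a DImode load from @r4 into r4/r5 the pointer register
   overlaps the destination, so the code below emits the second word first,
   roughly "mov.l @(4,r4),r5" followed by "mov.l @r4,r4"; the other order
   would clobber the address before the second load.  */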
2482 const char *
2483 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2484 machine_mode mode)
2486 rtx dst = operands[0];
2487 rtx src = operands[1];
2489 if (MEM_P (dst)
2490 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2491 return "mov.l %T1,%0" "\n"
2492 " mov.l %1,%0";
2494 if (register_operand (dst, mode)
2495 && register_operand (src, mode))
2497 if (REGNO (src) == MACH_REG)
2498 return "sts mach,%S0" "\n"
2499 " sts macl,%R0";
2501 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2502 when mov.d r1,r0 do r1->r0 then r2->r1. */
2503 if (REGNO (src) + 1 == REGNO (dst))
2504 return "mov %T1,%T0" "\n"
2505 " mov %1,%0";
2506 else
2507 return "mov %1,%0" "\n"
2508 " mov %T1,%T0";
2510 else if (CONST_INT_P (src))
2512 if (INTVAL (src) < 0)
2513 output_asm_insn ("mov #-1,%S0", operands);
2514 else
2515 output_asm_insn ("mov #0,%S0", operands);
2517 return "mov %1,%R0";
2519 else if (MEM_P (src))
2521 int ptrreg = -1;
2522 int dreg = REGNO (dst);
2523 rtx inside = XEXP (src, 0);
2525 switch (GET_CODE (inside))
2527 case REG:
2528 ptrreg = REGNO (inside);
2529 break;
2531 case SUBREG:
2532 ptrreg = subreg_regno (inside);
2533 break;
2535 case PLUS:
2536 ptrreg = REGNO (XEXP (inside, 0));
2537 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2538 an offsettable address. Unfortunately, offsettable addresses use
2539 QImode to check the offset, and a QImode offsettable address
2540 requires r0 for the other operand, which is not currently
2541 supported, so we can't use the 'o' constraint.
2542 Thus we must check for and handle r0+REG addresses here.
2543 We punt for now, since this is likely very rare. */
2544 gcc_assert (!REG_P (XEXP (inside, 1)));
2545 break;
2547 case LABEL_REF:
2548 return "mov.l %1,%0" "\n"
2549 " mov.l %1+4,%T0";
2550 case POST_INC:
2551 return "mov.l %1,%0" "\n"
2552 " mov.l %1,%T0";
2553 default:
2554 gcc_unreachable ();
2557 /* Work out the safe way to copy. Copy into the second half first. */
2558 if (dreg == ptrreg)
2559 return "mov.l %T1,%T0" "\n"
2560 " mov.l %1,%0";
2563 return "mov.l %1,%0" "\n"
2564 " mov.l %T1,%T0";
2567 /* Print an instruction which would have gone into a delay slot after
2568 another instruction, but couldn't because the other instruction expanded
2569 into a sequence where putting the slot insn at the end wouldn't work. */
2570 static void
2571 print_slot (rtx_sequence *seq)
2573 final_scan_insn (seq->insn (1), asm_out_file, optimize, 1, NULL);
2575 seq->insn (1)->set_deleted ();
2578 const char *
2579 output_far_jump (rtx_insn *insn, rtx op)
2581 struct { rtx lab, reg, op; } this_jmp;
2582 rtx_code_label *braf_base_lab = NULL;
2583 const char *jump;
2584 int far;
2585 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2586 rtx_insn *prev;
2588 this_jmp.lab = gen_label_rtx ();
2590 if (TARGET_SH2
2591 && offset >= -32764
2592 && offset - get_attr_length (insn) <= 32766
2593 && ! CROSSING_JUMP_P (insn))
2595 far = 0;
2596 jump = "mov.w %O0,%1" "\n"
2597 " braf %1";
2599 else
2601 far = 1;
2602 if (flag_pic)
2604 if (TARGET_SH2)
2605 jump = "mov.l %O0,%1" "\n"
2606 " braf %1";
2607 else
2608 jump = "mov.l r0,@-r15" "\n"
2609 " mova %O0,r0" "\n"
2610 " mov.l @r0,%1" "\n"
2611 " add r0,%1" "\n"
2612 " mov.l @r15+,r0" "\n"
2613 " jmp @%1";
2615 else
2616 jump = "mov.l %O0,%1" "\n"
2617 " jmp @%1";
2619 /* If we have a scratch register available, use it. */
2620 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2621 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2623 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2624 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2625 jump = "mov.l r1,@-r15" "\n"
2626 " mova %O0,r0" "\n"
2627 " mov.l @r0,r1" "\n"
2628 " add r1,r0" "\n"
2629 " mov.l @r15+,r1" "\n"
2630 " jmp @%1";
2631 output_asm_insn (jump, &this_jmp.lab);
2632 if (dbr_sequence_length ())
2633 print_slot (final_sequence);
2634 else
2635 output_asm_insn ("nop", 0);
2637 else
2639 /* Output the delay slot insn first if any. */
2640 if (dbr_sequence_length ())
2641 print_slot (final_sequence);
2643 this_jmp.reg = gen_rtx_REG (SImode, 13);
2644 output_asm_insn ("mov.l r13,@-r15", 0);
2645 output_asm_insn (jump, &this_jmp.lab);
2646 output_asm_insn ("mov.l @r15+,r13", 0);
2648 if (far && flag_pic && TARGET_SH2)
2650 braf_base_lab = gen_label_rtx ();
2651 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2652 CODE_LABEL_NUMBER (braf_base_lab));
2654 if (far)
2655 output_asm_insn (".align 2", 0);
2656 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2657 this_jmp.op = op;
2658 if (far && flag_pic)
2660 if (TARGET_SH2)
2661 this_jmp.lab = braf_base_lab;
2662 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2664 else
2665 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2666 return "";
2669 /* Local label counter, used for constants in the pool and inside
2670 pattern branches. */
2671 static int lf = 100;
2673 /* Output code for ordinary branches. */
2674 const char *
2675 output_branch (int logic, rtx_insn *insn, rtx *operands)
2677 switch (get_attr_length (insn))
2679 case 6:
2680 /* This can happen if filling the delay slot has caused a forward
2681 branch to exceed its range (we could reverse it, but only
2682 when we know we won't overextend other branches; this should
2683 best be handled by relaxation).
2684 	 It can also happen when other condbranches hoist a delay slot insn
2685 	 from their destination, thus leading to a code size increase.
2686 But the branch will still be in the range -4092..+4098 bytes. */
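      /* Roughly, for a "bt target" that no longer reaches, the code below
	 emits a sequence along the lines of (illustrative):
	     bf/s	.LF<n>		! inverted condition, skip far branch
	     <delay slot insn>
	     bra	target
	     nop
	 .LF<n>:
	 preserving the original semantics at the cost of two extra insns.  */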
2687 if (! TARGET_RELAX)
2689 int label = lf++;
2690 /* The call to print_slot will clobber the operands. */
2691 rtx op0 = operands[0];
2693 /* If the instruction in the delay slot is annulled (true), then
2694 there is no delay slot where we can put it now. The only safe
2695 place for it is after the label. final will do that by default. */
2697 if (final_sequence
2698 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
2699 && get_attr_length (final_sequence->insn (1)))
2701 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2702 ASSEMBLER_DIALECT ? "/" : ".", label);
2703 print_slot (final_sequence);
2705 else
2706 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2708 output_asm_insn ("bra\t%l0", &op0);
2709 fprintf (asm_out_file, "\tnop\n");
2710 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2712 return "";
2714 /* FALLTHRU */
2715 /* When relaxing, handle this like a short branch. The linker
2716 will fix it up if it still doesn't fit after relaxation. */
2717 case 2:
2718 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2720 /* These are for SH2e, in which we have to account for the
2721 extra nop because of the hardware bug in annulled branches. */
2722 case 8:
2723 if (! TARGET_RELAX)
2725 int label = lf++;
2727 gcc_assert (!final_sequence
2728 || !(INSN_ANNULLED_BRANCH_P
2729 (XVECEXP (final_sequence, 0, 0))));
2730 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2731 logic ? "f" : "t",
2732 ASSEMBLER_DIALECT ? "/" : ".", label);
2733 fprintf (asm_out_file, "\tnop\n");
2734 output_asm_insn ("bra\t%l0", operands);
2735 fprintf (asm_out_file, "\tnop\n");
2736 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2738 return "";
2740 /* FALLTHRU */
2741 case 4:
2743 char buffer[10];
2745 sprintf (buffer, "b%s%ss\t%%l0",
2746 logic ? "t" : "f",
2747 ASSEMBLER_DIALECT ? "/" : ".");
2748 output_asm_insn (buffer, &operands[0]);
2749 return "nop";
2752 default:
2753 /* There should be no longer branches now - that would
2754 indicate that something has destroyed the branches set
2755 up in machine_dependent_reorg. */
2756 gcc_unreachable ();
2760 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2761    fill in operand 9 as a label to the successor insn.
2762 We try to use jump threading where possible.
2763    If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2764 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2765 follow jmp and bt, if the address is in range. */
2766 const char *
2767 output_branchy_insn (enum rtx_code code, const char *templ,
2768 rtx_insn *insn, rtx *operands)
2770 rtx_insn *next_insn = NEXT_INSN (insn);
2772 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2774 rtx src = SET_SRC (PATTERN (next_insn));
2775 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2777 /* Following branch not taken */
2778 rtx_code_label *lab = gen_label_rtx ();
2779 emit_label_after (lab, next_insn);
2780 INSN_ADDRESSES_NEW (lab,
2781 INSN_ADDRESSES (INSN_UID (next_insn))
2782 + get_attr_length (next_insn));
2783 operands[9] = lab;
2784 return templ;
2786 else
2788 int offset = (branch_dest (next_insn)
2789 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2790 if (offset >= -252 && offset <= 258)
2792 if (GET_CODE (src) == IF_THEN_ELSE)
2793 /* branch_true */
2794 src = XEXP (src, 1);
2795 operands[9] = src;
2796 return templ;
2800 rtx_code_label *lab = gen_label_rtx ();
2801 emit_label_after (lab, insn);
2802 INSN_ADDRESSES_NEW (lab,
2803 INSN_ADDRESSES (INSN_UID (insn))
2804 + get_attr_length (insn));
2805 operands[9] = lab;
2806 return templ;
2809 const char *
2810 output_ieee_ccmpeq (rtx_insn *insn, rtx *operands)
2812 return output_branchy_insn (NE, "bt %l9" "\n"
2813 " fcmp/eq %1,%0",
2814 insn, operands);
2817 /* Output the start of the assembler file. */
2818 static void
2819 sh_file_start (void)
2821 default_file_start ();
2823 if (TARGET_ELF)
2824 /* We need to show the text section with the proper
2825 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2826 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2827 will complain. We can teach GAS specifically about the
2828 default attributes for our choice of text section, but
2829 then we would have to change GAS again if/when we change
2830 the text section name. */
2831 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2832 else
2833 /* Switch to the data section so that the coffsem symbol
2834 isn't in the text section. */
2835 switch_to_section (data_section);
2837 if (TARGET_LITTLE_ENDIAN)
2838 fputs ("\t.little\n", asm_out_file);
2841 /* Implementation of TARGET_ASM_INTEGER for SH. Pointers to functions
2842 need to be output as pointers to function descriptors for
2843 FDPIC. */
2845 static bool
2846 sh_assemble_integer (rtx value, unsigned int size, int aligned_p)
2848 if (TARGET_FDPIC && size == UNITS_PER_WORD
2849 && GET_CODE (value) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (value))
2851 fputs ("\t.long\t", asm_out_file);
2852 output_addr_const (asm_out_file, value);
2853 fputs ("@FUNCDESC\n", asm_out_file);
2854 return true;
2856 return default_assemble_integer (value, size, aligned_p);
2859 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2860 static bool
2861 unspec_caller_rtx_p (rtx pat)
2863 rtx base, offset;
2864 split_const (pat, &base, &offset);
2866 if (GET_CODE (base) == UNSPEC)
2868 if (XINT (base, 1) == UNSPEC_CALLER)
2869 return true;
2870 for (int i = 0; i < XVECLEN (base, 0); i++)
2871 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2872 return true;
2874 return false;
2877 /* Indicate that INSN cannot be duplicated. This is true for insn
2878 that generates a unique label. */
2879 static bool
2880 sh_cannot_copy_insn_p (rtx_insn *insn)
2882 if (!reload_completed || !flag_pic)
2883 return false;
2885 if (!NONJUMP_INSN_P (insn))
2886 return false;
2887 if (asm_noperands (insn) >= 0)
2888 return false;
2890 rtx pat = PATTERN (insn);
2892 if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == USE)
2893 return false;
2895 if (TARGET_FDPIC && GET_CODE (pat) == PARALLEL)
2897 rtx t = XVECEXP (pat, 0, XVECLEN (pat, 0) - 1);
2898 if (GET_CODE (t) == USE && unspec_caller_rtx_p (XEXP (t, 0)))
2899 return true;
2902 if (GET_CODE (pat) != SET)
2903 return false;
2904 pat = SET_SRC (pat);
2906 if (unspec_caller_rtx_p (pat))
2907 return true;
2909 return false;
2912 /* Number of instructions used to make an arithmetic right shift by N. */
2913 static const char ashiftrt_insns[] =
2914 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2916 /* Description of a logical left or right shift, when expanded to a sequence
2917 of 1/2/8/16 shifts.
2918 Notice that one bit right shifts clobber the T bit. One bit left shifts
2919 are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. */
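/* For example (illustrative), ashl_lshr_seq[10] = { 2, { 8, 2 }, 0 } in the
   table below means a shift by 10 is done as an 8-bit shift followed by a
   2-bit shift (shll8 + shll2 for the left variant) without touching T,
   while ashl_lshr_seq[3] = { 2, { 2, 1 }, LSHR_CLOBBERS_T } ends in a
   1-bit shift, which for the logical right variant (shlr) clobbers T.  */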
2920 enum
2922 ASHL_CLOBBERS_T = 1 << 0,
2923 LSHR_CLOBBERS_T = 1 << 1
2926 struct ashl_lshr_sequence
2928 char insn_count;
2929 signed char amount[6];
2930 char clobbers_t;
2933 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
2935 { 0, { 0 }, 0 }, // 0
2936 { 1, { 1 }, LSHR_CLOBBERS_T },
2937 { 1, { 2 }, 0 },
2938 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
2939 { 2, { 2, 2 }, 0 }, // 4
2940 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
2941 { 3, { 2, 2, 2 }, 0 },
2942 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
2943 { 1, { 8 }, 0 }, // 8
2944 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
2945 { 2, { 8, 2 }, 0 },
2946 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
2947 { 3, { 8, 2, 2 }, 0 }, // 12
2948 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
2949 { 3, { 8, -2, 8 }, 0 },
2950 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
2951 { 1, { 16 }, 0 }, // 16
2952 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
2953 { 2, { 16, 2 }, 0 },
2954 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
2955 { 3, { 16, 2, 2 }, 0 }, // 20
2956 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
2957 { 3, { 16, -2, 8 }, 0 },
2958 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
2959 { 2, { 16, 8 }, 0 }, // 24
2960 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
2961 { 3, { 16, 8, 2 }, 0 },
2962 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
2963 { 4, { 16, 8, 2, 2 }, 0 }, // 28
2964 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
2965 { 3, { 16, -2, 16 }, 0 },
2967 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
2968    For a left shift by 31 a 2 insn and-rotl sequence can be used.
2969 However, the shift-and combiner code needs this entry here to be in
2970 terms of real shift insns. */
2971 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
2974 /* Individual shift amounts for shift amounts < 16, where up to the
2975    three highmost bits might be clobbered.  This is typically used when
2976    combined with some kind of sign or zero extension.  */
2977 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
2979 { 0, { 0 }, 0 }, // 0
2980 { 1, { 1 }, LSHR_CLOBBERS_T },
2981 { 1, { 2 }, 0 },
2982 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
2983 { 2, { 2, 2 }, 0 }, // 4
2984 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
2985 { 2, { 8, -2 }, 0 },
2986 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
2987 { 1, { 8 }, 0 }, // 8
2988 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
2989 { 2, { 8, 2 }, 0 },
2990 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
2991 { 3, { 8, 2, 2 }, 0 }, // 12
2992 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
2993 { 2, { 16, -2 }, 0 },
2994 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
2995 { 1, { 16 }, 0 }, // 16
2996 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
2997 { 2, { 16, 2 }, 0 },
2998 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
2999 { 3, { 16, 2, 2 }, 0 }, // 20
3000 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3001 { 3, { 16, -2, 8 }, 0 },
3002 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3003 { 2, { 16, 8 }, 0 }, // 24
3004 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3005 { 3, { 16, 8, 2 }, 0 },
3006 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3007 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3008 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3009 { 3, { 16, -2, 16 }, 0 },
3010 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3013 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
3014 will clobber the T bit. */
3015 bool
3016 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
3018 gcc_assert (CONST_INT_P (shift_amount));
3020 const int shift_amount_i = INTVAL (shift_amount) & 31;
3022 /* Special case for shift count of 31: use and-rotl sequence. */
3023 if (shift_amount_i == 31)
3024 return true;
3026 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3027 & ASHL_CLOBBERS_T) != 0;
3030 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3031 instructions will clobber the T bit. */
3032 bool
3033 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3035 gcc_assert (CONST_INT_P (shift_amount));
3037 /* For right shifts the constant might be negative. */
3038 const int shift_amount_i = std::abs (INTVAL (shift_amount)) & 31;
3040 /* Special case for shift count of 31: use shll-movt sequence. */
3041 if (shift_amount_i == 31)
3042 return true;
3044 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3045 & LSHR_CLOBBERS_T) != 0;
3048 /* Return true if it is potentially beneficial to use a dynamic shift
3049 instruction (shad / shar) instead of a combination of 1/2/8/16
3050 shift instructions for the specified shift count.
3051 If dynamic shifts are not available, always return false. */
3052 bool
3053 sh_dynamicalize_shift_p (rtx count)
3055 gcc_assert (CONST_INT_P (count));
3057 /* For right shifts the constant might be negative. */
3058 const int shift_amount_i = std::abs (INTVAL (count)) & 31;
3059 int insn_count;
3061 /* For left and right shifts, there are shorter 2 insn sequences for
3062 shift amounts of 31. */
3063 if (shift_amount_i == 31)
3064 insn_count = 2;
3065 else
3066 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3068 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
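/* Illustrative: assuming SH_DYNAMIC_SHIFT_COST is 1, a constant shift by 13
   (4 insns via the { 8, 2, 1, 2 } sequence) would be dynamicalized when
   TARGET_DYNSHIFT, whereas a shift by 8 (a single shll8/shlr8) would not.  */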
3071 /* Assuming we have a value that has been sign-extended by at least one bit,
3072 can we use the ext_shift_amounts with the last shift turned to an
3073 arithmetic shift to shift it by N without data loss, and quicker than by
3074 other means? */
3075 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
3077 /* Return the cost of a shift. */
3078 static inline int
3079 shiftcosts (rtx x)
3081 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3083 if (GET_MODE (x) == DImode
3084 && CONST_INT_P (XEXP (x, 1))
3085 && INTVAL (XEXP (x, 1)) == 1)
3086 return 2;
3088 /* Everything else is invalid, because there is no pattern for it. */
3089 return -1;
3091 /* If shift by a non constant, then this will be expensive. */
3092 if (!CONST_INT_P (XEXP (x, 1)))
3093 return SH_DYNAMIC_SHIFT_COST;
3095 /* Otherwise, return the true cost in instructions. Cope with out of range
3096 shift counts more or less arbitrarily. */
3097 int value = INTVAL (XEXP (x, 1)) & 31;
3099 if (GET_CODE (x) == ASHIFTRT)
3101 int cost = ashiftrt_insns[value];
3102 /* If dynamic shifts are available and profitable in this case, then we
3103 put the constant in a reg and use shad. */
3104 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3105 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3106 return cost;
3108 else
3109 return ashl_lshr_seq[value].insn_count;
3112 /* Return the cost of an AND/XOR/IOR operation. */
3113 static inline int
3114 and_xor_ior_costs (rtx x, int code)
3116 /* On SH1-4 we have only max. SImode operations.
3117 Double the cost for modes > SImode. */
3118 const int cost_scale = GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD ? 2 : 1;
3120 /* A logical operation with two registers is a single cycle
3121 instruction. */
3122 if (!CONST_INT_P (XEXP (x, 1)))
3123 return 1 * cost_scale;
3125 int i = INTVAL (XEXP (x, 1));
3127 /* These constants are single cycle extu.[bw] instructions. */
3128 if ((i == 0xff || i == 0xffff) && code == AND)
3129 return 1 * cost_scale;
3130 /* Constants that can be used in an instruction as an immediate are
3131 a single cycle, but this requires r0, so make it a little more
3132 expensive. */
3133 if (CONST_OK_FOR_K08 (i))
3134 return 2 * cost_scale;
3135 /* Constants that can be loaded with a mov immediate need one more cycle.
3136 This case is probably unnecessary. */
3137 if (CONST_OK_FOR_I08 (i))
3138 return 2 * cost_scale;
3139 /* Any other constant requires an additional 2 cycle pc-relative load.
3140 This case is probably unnecessary. */
3141 return 3 * cost_scale;
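/* Illustrative SImode examples of the above costs:  "and r1,r2" -> 1,
   "and" with #0xff or #0xffff (an extu.b/extu.w) -> 1, a K08 immediate
   such as #0x80 -> 2, and any constant that needs a constant pool
   load -> 3.  */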
3144 /* Return the cost of an addition or a subtraction. */
3145 static inline int
3146 addsubcosts (rtx x)
3148 if (GET_MODE (x) == SImode)
3150 /* The addc or subc patterns will eventually become one or two
3151 instructions. Below are some costs for some of the patterns
3152 which combine would reject because the costs of the individual
3153 insns in the patterns are lower.
3155 FIXME: It would be much easier if we had something like insn cost
3156 attributes and the cost calculation machinery used those attributes
3157 in the first place. This would eliminate redundant recog-like C
3158 code to calculate costs of complex patterns. */
3159 rtx op0 = XEXP (x, 0);
3160 rtx op1 = XEXP (x, 1);
3162 if (GET_CODE (x) == PLUS)
3164 if (GET_CODE (op0) == AND
3165 && XEXP (op0, 1) == const1_rtx
3166 && (GET_CODE (op1) == PLUS
3167 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
3168 return 1;
3170 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
3171 && GET_CODE (op1) == LSHIFTRT
3172 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
3173 return 1;
3175 	  /* Let's assume that adding the result of an insn that stores into
3176 the T bit is cheap. */
3177 if (treg_set_expr (op1, SImode))
3178 return 1;
3179 if (treg_set_expr (op0, SImode))
3180 return 1;
3183 /* On SH1-4 we have only max. SImode operations.
3184 Double the cost for modes > SImode. */
3185 const int cost_scale = GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD ? 2 : 1;
3187 /* Adding a register is a single cycle insn. */
3188 if (REG_P (XEXP (x, 1))
3189 || GET_CODE (XEXP (x, 1)) == SUBREG)
3190 return 1 * cost_scale;
3192 /* Likewise for small constants. */
3193 if (CONST_INT_P (XEXP (x, 1))
3194 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3195 return 1 * cost_scale;
3197 /* Any other constant requires a 2 cycle pc-relative load plus an
3198 addition. */
3199 return 3 * cost_scale;
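/* Illustrative: "add r1,r2" and "add #127,Rn" are counted as 1 insn above;
   adding a constant outside the I08 range is counted as 3 (a PC-relative
   constant load plus the add); modes wider than SImode double these
   costs.  */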
3202 /* Return the cost of a multiply. */
3203 static inline int
3204 multcosts (rtx x ATTRIBUTE_UNUSED)
3206 if (sh_multcost >= 0)
3207 return sh_multcost;
3209 if (TARGET_SH2)
3211 /* We have a mul insn, so we can never take more than the mul and the
3212 read of the mac reg, but count more because of the latency and extra
3213 reg usage. */
3214 if (optimize_size)
3215 return 2;
3216 return 3;
3219 /* If we're aiming at small code, then just count the number of
3220 insns in a multiply call sequence. */
3221 if (optimize_size)
3222 return 5;
3224 /* Otherwise count all the insns in the routine we'd be calling too. */
3225 return 20;
3228 /* Compute a (partial) cost for rtx X. Return true if the complete
3229 cost has been computed, and false if subexpressions should be
3230 scanned. In either case, *TOTAL contains the cost result. */
3231 static bool
3232 sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
3233 int opno ATTRIBUTE_UNUSED,
3234 int *total, bool speed)
3236 int code = GET_CODE (x);
3238 switch (code)
3240 /* The lower-subreg pass decides whether to split multi-word regs
3241 into individual regs by looking at the cost for a SET of certain
3242 modes with the following patterns:
3243 (set (reg) (reg))
3244 (set (reg) (const_int 0))
3245 On machines that support vector-move operations a multi-word move
3246 is the same cost as individual reg move. On SH there is no
3247 vector-move, so we have to provide the correct cost in the number
3248 of move insns to load/store the reg of the mode in question. */
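      /* E.g. (illustrative) a DImode reg-reg SET is costed below as two
	 SImode moves, i.e. COSTS_N_INSNS (2), so lower-subreg can see that
	 splitting it into word moves is not more expensive.  */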
3249 case SET:
3250 if (sh_movt_set_dest (x) != NULL || sh_movrt_set_dest (x) != NULL)
3252 *total = COSTS_N_INSNS (1);
3253 return true;
3256 if (register_operand (SET_DEST (x), VOIDmode)
3257 && (register_operand (SET_SRC (x), VOIDmode)
3258 || satisfies_constraint_Z (SET_SRC (x))))
3260 const machine_mode mode = GET_MODE (SET_DEST (x));
3261 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3262 / mov_insn_size (mode, TARGET_SH2A));
3263 return true;
3265 return false;
3267 /* The cost of a mem access is mainly the cost of the address mode on top
3268 of the cost of the load/store insn itself. */
3269 case MEM:
3270 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3271 speed)
3272 + COSTS_N_INSNS (1);
3273 return true;
3275 case IF_THEN_ELSE:
3276 /* This case is required for the if_then_else negc pattern. */
3277 if (treg_set_expr (XEXP (x, 0), SImode))
3279 *total = COSTS_N_INSNS (1);
3280 return true;
3282 else
3283 return false;
3285 /* Zero extracts of single bits are usually combine patterns for the
3286 tst insns. */
3287 case ZERO_EXTRACT:
3288 if (GET_CODE (XEXP (x, 0)) == XOR
3289 && arith_reg_operand (XEXP (XEXP (x, 0), 0), VOIDmode)
3290 && XEXP (x, 1) == const1_rtx
3291 && CONST_INT_P (XEXP (x, 2))
3292 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3293 	  /* Check that the xor constant overlaps with the extracted bit.  */
3294 && (INTVAL (XEXP (XEXP (x, 0), 1)) & (1LL << INTVAL (XEXP (x, 2)))))
3296 *total = 1; //COSTS_N_INSNS (1);
3297 return true;
3300 /* div0s variant. */
3301 if (GET_CODE (XEXP (x, 0)) == XOR
3302 && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR
3303 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3305 *total = 1;
3306 return true;
3308 return false;
3310 /* The cost of a sign or zero extend depends on whether the source is a
3311 reg or a mem. In case of a mem take the address into account. */
3312 case SIGN_EXTEND:
3313 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3315 *total = COSTS_N_INSNS (1);
3316 return true;
3318 if (MEM_P (XEXP (x, 0)))
3320 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3321 GET_MODE (XEXP (x, 0)),
3322 MEM_ADDR_SPACE (XEXP (x, 0)), speed)
3323 + COSTS_N_INSNS (1);
3324 return true;
3326 return false;
3328 case ZERO_EXTEND:
3329 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3331 *total = COSTS_N_INSNS (1);
3332 return true;
3334 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3335 && (GET_MODE (XEXP (x, 0)) == QImode
3336 || GET_MODE (XEXP (x, 0)) == HImode))
3338 /* Handle SH2A's movu.b and movu.w insn. */
3339 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3340 GET_MODE (XEXP (x, 0)),
3341 MEM_ADDR_SPACE (XEXP (x, 0)), speed)
3342 + COSTS_N_INSNS (1);
3343 return true;
3345 return false;
3347 /* mems for SFmode and DFmode can be inside a parallel due to
3348 the way the fpscr is handled. */
3349 case PARALLEL:
3350 for (int i = 0; i < XVECLEN (x, 0); i++)
3352 rtx xx = XVECEXP (x, 0, i);
3353 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3355 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3356 GET_MODE (XEXP (xx, 0)),
3357 MEM_ADDR_SPACE (XEXP (xx, 0)), speed);
3358 		      *total += COSTS_N_INSNS (1);
3359 return true;
3361 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3363 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3364 GET_MODE (XEXP (xx, 1)),
3365 MEM_ADDR_SPACE (XEXP (xx, 1)), speed);
3366 		      *total += COSTS_N_INSNS (1);
3367 return true;
3371 if (sh_1el_vec (x, VOIDmode))
3372 *total = outer_code != SET;
3373 else if (sh_rep_vec (x, VOIDmode))
3374 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3375 + (outer_code != SET));
3376 else
3377 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3378 return true;
3380 case CONST_INT:
3381 if (CONST_OK_FOR_I08 (INTVAL (x)))
3382 *total = 0;
3383 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3384 && CONST_OK_FOR_K08 (INTVAL (x)))
3385 *total = 1;
3386 	/* prepare_cmp_insn will force costly constants into registers before
3387 the cbranch[sd]i4 patterns can see them, so preserve potentially
3388 interesting ones not covered by I08 above. */
3389 else if (outer_code == COMPARE
3390 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3391 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3392 || INTVAL (x) == 0x7fffffff
3393 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3394 *total = 1;
3395 else
3396 *total = 8;
3397 return true;
3399 case EQ:
3400 /* An and with a constant compared against zero is
3401 most likely going to be a TST #imm, R0 instruction. */
3402 if (XEXP (x, 1) == const0_rtx
3403 && ((GET_CODE (XEXP (x, 0)) == AND
3404 || (SUBREG_P (XEXP (x, 0))
3405 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == AND))
3406 || GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT))
3408 *total = 1;
3409 return true;
3412 else if (XEXP (x, 1) == const0_rtx
3413 && GET_CODE (XEXP (x, 0)) == AND
3414 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3415 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT
3416 && arith_reg_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), SImode)
3417 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3419 *total = 1;
3420 return true;
3422 else
3423 return false;
3425 case SMIN:
3426 case SMAX:
3427 /* This is most likely a clips.b or clips.w insn that is being made up
3428 by combine. */
3429 if (TARGET_SH2A
3430 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3431 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3432 && REG_P (XEXP (XEXP (x, 0), 0))
3433 && CONST_INT_P (XEXP (x, 1)))
3435 *total = COSTS_N_INSNS (1);
3436 return true;
3438 else
3439 return false;
3441 case CONST:
3442 case LABEL_REF:
3443 case SYMBOL_REF:
3444 *total = 5;
3445 return true;
3447 case CONST_DOUBLE:
3448 	/* prepare_cmp_insn will force costly constants into registers before
3449 the cbranchdi4 pattern can see them, so preserve potentially
3450 interesting ones. */
3451 if (outer_code == COMPARE && GET_MODE (x) == DImode)
3452 *total = 1;
3453 else
3454 *total = 10;
3455 return true;
3457 case CONST_VECTOR:
3458 /* FIXME: This looks broken. Only the last statement has any effect.
3459 Probably this could be folded with the PARALLEL case? */
3460 if (x == CONST0_RTX (GET_MODE (x)))
3461 *total = 0;
3462 else if (sh_1el_vec (x, VOIDmode))
3463 *total = outer_code != SET;
3464 if (sh_rep_vec (x, VOIDmode))
3465 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3466 + (outer_code != SET));
3467 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3468 return true;
3470 case PLUS:
3471 case MINUS:
3472 *total = COSTS_N_INSNS (addsubcosts (x));
3473 return true;
3475 case AND:
3476 /* Check for (and (not (reg)) (const_int 1)) which is a tst insn. */
3477 if (GET_CODE (XEXP (x, 0)) == NOT && XEXP (x, 1) == const1_rtx)
3479 *total = COSTS_N_INSNS (1);
3480 return true;
3482 /* Fall through. */
3484 case XOR:
3485 case IOR:
3486 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3487 return true;
3489 case MULT:
3490 *total = COSTS_N_INSNS (multcosts (x));
3491 return true;
3493 case LT:
3494 case GE:
3495 /* div0s sign comparison. */
3496 if (GET_CODE (XEXP (x, 0)) == XOR
3497 && REG_P ((XEXP (XEXP (x, 0), 0)))
3498 && REG_P ((XEXP (XEXP (x, 0), 1)))
3499 && satisfies_constraint_Z (XEXP (x, 1)))
3501 *total = COSTS_N_INSNS (1);
3502 return true;
3504 else
3505 return false;
3507 case LSHIFTRT:
3508 /* div0s sign comparison. */
3509 if (GET_CODE (XEXP (x, 0)) == XOR
3510 && REG_P ((XEXP (XEXP (x, 0), 0)))
3511 && REG_P ((XEXP (XEXP (x, 0), 1)))
3512 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3514 *total = COSTS_N_INSNS (1);
3515 return true;
3517 /* FALLTHRU */
3518 case ASHIFT:
3519 case ASHIFTRT:
3521 int cost = shiftcosts (x);
3522 if (cost < 0)
3523 return false;
3524 *total = COSTS_N_INSNS (cost);
3525 return true;
3528 case DIV:
3529 case UDIV:
3530 case MOD:
3531 case UMOD:
3532 *total = COSTS_N_INSNS (20);
3533 return true;
3535 case FLOAT:
3536 case FIX:
3537 *total = 100;
3538 return true;
3540 default:
3541 return false;
3545 /* Determine the size of the fundamental move insn that will be used
3546 for the specified mode. */
3547 static inline int
3548 mov_insn_size (machine_mode mode, bool consider_sh2a)
3550 const int mode_sz = GET_MODE_SIZE (mode);
3552 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3553 || (TARGET_FMOVD && mode == DFmode))
3554 return mode_sz;
3555 else
3557 /* The max. available mode for actual move insns is SImode.
3558 Larger accesses will be split into multiple loads/stores. */
3559 const int max_mov_sz = GET_MODE_SIZE (SImode);
3560 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3564 /* Determine the maximum possible displacement for a move insn for the
3565 specified mode. */
3567 sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
3569 /* The 4 byte displacement move insns are the same as the 2 byte
3570 versions but take a 12 bit displacement. All we need to do is to
3571 scale the max. displacement value accordingly. */
3572 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3574 /* SH2A supports FPU move insns with 12 bit displacements.
3575 Other variants do not support any kind of displacements for
3576 FPU move insns. */
3577 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3578 return 0;
3579 else
3581 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3582 const int mode_sz = GET_MODE_SIZE (mode);
3583 int r = 15 * mov_insn_sz * disp_scale;
3585 /* If the mov insn will be split into multiple loads/stores, the
3586 maximum possible displacement is a bit smaller. */
3587 if (mode_sz > mov_insn_sz)
3588 r -= mode_sz - mov_insn_sz;
3589 return r;
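/* Worked examples, derived from the formula above: for SImode without SH2A
   the move insn size is 4, so the maximum displacement is 15 * 4 = 60 bytes;
   DImode is split into two SImode accesses, giving 60 - (8 - 4) = 56.  With
   CONSIDER_SH2A the 12 bit displacement scales this by 4095 / 15 = 273,
   i.e. 15 * 4 * 273 = 16380 for SImode.  */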
3593 /* Determine the alignment mask for a move insn of the
3594 specified mode. */
3595 static inline int
3596 mov_insn_alignment_mask (machine_mode mode, bool consider_sh2a)
3598 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3599 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3602 /* Return the displacement value of a displacement address. */
3603 HOST_WIDE_INT
3604 sh_disp_addr_displacement (rtx x)
3606 gcc_assert (satisfies_constraint_Sdd (x));
3607 return INTVAL (XEXP (XEXP (x, 0), 1));
3610 /* Compute the cost of an address. */
3611 static int
3612 sh_address_cost (rtx x, machine_mode mode,
3613 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3615 /* 'GBR + 0'. Account one more because of R0 restriction. */
3616 if (REG_P (x) && REGNO (x) == GBR_REG)
3617 return 2;
3619 /* Simple reg, post-inc, pre-dec addressing. */
3620 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3621 return 1;
3623 /* 'reg + disp' addressing. */
3624 if (GET_CODE (x) == PLUS
3625 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3627 /* 'GBR + disp'. Account one more because of R0 restriction. */
3628 if (REGNO (XEXP (x, 0)) == GBR_REG
3629 && gbr_displacement (XEXP (x, 1), mode))
3630 return 2;
3632 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3634 if (offset == 0)
3635 return 1;
3637 /* The displacement would fit into a 2 byte move insn.
3638 HImode and QImode loads/stores with displacement put pressure on
3639 R0 which will most likely require another reg copy. Thus account
3640 a higher cost for that. */
3641 if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
3642 return (mode == HImode || mode == QImode) ? 2 : 1;
3644 /* The displacement would fit into a 4 byte move insn (SH2A). */
3645 if (TARGET_SH2A
3646 && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
3647 return 2;
3649 /* The displacement is probably out of range and will require extra
3650 calculations. */
3651 return 3;
3654 /* 'reg + reg' addressing. Account a slightly higher cost because of
3655 increased pressure on R0. */
3656 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1)))
3657 return 3;
3659 /* Not sure what it is - probably expensive. */
3660 return 10;
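/* Illustrative costs resulting from the rules above: @(r1), @r1+ and @-r1
   cost 1; @(0,gbr) and @(disp,gbr) cost 2; @(8,r1) costs 1 for SImode but 2
   for QImode/HImode because of the R0 restriction; @(r0,r1) costs 3; anything
   unrecognized costs 10.  */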
3663 /* Code to expand a shift. */
3664 static void
3665 gen_ashift (int type, int n, rtx reg)
3667 rtx n_rtx;
3669 /* Negative values here come from the shift_amounts array. */
3670 if (n < 0)
3672 if (type == ASHIFT)
3673 type = LSHIFTRT;
3674 else
3675 type = ASHIFT;
3676 n = -n;
3679 n_rtx = GEN_INT (n);
3680 gcc_assert (satisfies_constraint_P27 (n_rtx));
3682 switch (type)
3684 case ASHIFTRT:
3685 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3686 break;
3687 case LSHIFTRT:
3688 if (n == 1)
3689 emit_insn (gen_shlr (reg, reg));
3690 else
3691 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3692 break;
3693 case ASHIFT:
3694 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3695 break;
3696 default:
3697 gcc_unreachable ();
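/* Illustrative: gen_ashift (ASHIFT, -2, reg) is treated as a logical right
   shift by 2 and emits lshrsi3_k, while gen_ashift (LSHIFTRT, 1, reg) uses
   the single bit shlr pattern.  Every amount must satisfy constraint P27,
   i.e. be 1, 2, 8 or 16.  */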
3701 /* Code to expand a HImode shift. */
3702 static void
3703 gen_ashift_hi (int type, int n, rtx reg)
3705 /* Negative values here come from the shift_amounts array. */
3706 if (n < 0)
3708 if (type == ASHIFT)
3709 type = LSHIFTRT;
3710 else
3711 type = ASHIFT;
3712 n = -n;
3715 switch (type)
3717 case ASHIFTRT:
3718 case LSHIFTRT:
3719 /* We don't have HImode right shift operations because using the
3720 ordinary 32 bit shift instructions for that doesn't generate proper
3721 zero/sign extension.
3722 gen_ashift_hi is only called in contexts where we know that the
3723 sign extension works out correctly. */
3725 int offset = 0;
3726 if (GET_CODE (reg) == SUBREG)
3728 offset = SUBREG_BYTE (reg);
3729 reg = SUBREG_REG (reg);
3731 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3732 break;
3734 case ASHIFT:
3735 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3736 break;
3740 /* Output RTL to split a constant shift into its component SH constant
3741 shift instructions. */
3742 void
3743 gen_shifty_op (int code, rtx *operands)
3745 int value = INTVAL (operands[2]);
3746 int max, i;
3748 /* Truncate the shift count in case it is out of bounds. */
3749 value = value & 31;
3751 if (value == 31)
3753 if (code == LSHIFTRT)
3755 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3756 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3757 return;
3759 else if (code == ASHIFT)
3761 /* There is a two instruction sequence for 31 bit left shifts,
3762 but it requires r0. */
3763 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3765 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3766 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3767 return;
3771 else if (value == 0)
3773 /* This can happen even when optimizing, if there were subregs before
3774 reload. Don't output a nop here, as this is never optimized away;
3775 use a no-op move instead. */
3776 emit_insn (gen_rtx_SET (operands[0], operands[0]));
3777 return;
3780 max = ashl_lshr_seq[value].insn_count;
3781 for (i = 0; i < max; i++)
3782 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
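/* A sketch of the effect (the authoritative sequences come from ashl_lshr_seq,
   defined earlier in this file): a left shift by 10 is typically emitted as a
   shift by 8 followed by a shift by 2, since SH1 only has shift-by-1/2/8/16
   instructions, while a logical right shift by 31 uses the rotl + movt special
   case above instead of 31 single bit shifts.  */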
3785 /* Same as gen_shifty_op, but optimized for values where the topmost bits
3786 don't matter. */
3787 void
3788 gen_shifty_hi_op (int code, rtx *operands)
3790 int value = INTVAL (operands[2]);
3791 int max, i;
3792 void (*gen_fun) (int, int, rtx);
3794 /* This operation is used by and_shl for SImode values with a few
3795 high bits known to be cleared. */
3796 value &= 31;
3797 if (value == 0)
3799 emit_insn (gen_nop ());
3800 return;
3803 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3804 if (code == ASHIFT)
3806 max = ext_ashl_lshr_seq[value].insn_count;
3807 for (i = 0; i < max; i++)
3808 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3810 else
3811 /* When shifting right, emit the shifts in reverse order, so that
3812 solitary negative values come first. */
3813 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
3814 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3817 /* Output RTL for an arithmetic right shift.
3818 ??? Rewrite to use super-optimizer sequences. */
3819 bool
3820 expand_ashiftrt (rtx *operands)
3822 rtx wrk;
3823 char func[18];
3824 int value;
3826 if (TARGET_DYNSHIFT)
3828 if (!CONST_INT_P (operands[2]))
3830 rtx count = copy_to_mode_reg (SImode, operands[2]);
3831 emit_insn (gen_negsi2 (count, count));
3832 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3833 return true;
3835 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3836 > 1 + SH_DYNAMIC_SHIFT_COST)
3838 rtx count
3839 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3840 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3841 return true;
3844 if (!CONST_INT_P (operands[2]))
3845 return false;
3847 value = INTVAL (operands[2]) & 31;
3849 if (value == 31)
3851 /* If we are called from abs expansion, arrange things so that we
3852 can use a single MT instruction that doesn't clobber the source,
3853 if LICM can hoist out the load of the constant zero. */
3854 if (currently_expanding_to_rtl)
3856 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3857 operands[1]));
3858 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
3859 return true;
3861 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3862 return true;
3864 else if (value >= 16 && value <= 19)
3866 wrk = gen_reg_rtx (SImode);
3867 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3868 value -= 16;
3869 while (value--)
3870 gen_ashift (ASHIFTRT, 1, wrk);
3871 emit_move_insn (operands[0], wrk);
3872 return true;
3874 /* Expand a short sequence inline; for a longer one, call a magic routine. */
3875 else if (value <= 5)
3877 wrk = gen_reg_rtx (SImode);
3878 emit_move_insn (wrk, operands[1]);
3879 while (value--)
3880 gen_ashift (ASHIFTRT, 1, wrk);
3881 emit_move_insn (operands[0], wrk);
3882 return true;
3885 wrk = gen_reg_rtx (Pmode);
3887 /* Load the value into an arg reg and call a helper. */
3888 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3889 sprintf (func, "__ashiftrt_r4_%d", value);
3890 rtx lab = function_symbol (wrk, func, SFUNC_STATIC).lab;
3891 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk, lab));
3892 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3893 return true;
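/* Summary of the strategies above (illustrative): a shift by 31 becomes sign
   replication (cmp/gt + mov_neg_si_t, or ashrsi2_31); 16..19 use ashrsi2_16
   plus up to three single bit shifts; 5 or less is done with single bit
   shifts; everything else (when dynamic shifts are unavailable) calls the
   __ashiftrt_r4_<n> library helper with the value in r4.  */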
3896 /* Try to find a good way to implement the combiner pattern
3897 [(set (match_operand:SI 0 "register_operand" "r")
3898 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3899 (match_operand:SI 2 "const_int_operand" "n"))
3900 (match_operand:SI 3 "const_int_operand" "n"))) .
3901 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3902 return 0 for simple right / left or left/right shift combination.
3903 return 1 for a combination of shifts with zero_extend.
3904 return 2 for a combination of shifts with an AND that needs r0.
3905 return 3 for a combination of shifts with an AND that needs an extra
3906 scratch register, when the three highmost bits of the AND mask are clear.
3907 return 4 for a combination of shifts with an AND that needs an extra
3908 scratch register, when any of the three highmost bits of the AND mask
3909 is set.
3910 If ATTRP is set, store an initial right shift width in ATTRP[0],
3911 and the instruction length in ATTRP[1] . These values are not valid
3912 when returning 0.
3913 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3914 shift_amounts for the last shift value that is to be used before the
3915 sign extend. */
3917 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3919 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3920 int left = INTVAL (left_rtx), right;
3921 int best = 0;
3922 int cost, best_cost = 10000;
3923 int best_right = 0, best_len = 0;
3924 int i;
3925 int can_ext;
3927 if (left < 0 || left > 31)
3928 return 0;
3929 if (CONST_INT_P (mask_rtx))
3930 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3931 else
3932 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3933 /* Can this be expressed as a right shift / left shift pair? */
3934 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3935 right = exact_log2 (lsb);
3936 mask2 = ~(mask + lsb - 1);
3937 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3938 /* mask has no zeroes except trailing zeroes <==> ! mask2 */
3939 if (! mask2)
3940 best_cost = ashl_lshr_seq[right].insn_count
3941 + ashl_lshr_seq[right + left].insn_count;
3942 /* mask has no trailing zeroes <==> ! right */
3943 else if (! right && mask2 == ~(lsb2 - 1))
3945 int late_right = exact_log2 (lsb2);
3946 best_cost = ashl_lshr_seq[left + late_right].insn_count
3947 + ashl_lshr_seq[late_right].insn_count;
3949 /* Try to use zero extend. */
3950 if (mask2 == ~(lsb2 - 1))
3952 int width, first;
3954 for (width = 8; width <= 16; width += 8)
3956 /* Can we zero-extend right away? */
3957 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3959 cost = 1 + ext_ashl_lshr_seq[right].insn_count
3960 + ext_ashl_lshr_seq[left + right].insn_count;
3961 if (cost < best_cost)
3963 best = 1;
3964 best_cost = cost;
3965 best_right = right;
3966 best_len = cost;
3967 if (attrp)
3968 attrp[2] = -1;
3970 continue;
3972 /* ??? Could try to put zero extend into initial right shift,
3973 or even shift a bit left before the right shift. */
3974 /* Determine value of first part of left shift, to get to the
3975 zero extend cut-off point. */
3976 first = width - exact_log2 (lsb2) + right;
3977 if (first >= 0 && right + left - first >= 0)
3979 cost = ext_ashl_lshr_seq[right].insn_count
3980 + ext_ashl_lshr_seq[first].insn_count + 1
3981 + ext_ashl_lshr_seq[right + left - first].insn_count;
3983 if (cost < best_cost)
3985 best = 1;
3986 best_cost = cost;
3987 best_right = right;
3988 best_len = cost;
3989 if (attrp)
3990 attrp[2] = first;
3995 /* Try to use r0 AND pattern */
3996 for (i = 0; i <= 2; i++)
3998 if (i > right)
3999 break;
4000 if (! CONST_OK_FOR_K08 (mask >> i))
4001 continue;
4002 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
4003 if (cost < best_cost)
4005 best = 2;
4006 best_cost = cost;
4007 best_right = i;
4008 best_len = cost - 1;
4011 /* Try to use a scratch register to hold the AND operand. */
4012 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
4013 for (i = 0; i <= 2; i++)
4015 if (i > right)
4016 break;
4017 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
4018 + (can_ext
4019 ? ext_ashl_lshr_seq
4020 : ashl_lshr_seq)[left + i].insn_count;
4021 if (cost < best_cost)
4023 best = 4 - can_ext;
4024 best_cost = cost;
4025 best_right = i;
4026 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
4030 if (attrp)
4032 attrp[0] = best_right;
4033 attrp[1] = best_len;
4035 return best;
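/* Illustrative example, assuming the cost tables above: for LEFT = 0 and
   MASK = 0xff the zero extend path wins, so this should return kind 1, which
   gen_shl_and below turns into a single extu.b style zero extension with no
   shifts at all.  */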
4038 /* This is used in length attributes of the unnamed instructions
4039 corresponding to shl_and_kind return values of 1 and 2. */
4041 shl_and_length (rtx insn)
4043 rtx set_src, left_rtx, mask_rtx;
4044 int attributes[3];
4046 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4047 left_rtx = XEXP (XEXP (set_src, 0), 1);
4048 mask_rtx = XEXP (set_src, 1);
4049 shl_and_kind (left_rtx, mask_rtx, attributes);
4050 return attributes[1];
4053 /* This is used in length attribute of the and_shl_scratch instruction. */
4055 shl_and_scr_length (rtx insn)
4057 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4058 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4059 rtx op = XEXP (set_src, 0);
4060 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4061 op = XEXP (XEXP (op, 0), 0);
4062 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4065 /* Generate rtl for instructions for which shl_and_kind advised a particular
4066 method of generating them, i.e. returned a nonzero kind. */
4067 bool
4068 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4070 int attributes[3];
4071 unsigned HOST_WIDE_INT mask;
4072 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4073 int right, total_shift;
4074 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4076 right = attributes[0];
4077 total_shift = INTVAL (left_rtx) + right;
4078 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4079 switch (kind)
4081 default:
4082 return true;
4083 case 1:
4085 int first = attributes[2];
4086 rtx operands[3];
4088 if (first < 0)
4090 emit_insn ((mask << right) <= 0xff
4091 ? gen_zero_extendqisi2 (dest,
4092 gen_lowpart (QImode, source))
4093 : gen_zero_extendhisi2 (dest,
4094 gen_lowpart (HImode, source)));
4095 source = dest;
4097 if (source != dest)
4098 emit_insn (gen_movsi (dest, source));
4099 operands[0] = dest;
4100 if (right)
4102 operands[2] = GEN_INT (right);
4103 gen_shifty_hi_op (LSHIFTRT, operands);
4105 if (first > 0)
4107 operands[2] = GEN_INT (first);
4108 gen_shifty_hi_op (ASHIFT, operands);
4109 total_shift -= first;
4110 mask <<= first;
4112 if (first >= 0)
4113 emit_insn (mask <= 0xff
4114 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4115 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4116 if (total_shift > 0)
4118 operands[2] = GEN_INT (total_shift);
4119 gen_shifty_hi_op (ASHIFT, operands);
4121 break;
4123 case 4:
4124 shift_gen_fun = gen_shifty_op;
4125 /* FALLTHRU */
4126 case 3:
4127 /* If the topmost bit that matters is set, set the topmost bits
4128 that don't matter. This way, we might be able to get a shorter
4129 signed constant. */
4130 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4131 mask |= (HOST_WIDE_INT) ((HOST_WIDE_INT_M1U) << (31 - total_shift));
4132 /* FALLTHRU */
4133 case 2:
4134 /* Don't expand fine-grained when combining, because that will
4135 make the pattern fail. */
4136 if (currently_expanding_to_rtl
4137 || reload_in_progress || reload_completed)
4139 rtx operands[3];
4141 /* Cases 3 and 4 should be handled by this split
4142 only while combining */
4143 gcc_assert (kind <= 2);
4144 if (right)
4146 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4147 source = dest;
4149 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4150 if (total_shift)
4152 operands[0] = dest;
4153 operands[1] = dest;
4154 operands[2] = GEN_INT (total_shift);
4155 shift_gen_fun (ASHIFT, operands);
4157 break;
4159 else
4161 int neg = 0;
4162 if (kind != 4 && total_shift < 16)
4164 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4165 if (neg > 0)
4166 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4167 else
4168 neg = 0;
4170 emit_insn (gen_and_shl_scratch (dest, source,
4171 GEN_INT (right),
4172 GEN_INT (mask),
4173 GEN_INT (total_shift + neg),
4174 GEN_INT (neg)));
4175 emit_insn (gen_movsi (dest, dest));
4176 break;
4179 return false;
4182 /* Try to find a good way to implement the combiner pattern
4183 [(set (match_operand:SI 0 "register_operand" "=r")
4184 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4185 (match_operand:SI 2 "const_int_operand" "n")
4186 (match_operand:SI 3 "const_int_operand" "n")
4187 (const_int 0)))
4188 (clobber (reg:SI T_REG))]
4189 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4190 return 0 for simple left / right shift combination.
4191 return 1 for left shift / 8 bit sign extend / left shift.
4192 return 2 for left shift / 16 bit sign extend / left shift.
4193 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4194 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4195 return 5 for left shift / 16 bit sign extend / right shift
4196 return 6 for < 8 bit sign extend / left shift.
4197 return 7 for < 8 bit sign extend / left shift / single right shift.
4198 If COSTP is nonzero, assign the calculated cost to *COSTP. */
4200 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4202 int left, size, insize, ext;
4203 int cost = 0, best_cost;
4204 int kind;
4206 left = INTVAL (left_rtx);
4207 size = INTVAL (size_rtx);
4208 insize = size - left;
4209 gcc_assert (insize > 0);
4210 /* Default to left / right shift. */
4211 kind = 0;
4212 best_cost = ashl_lshr_seq[32 - insize].insn_count
4213 + ashl_lshr_seq[32 - size].insn_count;
4214 if (size <= 16)
4216 /* 16 bit shift / sign extend / 16 bit shift */
4217 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4218 + ashl_lshr_seq[16 - size].insn_count;
4219 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4220 below, by alternative 3 or something even better. */
4221 if (cost < best_cost)
4223 kind = 5;
4224 best_cost = cost;
4227 /* Try a plain sign extend between two shifts. */
4228 for (ext = 16; ext >= insize; ext -= 8)
4230 if (ext <= size)
4232 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4233 + ashl_lshr_seq[size - ext].insn_count;
4234 if (cost < best_cost)
4236 kind = ext / (unsigned) 8;
4237 best_cost = cost;
4240 /* Check if we can do a sloppy shift with a final signed shift
4241 restoring the sign. */
4242 if (EXT_SHIFT_SIGNED (size - ext))
4243 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4244 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4245 /* If not, maybe it's still cheaper to do the second shift sloppy,
4246 and do a final sign extend? */
4247 else if (size <= 16)
4248 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4249 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4250 + 1;
4251 else
4252 continue;
4253 if (cost < best_cost)
4255 kind = ext / (unsigned) 8 + 2;
4256 best_cost = cost;
4259 /* Check if we can sign extend in r0 */
4260 if (insize < 8)
4262 cost = 3 + ashl_lshr_seq[left].insn_count;
4263 if (cost < best_cost)
4265 kind = 6;
4266 best_cost = cost;
4268 /* Try the same with a final signed shift. */
4269 if (left < 31)
4271 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4272 if (cost < best_cost)
4274 kind = 7;
4275 best_cost = cost;
4279 if (TARGET_DYNSHIFT)
4281 /* Try to use a dynamic shift. */
4282 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4283 if (cost < best_cost)
4285 kind = 0;
4286 best_cost = cost;
4289 if (costp)
4290 *costp = cost;
4291 return kind;
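/* Illustrative example: for LEFT = 0 and SIZE = 8 (sign-extract the low byte
   with no shifting) the plain sign extend between two zero length shifts
   should win, i.e. kind 1, which gen_shl_sext below reduces to a single
   exts.b.  */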
4294 /* Function to be used in the length attribute of the instructions
4295 implementing this pattern. */
4297 shl_sext_length (rtx insn)
4299 rtx set_src, left_rtx, size_rtx;
4300 int cost;
4302 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4303 left_rtx = XEXP (XEXP (set_src, 0), 1);
4304 size_rtx = XEXP (set_src, 1);
4305 shl_sext_kind (left_rtx, size_rtx, &cost);
4306 return cost;
4309 /* Generate rtl for this pattern */
4310 bool
4311 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4313 int kind;
4314 int left, size, insize, cost;
4315 rtx operands[3];
4317 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4318 left = INTVAL (left_rtx);
4319 size = INTVAL (size_rtx);
4320 insize = size - left;
4321 switch (kind)
4323 case 1:
4324 case 2:
4325 case 3:
4326 case 4:
4328 int ext = kind & 1 ? 8 : 16;
4329 int shift2 = size - ext;
4331 /* Don't expand fine-grained when combining, because that will
4332 make the pattern fail. */
4333 if (! currently_expanding_to_rtl
4334 && ! reload_in_progress && ! reload_completed)
4336 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4337 emit_insn (gen_movsi (dest, source));
4338 break;
4340 if (dest != source)
4341 emit_insn (gen_movsi (dest, source));
4342 operands[0] = dest;
4343 if (ext - insize)
4345 operands[2] = GEN_INT (ext - insize);
4346 gen_shifty_hi_op (ASHIFT, operands);
4348 emit_insn (kind & 1
4349 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4350 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4351 if (kind <= 2)
4353 if (shift2)
4355 operands[2] = GEN_INT (shift2);
4356 gen_shifty_op (ASHIFT, operands);
4359 else
4361 if (shift2 > 0)
4363 if (EXT_SHIFT_SIGNED (shift2))
4365 operands[2] = GEN_INT (shift2 + 1);
4366 gen_shifty_op (ASHIFT, operands);
4367 operands[2] = const1_rtx;
4368 gen_shifty_op (ASHIFTRT, operands);
4369 break;
4371 operands[2] = GEN_INT (shift2);
4372 gen_shifty_hi_op (ASHIFT, operands);
4374 else if (shift2)
4376 operands[2] = GEN_INT (-shift2);
4377 gen_shifty_hi_op (LSHIFTRT, operands);
4379 emit_insn (size <= 8
4380 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4381 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4383 break;
4385 case 5:
4387 int i = 16 - size;
4388 if (! currently_expanding_to_rtl
4389 && ! reload_in_progress && ! reload_completed)
4390 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4391 else
4393 operands[0] = dest;
4394 operands[2] = GEN_INT (16 - insize);
4395 gen_shifty_hi_op (ASHIFT, operands);
4396 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4398 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4399 while (--i >= 0)
4400 gen_ashift (ASHIFTRT, 1, dest);
4401 break;
4403 case 6:
4404 case 7:
4405 /* Don't expand fine-grained when combining, because that will
4406 make the pattern fail. */
4407 if (! currently_expanding_to_rtl
4408 && ! reload_in_progress && ! reload_completed)
4410 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4411 emit_insn (gen_movsi (dest, source));
4412 break;
4414 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4415 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4416 emit_insn (gen_addsi3 (dest, dest, GEN_INT (HOST_WIDE_INT_M1U << (insize - 1))));
4417 operands[0] = dest;
4418 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4419 gen_shifty_op (ASHIFT, operands);
4420 if (kind == 7)
4421 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4422 break;
4423 default:
4424 return true;
4426 return false;
4429 typedef struct label_ref_list_d
4431 rtx_code_label *label;
4432 struct label_ref_list_d *next;
4433 } *label_ref_list_t;
4435 static object_allocator<label_ref_list_d> label_ref_list_d_pool
4436 ("label references list");
4438 /* The SH cannot load a large constant into a register; constants have to
4439 come from a pc relative load. The reference of a pc relative load
4440 instruction must be less than 1k in front of the instruction. This
4441 means that we often have to dump a constant inside a function, and
4442 generate code to branch around it.
4444 It is important to minimize this, since the branches will slow things
4445 down and make things bigger.
4447 Worst case code looks like:
4449 mov.l L1,rn
4450 bra L2
4451 nop
4452 align
4453 L1: .long value
4454 L2:
4457 mov.l L3,rn
4458 bra L4
4459 nop
4460 align
4461 L3: .long value
4462 L4:
4465 We fix this by performing a scan before scheduling, which notices which
4466 instructions need to have their operands fetched from the constant table
4467 and builds the table.
4469 The algorithm is:
4471 scan, find an instruction which needs a pcrel move. Look forward, find the
4472 last barrier which is within MAX_COUNT bytes of the requirement.
4473 If there isn't one, make one. Process all the instructions between
4474 the find and the barrier.
4476 In the above example, we can tell that L3 is within 1k of L1, so
4477 the first move can be shrunk from the 3 insn+constant sequence into
4478 just 1 insn, and the constant moved to L3 to make:
4480 mov.l L1,rn
4482 mov.l L3,rn
4483 bra L4
4484 nop
4485 align
4486 L3:.long value
4487 L4:.long value
4489 Then the second move becomes the target for the shortening process. */
4491 typedef struct
4493 rtx value; /* Value in table. */
4494 rtx_code_label *label; /* Label of value. */
4495 label_ref_list_t wend; /* End of window. */
4496 machine_mode mode; /* Mode of value. */
4498 /* True if this constant is accessed as part of a post-increment
4499 sequence. Note that HImode constants are never accessed in this way. */
4500 bool part_of_sequence_p;
4501 } pool_node;
4503 /* The maximum number of constants that can fit into one pool, since
4504 constants in the range 0..510 are at least 2 bytes long, and in the
4505 range from there to 1018 at least 4 bytes. */
4507 #define MAX_POOL_SIZE 372
4508 static pool_node pool_vector[MAX_POOL_SIZE];
4509 static int pool_size;
4510 static rtx_code_label *pool_window_label;
4511 static int pool_window_last;
4513 static int max_labelno_before_reorg;
4515 /* ??? If we need a constant in HImode which is the truncated value of a
4516 constant we need in SImode, we could combine the two entries thus saving
4517 two bytes. Is this common enough to be worth the effort of implementing
4518 it? */
4520 /* ??? This stuff should be done at the same time that we shorten branches.
4521 As it is now, we must assume that all branches are the maximum size, and
4522 this causes us to almost always output constant pools sooner than
4523 necessary. */
4525 /* Add a constant to the pool and return its label. */
4526 static rtx_code_label *
4527 add_constant (rtx x, machine_mode mode, rtx last_value)
4529 rtx_code_label *lab, *new_rtx;
4530 label_ref_list_t ref, newref;
4532 /* First see if we've already got it. */
4533 for (int i = 0; i < pool_size; i++)
4535 if (x->code == pool_vector[i].value->code
4536 && mode == pool_vector[i].mode)
4538 if (x->code == CODE_LABEL)
4540 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4541 continue;
4543 if (rtx_equal_p (x, pool_vector[i].value))
4545 lab = new_rtx = 0;
4546 if (! last_value
4547 || ! i
4548 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4550 new_rtx = gen_label_rtx ();
4551 LABEL_REFS (new_rtx) = pool_vector[i].label;
4552 pool_vector[i].label = lab = new_rtx;
4554 if (lab && pool_window_label)
4556 newref = label_ref_list_d_pool.allocate ();
4557 newref->label = pool_window_label;
4558 ref = pool_vector[pool_window_last].wend;
4559 newref->next = ref;
4560 pool_vector[pool_window_last].wend = newref;
4562 if (new_rtx)
4563 pool_window_label = new_rtx;
4564 pool_window_last = i;
4565 return lab;
4570 /* Need a new one. */
4571 pool_vector[pool_size].value = x;
4572 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4574 lab = 0;
4575 pool_vector[pool_size - 1].part_of_sequence_p = true;
4577 else
4578 lab = gen_label_rtx ();
4579 pool_vector[pool_size].mode = mode;
4580 pool_vector[pool_size].label = lab;
4581 pool_vector[pool_size].wend = NULL;
4582 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4583 if (lab && pool_window_label)
4585 newref = label_ref_list_d_pool.allocate ();
4586 newref->label = pool_window_label;
4587 ref = pool_vector[pool_window_last].wend;
4588 newref->next = ref;
4589 pool_vector[pool_window_last].wend = newref;
4591 if (lab)
4592 pool_window_label = lab;
4593 pool_window_last = pool_size;
4594 pool_size++;
4595 return lab;
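/* Illustrative behaviour: requesting the same SImode constant twice does not
   create a second pool entry; instead a fresh label is generated and chained
   onto the existing entry through LABEL_REFS, so both references end up
   pointing at a single pooled value.  */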
4598 /* Output the literal table. START, if nonzero, is the first instruction
4599 this table is needed for, and also indicates that there is at least one
4600 casesi_worker_2 instruction; we have to emit the operand3 labels from
4601 these insns at a 4-byte aligned position. BARRIER is the barrier
4602 after which we are to place the table. */
4603 static void
4604 dump_table (rtx_insn *start, rtx_insn *barrier)
4606 rtx_insn *scan = barrier;
4607 bool need_align = true;
4608 rtx_code_label *lab;
4609 label_ref_list_t ref;
4610 bool have_df = false;
4612 /* Do two passes, first time dump out the HI sized constants. */
4614 for (int i = 0; i < pool_size; i++)
4616 pool_node *p = &pool_vector[i];
4618 if (p->mode == HImode)
4620 if (need_align)
4622 scan = emit_insn_after (gen_align_2 (), scan);
4623 need_align = false;
4625 for (lab = p->label; lab;
4626 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
4627 scan = emit_label_after (lab, scan);
4628 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4629 scan);
4630 for (ref = p->wend; ref; ref = ref->next)
4632 lab = ref->label;
4633 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4636 else if (p->mode == DFmode)
4637 have_df = true;
4640 need_align = true;
4642 if (start)
4644 scan = emit_insn_after (gen_align_4 (), scan);
4645 need_align = false;
4646 for (; start != barrier; start = NEXT_INSN (start))
4647 if (NONJUMP_INSN_P (start)
4648 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4650 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4651 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4653 scan = emit_label_after (as_a <rtx_insn *> (lab), scan);
4656 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4658 rtx_insn *align_insn = NULL;
4660 scan = emit_label_after (gen_label_rtx (), scan);
4661 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4662 need_align = false;
4664 for (int i = 0; i < pool_size; i++)
4666 pool_node *p = &pool_vector[i];
4668 switch (p->mode)
4670 case E_HImode:
4671 break;
4672 case E_SImode:
4673 case E_SFmode:
4674 if (align_insn && !p->part_of_sequence_p)
4676 for (lab = p->label; lab;
4677 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
4678 emit_label_before (lab, align_insn);
4679 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4680 align_insn);
4681 for (ref = p->wend; ref; ref = ref->next)
4683 lab = ref->label;
4684 emit_insn_before (gen_consttable_window_end (lab),
4685 align_insn);
4687 delete_insn (align_insn);
4688 align_insn = NULL;
4689 continue;
4691 else
4693 for (lab = p->label; lab;
4694 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
4695 scan = emit_label_after (lab, scan);
4696 scan = emit_insn_after (gen_consttable_4 (p->value,
4697 const0_rtx), scan);
4698 need_align = ! need_align;
4700 break;
4701 case E_DFmode:
4702 if (need_align)
4704 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4705 align_insn = scan;
4706 need_align = false;
4708 /* FALLTHRU */
4709 case E_DImode:
4710 for (lab = p->label; lab;
4711 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
4712 scan = emit_label_after (lab, scan);
4713 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4714 scan);
4715 break;
4716 default:
4717 gcc_unreachable ();
4720 if (p->mode != HImode)
4722 for (ref = p->wend; ref; ref = ref->next)
4724 lab = ref->label;
4725 scan = emit_insn_after (gen_consttable_window_end (lab),
4726 scan);
4731 pool_size = 0;
4734 for (int i = 0; i < pool_size; i++)
4736 pool_node *p = &pool_vector[i];
4738 switch (p->mode)
4740 case E_HImode:
4741 break;
4742 case E_SImode:
4743 case E_SFmode:
4744 if (need_align)
4746 need_align = false;
4747 scan = emit_label_after (gen_label_rtx (), scan);
4748 scan = emit_insn_after (gen_align_4 (), scan);
4750 for (lab = p->label; lab;
4751 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
4752 scan = emit_label_after (lab, scan);
4753 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4754 scan);
4755 break;
4756 case E_DFmode:
4757 case E_DImode:
4758 if (need_align)
4760 need_align = false;
4761 scan = emit_label_after (gen_label_rtx (), scan);
4762 scan = emit_insn_after (gen_align_4 (), scan);
4764 for (lab = p->label; lab;
4765 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
4766 scan = emit_label_after (lab, scan);
4767 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4768 scan);
4769 break;
4770 default:
4771 gcc_unreachable ();
4774 if (p->mode != HImode)
4776 for (ref = p->wend; ref; ref = ref->next)
4778 lab = ref->label;
4779 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4784 scan = emit_insn_after (gen_consttable_end (), scan);
4785 scan = emit_barrier_after (scan);
4786 pool_size = 0;
4787 pool_window_label = NULL;
4788 pool_window_last = 0;
4791 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4793 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4795 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4796 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
4797 need to fix it if the input value is CONST_OK_FOR_I08. */
4798 static bool
4799 broken_move (rtx_insn *insn)
4801 if (NONJUMP_INSN_P (insn))
4803 rtx pat = PATTERN (insn);
4804 if (GET_CODE (pat) == PARALLEL)
4805 pat = XVECEXP (pat, 0, 0);
4806 if (GET_CODE (pat) == SET
4807 /* We can load any 8-bit value if we don't care what the high
4808 order bits end up as. */
4809 && GET_MODE (SET_DEST (pat)) != QImode
4810 && (CONSTANT_P (SET_SRC (pat))
4811 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
4812 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
4813 /* Match mova_const. */
4814 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4815 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4816 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4817 && ! (TARGET_SH2E
4818 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4819 && (fp_zero_operand (SET_SRC (pat))
4820 || fp_one_operand (SET_SRC (pat)))
4821 /* In general we don't know the current setting of fpscr, so
4822 disable fldi.
4823 There is an exception if this was a register-register move
4824 before reload - and hence it was ascertained that we have
4825 single precision setting - and in a post-reload optimization
4826 we changed this to do a constant load. In that case
4827 we don't have an r0 clobber, hence we must use fldi. */
4828 && (TARGET_FMOVD
4829 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4830 == SCRATCH))
4831 && REG_P (SET_DEST (pat))
4832 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4833 && ! (TARGET_SH2A
4834 && GET_MODE (SET_DEST (pat)) == SImode
4835 && (satisfies_constraint_I20 (SET_SRC (pat))
4836 || satisfies_constraint_I28 (SET_SRC (pat))))
4837 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4838 return true;
4841 return false;
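/* Illustrative: a move of the SImode constant 1000 into a general register
   does not satisfy I08 (and, without SH2A, neither I20 nor I28 applies), so
   broken_move returns true and the value goes to the constant pool; a move of
   100 fits the signed 8 bit immediate of a plain mov #imm,rn and is left
   alone.  */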
4844 /* Return true if the specified insn is a mova insn. */
4845 static bool
4846 mova_p (rtx_insn *insn)
4848 return (NONJUMP_INSN_P (insn)
4849 && GET_CODE (PATTERN (insn)) == SET
4850 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4851 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4852 /* Don't match mova_const. */
4853 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4856 /* Fix up a mova from a switch that went out of range. */
4857 static void
4858 fixup_mova (rtx_insn *mova)
4860 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4861 if (! flag_pic)
4863 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4864 INSN_CODE (mova) = -1;
4866 else
4868 rtx_insn *worker = mova;
4869 rtx_code_label *lab = gen_label_rtx ();
4870 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4874 worker = NEXT_INSN (worker);
4875 gcc_assert (worker
4876 && !LABEL_P (worker)
4877 && !JUMP_P (worker));
4878 } while (NOTE_P (worker)
4879 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4880 wpat = PATTERN (worker);
4881 wpat0 = XVECEXP (wpat, 0, 0);
4882 wpat1 = XVECEXP (wpat, 0, 1);
4883 wsrc = SET_SRC (wpat0);
4884 PATTERN (worker) = (gen_casesi_worker_2
4885 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4886 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4887 XEXP (wpat1, 0)));
4888 INSN_CODE (worker) = -1;
4889 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4890 base = gen_rtx_LABEL_REF (Pmode, lab);
4891 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4892 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4893 INSN_CODE (mova) = -1;
4897 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4898 *num_mova, and check if the new mova is not nested within the first one.
4899 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4900 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4901 static int
4902 untangle_mova (int *num_mova, rtx_insn **first_mova, rtx_insn *new_mova)
4904 int n_addr = 0; /* Initialization to shut up spurious warning. */
4905 int f_target, n_target = 0; /* Likewise. */
4907 if (optimize)
4909 /* If NEW_MOVA has no address yet, it will be handled later. */
4910 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4911 return -1;
4913 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4914 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4915 if (n_addr > n_target || n_addr + 1022 < n_target)
4917 /* Change the mova into a load.
4918 broken_move will then return true for it. */
4919 fixup_mova (new_mova);
4920 return 1;
4923 if (!(*num_mova)++)
4925 *first_mova = new_mova;
4926 return 2;
4928 if (!optimize
4929 || ((f_target
4930 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4931 >= n_target))
4932 return -1;
4934 (*num_mova)--;
4935 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4936 > n_target - n_addr)
4938 fixup_mova (*first_mova);
4939 return 0;
4941 else
4943 fixup_mova (new_mova);
4944 return 1;
4948 /* Find the last barrier from insn FROM which is close enough to hold the
4949 constant pool. If we can't find one, then create one near the end of
4950 the range. */
4951 static rtx_insn *
4952 find_barrier (int num_mova, rtx_insn *mova, rtx_insn *from)
4954 int count_si = 0;
4955 int count_hi = 0;
4956 int found_hi = 0;
4957 int found_si = 0;
4958 int hi_align = 2;
4959 int si_align = 2;
4960 int leading_mova = num_mova;
4961 rtx_insn *barrier_before_mova = NULL;
4962 rtx_insn *found_barrier = NULL;
4963 rtx_insn *good_barrier = NULL;
4964 int si_limit;
4965 int hi_limit;
4966 rtx_insn *orig = from;
4967 rtx_insn *last_got = NULL;
4968 rtx_insn *last_symoff = NULL;
4970 /* For HImode: range is 510, add 4 because pc counts from address of
4971 second instruction after this one, subtract 2 for the jump instruction
4972 that we may need to emit before the table, subtract 2 for the instruction
4973 that fills the jump delay slot (in very rare cases, reorg will take an
4974 instruction from after the constant pool or will leave the delay slot
4975 empty). This gives 510.
4976 For SImode: range is 1020, add 4 because pc counts from address of
4977 second instruction after this one, subtract 2 in case pc is 2 byte
4978 aligned, subtract 2 for the jump instruction that we may need to emit
4979 before the table, subtract 2 for the instruction that fills the jump
4980 delay slot. This gives 1018. */
4982 /* The branch will always be shortened now that the reference address for
4983 forward branches is the successor address, thus we need no longer make
4984 adjustments to the [sh]i_limit for -O0. */
4986 si_limit = 1018;
4987 hi_limit = 510;
4989 while (from && count_si < si_limit && count_hi < hi_limit)
4991 int inc = get_attr_length (from);
4992 int new_align = 1;
4994 /* If this is a label that existed at the time of the compute_alignments
4995 call, determine the alignment. N.B. When find_barrier recurses for
4996 an out-of-reach mova, we might see labels at the start of previously
4997 inserted constant tables. */
4998 if (LABEL_P (from)
4999 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
5001 if (optimize)
5002 new_align = 1 << label_to_alignment (from).levels[0].log;
5003 else if (BARRIER_P (prev_nonnote_insn (from)))
5004 new_align = 1 << barrier_align (from);
5005 else
5006 new_align = 1;
5007 inc = 0;
5009 /* In case we are scanning a constant table because of recursion, check
5010 for explicit alignments. If the table is long, we might be forced
5011 to emit the new table in front of it; the length of the alignment
5012 might be the last straw. */
5013 else if (NONJUMP_INSN_P (from)
5014 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5015 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
5016 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
5017 /* When we find the end of a constant table, paste the new constant
5018 at the end. That is better than putting it in front because
5019 this way, we don't need extra alignment for adding a 4-byte-aligned
5020 mov(a) label to a 2/4 or 8/4 byte aligned table. */
5021 else if (NONJUMP_INSN_P (from)
5022 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5023 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
5024 return from;
5026 if (BARRIER_P (from))
5028 rtx_insn *next;
5030 found_barrier = from;
5032 /* If we are at the end of the function, or in front of an alignment
5033 instruction, we need not insert an extra alignment. We prefer
5034 this kind of barrier. */
5035 if (barrier_align (from) > 2)
5036 good_barrier = from;
5038 /* If we are at the end of a hot/cold block, dump the constants
5039 here. */
5040 next = NEXT_INSN (from);
5041 if (next
5042 && NOTE_P (next)
5043 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5044 break;
5047 if (broken_move (from))
5049 rtx pat, src, dst;
5050 machine_mode mode;
5052 pat = PATTERN (from);
5053 if (GET_CODE (pat) == PARALLEL)
5054 pat = XVECEXP (pat, 0, 0);
5055 src = SET_SRC (pat);
5056 dst = SET_DEST (pat);
5057 mode = GET_MODE (dst);
5059 /* A GOT pc-relative setting comes in a pair of
5060 mova .L8,r0
5061 mov.l .L8,r12
5062 instructions (plus an add r0,r12).
5063 Remember if we see one without the other. */
5064 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5065 last_got = last_got ? NULL : from;
5066 else if (PIC_ADDR_P (src))
5067 last_got = last_got ? NULL : from;
5069 /* We must explicitly check the mode, because sometimes the
5070 front end will generate code to load unsigned constants into
5071 HImode targets without properly sign extending them. */
5072 if (mode == HImode
5073 || (mode == SImode && satisfies_constraint_I16 (src)
5074 && REGNO (dst) != FPUL_REG))
5076 found_hi += 2;
5077 /* We put the short constants before the long constants, so
5078 we must count the length of short constants in the range
5079 for the long constants. */
5080 /* ??? This isn't optimal, but is easy to do. */
5081 si_limit -= 2;
5083 else
5085 /* We dump DF/DI constants before SF/SI ones, because
5086 the limit is the same, but the alignment requirements
5087 are higher. We may waste up to 4 additional bytes
5088 for alignment, and the DF/DI constant may have
5089 another SF/SI constant placed before it. */
5090 while (si_align > 2 && found_si + si_align - 2 > count_si)
5091 si_align >>= 1;
5092 if (found_si > count_si)
5093 count_si = found_si;
5094 found_si += GET_MODE_SIZE (mode);
5095 if (num_mova)
5096 si_limit -= GET_MODE_SIZE (mode);
5100 if (mova_p (from))
5102 switch (untangle_mova (&num_mova, &mova, from))
5104 case 1:
5105 if (flag_pic)
5107 rtx src = SET_SRC (PATTERN (from));
5108 if (GET_CODE (src) == CONST
5109 && GET_CODE (XEXP (src, 0)) == UNSPEC
5110 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5111 last_symoff = from;
5113 break;
5114 case 0: return find_barrier (0, 0, mova);
5115 case 2:
5117 leading_mova = 0;
5118 barrier_before_mova
5119 = good_barrier ? good_barrier : found_barrier;
5121 default: break;
5123 if (found_si > count_si)
5124 count_si = found_si;
5126 else if (JUMP_TABLE_DATA_P (from)
5127 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
5129 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5130 || (num_mova
5131 && (prev_nonnote_insn (from)
5132 == XEXP (MOVA_LABELREF (mova), 0))))
5133 num_mova--;
5134 if (barrier_align (next_real_insn (from)) == align_jumps.levels[0].log)
5136 /* We have just passed the barrier in front of the
5137 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5138 the ADDR_DIFF_VEC is accessed as data, just like our pool
5139 constants, this is a good opportunity to accommodate what
5140 we have gathered so far.
5141 If we waited any longer, we could end up at a barrier in
5142 front of code, which gives worse cache usage for separated
5143 instruction / data caches. */
5144 good_barrier = found_barrier;
5145 break;
5147 else
5149 rtx body = PATTERN (from);
5150 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5153 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5154 else if (JUMP_P (from)
5155 && ! TARGET_SH2
5156 && ! optimize_size)
5157 new_align = 4;
5159 /* There is a possibility that a bf is transformed into a bf/s by the
5160 delay slot scheduler. */
5161 if (JUMP_P (from)
5162 && get_attr_type (from) == TYPE_CBRANCH
5163 && ! sequence_insn_p (from))
5164 inc += 2;
5166 if (found_si)
5168 count_si += inc;
5169 if (new_align > si_align)
5171 si_limit -= (count_si - 1) & (new_align - si_align);
5172 si_align = new_align;
5174 count_si = (count_si + new_align - 1) & -new_align;
5176 if (found_hi)
5178 count_hi += inc;
5179 if (new_align > hi_align)
5181 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5182 hi_align = new_align;
5184 count_hi = (count_hi + new_align - 1) & -new_align;
5186 from = NEXT_INSN (from);
5189 if (num_mova)
5191 if (leading_mova)
5193 /* Try as we might, the leading mova is out of range. Change
5194 it into a load (which will become a pcload) and retry. */
5195 fixup_mova (mova);
5196 return find_barrier (0, 0, mova);
5198 else
5200 /* Insert the constant pool table before the mova instruction,
5201 to prevent the mova label reference from going out of range. */
5202 from = mova;
5203 good_barrier = found_barrier = barrier_before_mova;
5207 if (found_barrier)
5209 if (good_barrier && next_real_insn (found_barrier))
5210 found_barrier = good_barrier;
5212 else
5214 /* We didn't find a barrier in time to dump our stuff,
5215 so we'll make one. */
5216 rtx_code_label *label = gen_label_rtx ();
5218 /* Don't emit a constant table in the middle of insns for
5219 casesi_worker_2. This is a bit overkill but is enough
5220 because casesi_worker_2 does not appear very frequently. */
5221 if (last_symoff)
5222 from = last_symoff;
5224 /* If we exceeded the range, then we must back up over the last
5225 instruction we looked at. Otherwise, we just need to undo the
5226 NEXT_INSN at the end of the loop. */
5227 if (PREV_INSN (from) != orig
5228 && (count_hi > hi_limit || count_si > si_limit))
5229 from = PREV_INSN (PREV_INSN (from));
5230 else
5231 from = PREV_INSN (from);
5233 /* Don't emit a constant table in the middle of global pointer setting,
5234 since that would move the addressing base GOT into another table.
5235 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5236 in the pool anyway, so just move up the whole constant pool.
5238 However, avoid doing so when the last single GOT mov is the starting
5239 insn itself. Going back past the start insn would create a negative
5240 offset, causing errors. */
5241 if (last_got && last_got != orig)
5242 from = PREV_INSN (last_got);
5244 /* Don't insert the constant pool table at the position which
5245 may be the landing pad. */
5246 if (flag_exceptions
5247 && CALL_P (from)
5248 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5249 from = PREV_INSN (from);
5251 /* Walk back to be just before any jump or label.
5252 Putting it before a label reduces the number of times the branch
5253 around the constant pool table will be hit. Putting it before
5254 a jump makes it more likely that the bra delay slot will be
5255 filled. */
5256 while (NOTE_P (from) || JUMP_P (from) || LABEL_P (from))
5257 from = PREV_INSN (from);
5259 if (CALL_P (from))
5261 bool sibcall_p = SIBLING_CALL_P (from);
5263 /* If FROM was a sibling call, then we know that control
5264 will not return. In fact, we were guaranteed to hit
5265 a barrier before another real insn.
5267 The jump around the constant pool is unnecessary. It
5268 costs space, but more importantly it confuses dwarf2cfi
5269 generation. */
5270 if (sibcall_p)
5271 return emit_barrier_after (from);
5274 from = emit_jump_insn_after (gen_jump (label), from);
5275 JUMP_LABEL (from) = label;
5276 LABEL_NUSES (label) = 1;
5277 found_barrier = emit_barrier_after (from);
5278 emit_label_after (label, found_barrier);
5281 return found_barrier;
5284 /* If the instruction INSN is implemented by a special function, and we can
5285 positively find the register that is used to call the sfunc, and this
5286 register is not used anywhere else in this instruction - except as the
5287 destination of a set, return this register; else, return 0. */
5289 sfunc_uses_reg (rtx_insn *insn)
5291 int i;
5292 rtx pattern, part, reg_part, reg;
5294 if (!NONJUMP_INSN_P (insn))
5295 return NULL_RTX;
5296 pattern = PATTERN (insn);
5297 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5298 return NULL_RTX;
5300 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5302 part = XVECEXP (pattern, 0, i);
5303 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5304 reg_part = part;
5306 if (! reg_part)
5307 return NULL_RTX;
5308 reg = XEXP (reg_part, 0);
5309 for (int i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5311 part = XVECEXP (pattern, 0, i);
5312 if (part == reg_part || GET_CODE (part) == CLOBBER)
5313 continue;
5314 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5315 && REG_P (SET_DEST (part)))
5316 ? SET_SRC (part) : part)))
5317 return NULL_RTX;
5319 return reg;
5322 /* See if the only way in which INSN uses REG is by calling it, or by
5323 setting it while calling it. Set *SET to a SET rtx if the register
5324 is set by INSN. */
5325 static bool
5326 noncall_uses_reg (rtx reg, rtx_insn *insn, rtx *set)
5328 *set = NULL_RTX;
5330 rtx reg2 = sfunc_uses_reg (insn);
5331 if (reg2 && REGNO (reg2) == REGNO (reg))
5333 rtx pattern = single_set (insn);
5334 if (pattern
5335 && REG_P (SET_DEST (pattern))
5336 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5337 *set = pattern;
5338 return false;
5340 if (!CALL_P (insn))
5342 /* We don't use rtx_equal_p because we don't care if the mode is
5343 different. */
5344 rtx pattern = single_set (insn);
5345 if (pattern
5346 && REG_P (SET_DEST (pattern))
5347 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5349 rtx par, part;
5350 int i;
5352 *set = pattern;
5353 par = PATTERN (insn);
5354 if (GET_CODE (par) == PARALLEL)
5355 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5357 part = XVECEXP (par, 0, i);
5358 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5359 return true;
5361 return reg_mentioned_p (reg, SET_SRC (pattern));
5364 return true;
5367 rtx pattern = PATTERN (insn);
5369 if (GET_CODE (pattern) == PARALLEL)
5371 for (int i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5372 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5373 return true;
5374 pattern = XVECEXP (pattern, 0, 0);
5377 if (GET_CODE (pattern) == SET)
5379 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5381 /* We don't use rtx_equal_p, because we don't care if the
5382 mode is different. */
5383 if (!REG_P (SET_DEST (pattern))
5384 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5385 return true;
5387 *set = pattern;
5390 pattern = SET_SRC (pattern);
5393 if (GET_CODE (pattern) != CALL
5394 || !MEM_P (XEXP (pattern, 0))
5395 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5396 return true;
5398 return false;
5401 /* Given a X, a pattern of an insn or a part of it, return a mask of used
5402 general registers. Bits 0..15 mean that the respective registers
5403 are used as inputs in the instruction. Bits 16..31 mean that the
5404 registers 0..15, respectively, are used as outputs, or are clobbered.
5405 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
5407 regs_used (rtx x, int is_dest)
5409 enum rtx_code code;
5410 const char *fmt;
5411 int used = 0;
5413 if (! x)
5414 return used;
5415 code = GET_CODE (x);
5416 switch (code)
5418 case REG:
5419 if (REGNO (x) < 16)
5420 return (((1 << hard_regno_nregs (0, GET_MODE (x))) - 1)
5421 << (REGNO (x) + is_dest));
5422 return 0;
5423 case SUBREG:
5425 rtx y = SUBREG_REG (x);
5427 if (!REG_P (y))
5428 break;
5429 if (REGNO (y) < 16)
5430 return (((1 << hard_regno_nregs (0, GET_MODE (x))) - 1)
5431 << (REGNO (y) +
5432 subreg_regno_offset (REGNO (y),
5433 GET_MODE (y),
5434 SUBREG_BYTE (x),
5435 GET_MODE (x)) + is_dest));
5436 return 0;
5438 case SET:
5439 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5440 case RETURN:
5441 /* If there was a return value, it must have been indicated with USE. */
5442 return 0x00ffff00;
5443 case CLOBBER:
5444 is_dest = 1;
5445 break;
5446 case MEM:
5447 is_dest = 0;
5448 break;
5449 case CALL:
5450 used |= 0x00ff00f0;
5451 break;
5452 default:
5453 break;
5456 fmt = GET_RTX_FORMAT (code);
5458 for (int i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5460 if (fmt[i] == 'E')
5462 for (int j = XVECLEN (x, i) - 1; j >= 0; j--)
5463 used |= regs_used (XVECEXP (x, i, j), is_dest);
5465 else if (fmt[i] == 'e')
5466 used |= regs_used (XEXP (x, i), is_dest);
5468 return used;
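/* Worked example: for (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3))) the
   result is (1 << 2) | (1 << 3) for the two inputs plus (1 << (1 + 16)) for
   the output register, i.e. 0x2000c.  */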
5471 /* Create an instruction that prevents redirection of a conditional branch
5472 to the destination of the JUMP with address ADDR.
5473 If the branch needs to be implemented as an indirect jump, try to find
5474 a scratch register for it.
5475 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5476 If any preceding insn that doesn't fit into a delay slot is good enough,
5477 pass 1. Pass 2 if a definite blocking insn is needed.
5478 -1 is used internally to avoid deep recursion.
5479 If a blocking instruction is made or recognized, return it. */
5480 static rtx_insn *
5481 gen_block_redirect (rtx_insn *jump, int addr, int need_block)
5483 int dead = 0;
5484 rtx_insn *prev = prev_nonnote_insn (jump);
5486 /* First, check if we already have an instruction that satisfies our need. */
5487 if (prev && NONJUMP_INSN_P (prev) && ! prev->deleted ())
5489 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5490 return prev;
5491 if (GET_CODE (PATTERN (prev)) == USE
5492 || GET_CODE (PATTERN (prev)) == CLOBBER
5493 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5494 prev = jump;
5495 else if ((need_block &= ~1) < 0)
5496 return prev;
5497 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5498 need_block = 0;
5500 if (GET_CODE (PATTERN (jump)) == RETURN)
5502 if (! need_block)
5503 return prev;
5504 /* Reorg even does nasty things with return insns that cause branches
5505 to go out of range - see find_end_label and callers. */
5506 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5508 /* We can't use JUMP_LABEL here because it might be undefined
5509 when not optimizing. */
5510 rtx dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5511 /* If the branch is out of range, try to find a scratch register for it. */
5512 if (optimize
5513 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5514 > 4092 + 4098))
5516 rtx_insn *scan;
5517 /* Don't look for the stack pointer as a scratch register,
5518 it would cause trouble if an interrupt occurred. */
5519 unsigned attempt = 0x7fff, used;
5520 int jump_left = flag_expensive_optimizations + 1;
5522 /* It is likely that the most recent eligible instruction is wanted for
5523 the delay slot. Therefore, find out which registers it uses, and
5524 try to avoid using them. */
5526 for (scan = jump; (scan = PREV_INSN (scan)); )
5528 if (scan->deleted ())
5529 continue;
5530 rtx_code code = GET_CODE (scan);
5531 if (code == CODE_LABEL || code == JUMP_INSN)
5532 break;
5533 if (code == INSN
5534 && GET_CODE (PATTERN (scan)) != USE
5535 && GET_CODE (PATTERN (scan)) != CLOBBER
5536 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5538 attempt &= ~regs_used (PATTERN (scan), 0);
5539 break;
5542 for (used = dead = 0, scan = JUMP_LABEL_AS_INSN (jump);
5543 (scan = NEXT_INSN (scan)); )
5545 if (scan->deleted ())
5546 continue;
5547 rtx_code code = GET_CODE (scan);
5548 if (INSN_P (scan))
5550 used |= regs_used (PATTERN (scan), 0);
5551 if (code == CALL_INSN)
5552 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5553 dead |= (used >> 16) & ~used;
5554 if (dead & attempt)
5556 dead &= attempt;
5557 break;
5559 if (code == JUMP_INSN)
5561 if (jump_left-- && simplejump_p (scan))
5562 scan = JUMP_LABEL_AS_INSN (scan);
5563 else
5564 break;
5568 /* Mask out the stack pointer again, in case it was
5569 the only 'free' register we have found. */
5570 dead &= 0x7fff;
5572 /* If the immediate destination is still in range, check for possible
5573 threading with a jump beyond the delay slot insn.
5574 Don't check if we are called recursively; the jump has been or will be
5575 checked in that other invocation. */
5577 else if (optimize && need_block >= 0)
5579 rtx_insn *next = next_active_insn (as_a<rtx_insn *> (dest));
5580 next = next_active_insn (next);
5581 if (next && JUMP_P (next)
5582 && GET_CODE (PATTERN (next)) == SET
5583 && recog_memoized (next) == CODE_FOR_jump_compact)
5585 dest = JUMP_LABEL (next);
5586 if (dest
5587 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5588 > 4092 + 4098))
5589 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5593 if (dead)
5595 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5597 /* It would be nice if we could convert the jump into an indirect
5598 jump / far branch right now, and thus expose all constituent
5599 instructions to further optimization. However, reorg uses
5600 simplejump_p to determine if there is an unconditional jump where
5601 it should try to schedule instructions from the target of the
5602 branch; simplejump_p fails for indirect jumps even if they have
5603 a JUMP_LABEL. */
5604 rtx_insn *insn = emit_insn_before (gen_indirect_jump_scratch
5605 (reg, GEN_INT (unspec_bbr_uid++)),
5606 jump);
5607 /* ??? We would like this to have the scope of the jump, but that
5608 scope will change when a delay slot insn of an inner scope is added.
5609 Hence, after delay slot scheduling, we'll have to expect
5610 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5611 the jump. */
5613 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5614 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5615 return insn;
5617 else if (need_block)
5618 /* We can't use JUMP_LABEL here because it might be undefined
5619 when not optimizing. */
5620 return emit_insn_before (gen_block_branch_redirect
5621 (GEN_INT (unspec_bbr_uid++)),
5622 jump);
5623 return prev;
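/* A sketch of the scratch-register search above, for reference: ATTEMPT
   starts as 0x7fff (r0-r14, deliberately excluding the stack pointer) and
   is narrowed by the registers used by the likely delay-slot candidate in
   front of the jump.  Scanning forward from the jump target, a register
   becomes a candidate in DEAD once it is seen written before being read,
   i.e. `dead |= (used >> 16) & ~used'.  If an acceptable register is
   found, an indirect_jump_scratch insn is emitted so that the register is
   effectively reserved for a possible far branch; otherwise a
   block_branch_redirect insn is emitted when a blocking insn was
   requested.  */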
5626 #define CONDJUMP_MIN -252
5627 #define CONDJUMP_MAX 262
5628 struct far_branch
5630 /* A label (to be placed) in front of the jump
5631 that jumps to our ultimate destination. */
5632 rtx_insn *near_label;
5633 /* Where we are going to insert it if we cannot move the jump any farther,
5634 or the jump itself if we have picked up an existing jump. */
5635 rtx_insn *insert_place;
5636 /* The ultimate destination. */
5637 rtx_insn *far_label;
5638 struct far_branch *prev;
5639 /* If the branch has already been created, its address;
5640 else the address of its first prospective user. */
5641 int address;
5644 enum mdep_reorg_phase_e mdep_reorg_phase;
5646 static void
5647 gen_far_branch (struct far_branch *bp)
5649 rtx_insn *insn = bp->insert_place;
5650 rtx_jump_insn *jump;
5651 rtx_code_label *label = gen_label_rtx ();
5653 emit_label_after (label, insn);
5654 if (bp->far_label)
5656 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5657 LABEL_NUSES (bp->far_label)++;
5659 else
5660 jump = emit_jump_insn_after (gen_return (), insn);
5662 /* Emit a barrier so that reorg knows that any following instructions
5663 are not reachable via a fall-through path.
5664 But don't do this when not optimizing, since we wouldn't suppress the
5665 alignment for the barrier then, and could end up with out-of-range
5666 pc-relative loads. */
5667 if (optimize)
5668 emit_barrier_after (jump);
5669 emit_label_after (bp->near_label, insn);
5671 if (bp->far_label)
5672 JUMP_LABEL (jump) = bp->far_label;
5673 else
5675 rtx pat = PATTERN (jump);
5676 gcc_assert (ANY_RETURN_P (pat));
5677 JUMP_LABEL (jump) = pat;
5680 bool ok = invert_jump (as_a <rtx_jump_insn *> (insn), label, 1);
5681 gcc_assert (ok);
5683 /* If we are branching around a jump (rather than a return), prevent
5684 reorg from using an insn from the jump target as the delay slot insn -
5685 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5686 and it could cause branches to go out of range. */
5687 if (bp->far_label)
5688 (emit_insn_after
5689 (gen_stuff_delay_slot
5690 (GEN_INT (unspec_bbr_uid++),
5691 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5692 insn));
5693 /* Prevent reorg from undoing our splits. */
5694 gen_block_redirect (jump, bp->address += 2, 2);
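/* Roughly, gen_far_branch turns an out-of-range conditional branch

     bt  .L_far          ! .L_far beyond the CONDJUMP_MIN/CONDJUMP_MAX window

   into an inverted short branch around an unconditional jump (or a return
   when there is no far label):

     bf  .L_skip
     bra .L_far
     nop                 ! delay slot
   .L_skip:

   This is only an illustrative sketch; the real instruction selection is
   left to the jump/return patterns, and NEAR_LABEL is placed in front of
   the new jump so that other out-of-range branches can reuse it.  */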
5697 /* Fix up ADDR_DIFF_VECs. */
5698 void
5699 fixup_addr_diff_vecs (rtx_insn *first)
5701 rtx_insn *insn;
5703 for (insn = first; insn; insn = NEXT_INSN (insn))
5705 rtx vec_lab, pat, prevpat, x, braf_label;
5706 rtx_insn *prev;
5708 if (! JUMP_TABLE_DATA_P (insn)
5709 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5710 continue;
5711 pat = PATTERN (insn);
5712 vec_lab = XEXP (XEXP (pat, 0), 0);
5714 /* Search the matching casesi_jump_2. */
5715 for (prev = as_a <rtx_insn *> (vec_lab); ; prev = PREV_INSN (prev))
5717 if (!JUMP_P (prev))
5718 continue;
5719 prevpat = PATTERN (prev);
5720 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5721 continue;
5722 x = XVECEXP (prevpat, 0, 1);
5723 if (GET_CODE (x) != USE)
5724 continue;
5725 x = XEXP (x, 0);
5726 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5727 break;
5729 /* FIXME: This is a bug in the optimizer, but it seems harmless
5730 to just avoid panicking. */
5731 if (!prev)
5732 continue;
5734 /* Emit the reference label of the braf where it belongs, right after
5735 the casesi_jump_2 (i.e. braf). */
5736 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5737 emit_label_after (as_a <rtx_insn *> (braf_label), prev);
5739 /* Fix up the ADDR_DIFF_VEC to be relative
5740 to the reference address of the braf. */
5741 XEXP (XEXP (pat, 0), 0) = braf_label;
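/* Background for the fixup above, roughly: a casesi dispatch on SH is a
   braf whose ADDR_DIFF_VEC originally records offsets relative to the
   table's own label.  Since braf computes its target relative to a point
   just past the braf instruction itself, the reference label is re-emitted
   right after the casesi_jump_2 (the braf), and the vector is re-based on
   that label so the emitted offsets match what braf actually computes.  */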
5745 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5746 a barrier. Return the base 2 logarithm of the desired alignment. */
5748 barrier_align (rtx_insn *barrier_or_label)
5750 if (! barrier_or_label)
5751 return 0;
5753 if (LABEL_P (barrier_or_label)
5754 && NEXT_INSN (barrier_or_label)
5755 && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
5756 return 2;
5758 if (BARRIER_P (barrier_or_label)
5759 && PREV_INSN (barrier_or_label)
5760 && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
5762 rtx pat = PATTERN (PREV_INSN (barrier_or_label));
5763 /* If this is a very small table, we want to keep the alignment after
5764 the table to the minimum for proper code alignment. */
5765 return ((optimize_size
5766 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5767 <= (unsigned) 1 << (CACHE_LOG - 2)))
5768 ? 1 : align_jumps.levels[0].log);
5771 rtx_insn *next = next_active_insn (barrier_or_label);
5773 if (! next)
5774 return 0;
5776 rtx pat = PATTERN (next);
5778 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5779 /* This is a barrier in front of a constant table. */
5780 return 0;
5782 if (optimize_size)
5783 return 0;
5785 if (! TARGET_SH2 || ! optimize)
5786 return align_jumps.levels[0].log;
5788 /* When fixing up pcloads, a constant table might be inserted just before
5789 the basic block that ends with the barrier. Thus, we can't trust the
5790 instruction lengths before that. */
5791 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5793 /* Check if there is an immediately preceding branch to the insn beyond
5794 the barrier. We must weight the cost of discarding useful information
5795 from the current cache line when executing this branch and there is
5796 an alignment, against that of fetching unneeded insns in front of the
5797 branch target when there is no alignment. */
5799 /* There are two delay_slot cases to consider. One is the simple case
5800 where the preceding branch is to the insn beyond the barrier (simple
5801 delay slot filling), and the other is where the preceding branch has
5802 a delay slot that is a duplicate of the insn after the barrier
5803 (fill_eager_delay_slots) and the branch is to the insn after the insn
5804 after the barrier. */
5806 int slot, credit;
5807 bool jump_to_next = false;
5809 /* Skip to the insn before the JUMP_INSN before the barrier under
5810 investigation. */
5811 rtx_insn *prev = prev_real_insn (prev_active_insn (barrier_or_label));
5813 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5814 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5815 prev = prev_real_insn (prev))
5817 jump_to_next = false;
5818 if (GET_CODE (PATTERN (prev)) == USE
5819 || GET_CODE (PATTERN (prev)) == CLOBBER)
5820 continue;
5821 if (rtx_sequence *prev_seq = dyn_cast <rtx_sequence *> (PATTERN (prev)))
5823 prev = prev_seq->insn (1);
5824 if (INSN_UID (prev) == INSN_UID (next))
5826 /* Delay slot was filled with insn at jump target. */
5827 jump_to_next = true;
5828 continue;
5832 if (slot &&
5833 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5834 slot = 0;
5835 credit -= get_attr_length (prev);
5837 if (prev && jump_to_label_p (prev))
5839 rtx_insn *x;
5840 if (jump_to_next
5841 || next_real_insn (JUMP_LABEL_AS_INSN (prev)) == next
5842 /* If relax_delay_slots() decides NEXT was redundant
5843 with some previous instruction, it will have
5844 redirected PREV's jump to the following insn. */
5845 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5846 /* There is no upper bound on redundant instructions
5847 that might have been skipped, but we must not put an
5848 alignment where none had been before. */
5849 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5850 (INSN_P (x)
5851 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5852 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5853 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5855 rtx pat = PATTERN (prev);
5856 if (GET_CODE (pat) == PARALLEL)
5857 pat = XVECEXP (pat, 0, 0);
5858 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5859 return 0;
5864 return align_jumps.levels[0].log;
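/* A rough note on the heuristic above: CREDIT starts at a quarter of a
   cache line ((1 << (CACHE_LOG - 2)) + 2 bytes) and is decremented by the
   length of each insn scanned backwards from the barrier, while SLOT is
   cleared once a potential delay-slot insn has been seen.  If the
   immediately preceding branch is known to jump just past the barrier and
   some credit remains, the function returns 0 (no extra alignment), on the
   theory that padding here would only discard useful bytes from the
   current cache line without making the branch target any cheaper to
   fetch.  */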
5867 /* If we are inside a phony loop, almost any kind of label can turn up as the
5868 first one in the loop. Aligning a braf label causes incorrect switch
5869 destination addresses; we can detect braf labels because they are
5870 followed by a BARRIER.
5871 Applying loop alignment to small constant or switch tables is a waste
5872 of space, so we suppress this too. */
5874 sh_loop_align (rtx_insn *label)
5876 rtx_insn *next = label;
5878 if (! optimize || optimize_size)
5879 return 0;
5882 next = next_nonnote_insn (next);
5883 while (next && LABEL_P (next));
5885 if (! next
5886 || ! INSN_P (next)
5887 || recog_memoized (next) == CODE_FOR_consttable_2)
5888 return 0;
5890 return align_loops.levels[0].log;
5893 /* Do a final pass over the function, just before delayed branch
5894 scheduling. */
5895 static void
5896 sh_reorg (void)
5898 rtx_insn *first, *insn, *mova = NULL;
5899 int num_mova;
5900 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5901 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5903 first = get_insns ();
5904 max_labelno_before_reorg = max_label_num ();
5906 /* We must split call insns before introducing `mova's. If we're
5907 optimizing, they'll have already been split. Otherwise, make
5908 sure we don't split them too late. */
5909 if (! optimize)
5910 split_all_insns_noflow ();
5912 /* If relaxing, generate pseudo-ops to associate function calls with
5913 the symbols they call. It does no harm to not generate these
5914 pseudo-ops. However, when we can generate them, it enables the
5915 linker to potentially relax the jsr to a bsr, and eliminate the
5916 register load and, possibly, the constant pool entry. */
5918 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5919 if (TARGET_RELAX)
5921 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5922 own purposes. This works because none of the remaining passes
5923 need to look at them.
5925 ??? But it may break in the future. We should use a machine
5926 dependent REG_NOTE, or some other approach entirely. */
5927 for (insn = first; insn; insn = NEXT_INSN (insn))
5929 if (INSN_P (insn))
5931 rtx note;
5933 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5934 NULL_RTX)) != 0)
5935 remove_note (insn, note);
5939 for (insn = first; insn; insn = NEXT_INSN (insn))
5941 rtx pattern, reg, set, dies;
5942 rtx_code_label *label;
5943 rtx_insn *link, *scan;
5944 int rescan = 0, foundinsn = 0;
5946 if (CALL_P (insn))
5948 pattern = PATTERN (insn);
5950 if (GET_CODE (pattern) == PARALLEL)
5951 pattern = XVECEXP (pattern, 0, 0);
5952 if (GET_CODE (pattern) == SET)
5953 pattern = SET_SRC (pattern);
5955 if (GET_CODE (pattern) != CALL
5956 || !MEM_P (XEXP (pattern, 0)))
5957 continue;
5959 reg = XEXP (XEXP (pattern, 0), 0);
5961 else
5963 reg = sfunc_uses_reg (insn);
5964 if (! reg)
5965 continue;
5968 if (!REG_P (reg))
5969 continue;
5971 /* Try scanning backward to find where the register is set. */
5972 link = NULL;
5973 for (scan = PREV_INSN (insn);
5974 scan && !LABEL_P (scan);
5975 scan = PREV_INSN (scan))
5977 if (! INSN_P (scan))
5978 continue;
5980 if (! reg_mentioned_p (reg, scan))
5981 continue;
5983 if (noncall_uses_reg (reg, scan, &set))
5984 break;
5986 if (set)
5988 link = scan;
5989 break;
5993 if (! link)
5994 continue;
5996 /* The register is set at LINK. */
5998 /* We can only optimize the function call if the register is
5999 being set to a symbol. In theory, we could sometimes
6000 optimize calls to a constant location, but the assembler
6001 and linker do not support that at present. */
6002 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
6003 && GET_CODE (SET_SRC (set)) != LABEL_REF)
6004 continue;
6006 /* Scan forward from LINK to the place where REG dies, and
6007 make sure that the only insns which use REG are
6008 themselves function calls. */
6010 /* ??? This doesn't work for call targets that were allocated
6011 by reload, since there may not be a REG_DEAD note for the
6012 register. */
6014 dies = NULL_RTX;
6015 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
6017 rtx scanset;
6019 /* Don't try to trace forward past a CODE_LABEL if we haven't
6020 seen INSN yet. Ordinarily, we will only find the setting insn
6021 if it is in the same basic block. However,
6022 cross-jumping can insert code labels in between the load and
6023 the call, and can result in situations where a single call
6024 insn may have two targets depending on where we came from. */
6026 if (LABEL_P (scan) && ! foundinsn)
6027 break;
6029 if (! INSN_P (scan))
6030 continue;
6032 /* Don't try to trace forward past a JUMP. To optimize
6033 safely, we would have to check that all the
6034 instructions at the jump destination did not use REG. */
6036 if (JUMP_P (scan))
6037 break;
6039 if (! reg_mentioned_p (reg, scan))
6040 continue;
6042 if (noncall_uses_reg (reg, scan, &scanset))
6043 break;
6045 if (scan == insn)
6046 foundinsn = 1;
6048 if (scan != insn
6049 && (CALL_P (scan) || sfunc_uses_reg (scan)))
6051 /* There is a function call to this register other
6052 than the one we are checking. If we optimize
6053 this call, we need to rescan again below. */
6054 rescan = 1;
6057 /* ??? We shouldn't have to worry about SCANSET here.
6058 We should just be able to check for a REG_DEAD note
6059 on a function call. However, the REG_DEAD notes are
6060 apparently not dependable around libcalls; c-torture
6061 execute/920501-2 is a test case. If SCANSET is set,
6062 then this insn sets the register, so it must have
6063 died earlier. Unfortunately, this will only handle
6064 the cases in which the register is, in fact, set in a
6065 later insn. */
6067 /* ??? We shouldn't have to use FOUNDINSN here.
6068 This dates back to when we used LOG_LINKS to find
6069 the most recent insn which sets the register. */
6071 if (foundinsn
6072 && (scanset
6073 || find_reg_note (scan, REG_DEAD, reg)))
6075 dies = scan;
6076 break;
6080 if (! dies)
6082 /* Either there was a branch, or some insn used REG
6083 other than as a function call address. */
6084 continue;
6087 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6088 on the insn which sets the register, and on each call insn
6089 which uses the register. In final_prescan_insn we look for
6090 the REG_LABEL_OPERAND notes, and output the appropriate label
6091 or pseudo-op. */
6093 label = gen_label_rtx ();
6094 add_reg_note (link, REG_LABEL_OPERAND, label);
6095 add_reg_note (insn, REG_LABEL_OPERAND, label);
6096 if (rescan)
6098 scan = link;
6101 rtx reg2;
6103 scan = NEXT_INSN (scan);
6104 if (scan != insn
6105 && ((CALL_P (scan)
6106 && reg_mentioned_p (reg, scan))
6107 || ((reg2 = sfunc_uses_reg (scan))
6108 && REGNO (reg2) == REGNO (reg))))
6109 add_reg_note (scan, REG_LABEL_OPERAND, label);
6111 while (scan != dies);
6116 if (TARGET_SH2)
6117 fixup_addr_diff_vecs (first);
6119 if (optimize)
6121 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6122 shorten_branches (first);
6125 /* Scan the function looking for move instructions which have to be
6126 changed to pc-relative loads and insert the literal tables. */
6127 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6128 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6130 if (mova_p (insn))
6132 /* ??? basic block reordering can move a switch table dispatch
6133 below the switch table. Check if that has happened.
6134 We only have the addresses available when optimizing; but then,
6135 this check shouldn't be needed when not optimizing. */
6136 if (!untangle_mova (&num_mova, &mova, insn))
6138 insn = mova;
6139 num_mova = 0;
6142 else if (JUMP_TABLE_DATA_P (insn)
6143 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6144 && num_mova
6145 /* ??? loop invariant motion can also move a mova out of a
6146 loop. Since loop does this code motion anyway, maybe we
6147 should wrap UNSPEC_MOVA into a CONST, so that reload can
6148 move it back. */
6149 && ((num_mova > 1
6150 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6151 || (prev_nonnote_insn (insn)
6152 == XEXP (MOVA_LABELREF (mova), 0))))
6154 rtx_insn *scan;
6155 int total;
6157 num_mova--;
6159 /* Some code might have been inserted between the mova and
6160 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6161 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6162 total += get_attr_length (scan);
6164 /* The range of a mova is 1020; add 4 because the pc counts from the
6165 address of the second instruction after this one, and subtract 2 in
6166 case the pc is 2-byte aligned. Possible alignment needed for the
6167 ADDR_DIFF_VEC cancels out with alignment effects of the mova itself. */
6168 if (total > 1022)
6170 /* Change the mova into a load, and restart scanning
6171 there. broken_move will then return true for mova. */
6172 fixup_mova (mova);
6173 insn = mova;
6176 if (broken_move (insn)
6177 || (NONJUMP_INSN_P (insn)
6178 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6180 rtx_insn *scan;
6181 /* Scan ahead looking for a barrier to stick the constant table
6182 behind. */
6183 rtx_insn *barrier = find_barrier (num_mova, mova, insn);
6184 rtx_insn *last_float_move = NULL;
6185 rtx last_float = 0, *last_float_addr = NULL;
6186 int need_aligned_label = 0;
6188 if (num_mova && ! mova_p (mova))
6190 /* find_barrier had to change the first mova into a
6191 pcload; thus, we have to start with this new pcload. */
6192 insn = mova;
6193 num_mova = 0;
6195 /* Now find all the moves between the points and modify them. */
6196 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6198 if (LABEL_P (scan))
6199 last_float = 0;
6200 if (NONJUMP_INSN_P (scan)
6201 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6202 need_aligned_label = 1;
6203 if (broken_move (scan))
6205 rtx *patp = &PATTERN (scan), pat = *patp;
6206 rtx src, dst;
6207 rtx lab;
6208 rtx newsrc;
6209 machine_mode mode;
6211 if (GET_CODE (pat) == PARALLEL)
6212 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6213 src = SET_SRC (pat);
6214 dst = SET_DEST (pat);
6215 mode = GET_MODE (dst);
6217 if (mode == SImode && satisfies_constraint_I16 (src)
6218 && REGNO (dst) != FPUL_REG)
6220 int offset = 0;
6222 mode = HImode;
6223 while (GET_CODE (dst) == SUBREG)
6225 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6226 GET_MODE (SUBREG_REG (dst)),
6227 SUBREG_BYTE (dst),
6228 GET_MODE (dst));
6229 dst = SUBREG_REG (dst);
6231 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6233 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6235 /* This must be an insn that clobbers r0. */
6236 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6237 XVECLEN (PATTERN (scan), 0)
6238 - 1);
6239 rtx clobber = *clobberp;
6241 gcc_assert (GET_CODE (clobber) == CLOBBER
6242 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6244 if (last_float
6245 && reg_set_between_p (r0_rtx, last_float_move, scan))
6246 last_float = 0;
6247 lab = add_constant (src, mode, last_float);
6248 if (lab)
6249 emit_insn_before (gen_mova (lab), scan);
6250 else
6252 /* There will be a REG_UNUSED note for r0 on
6253 LAST_FLOAT_MOVE; we have to change it to REG_INC,
6254 otherwise reorg's mark_target_live_regs would not
6255 consider r0 to be used, and we could end up with a
6256 delay slot insn in front of SCAN that clobbers r0. */
6257 rtx note
6258 = find_regno_note (last_float_move, REG_UNUSED, 0);
6260 /* If we are not optimizing, then there may not be
6261 a note. */
6262 if (note)
6263 PUT_REG_NOTE_KIND (note, REG_INC);
6265 *last_float_addr = r0_inc_rtx;
6267 last_float_move = scan;
6268 last_float = src;
6269 newsrc = gen_const_mem (mode,
6270 (((TARGET_SH4 && ! TARGET_FMOVD)
6271 || REGNO (dst) == FPUL_REG)
6272 ? r0_inc_rtx
6273 : r0_rtx));
6274 last_float_addr = &XEXP (newsrc, 0);
6276 /* Remove the clobber of r0. */
6277 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6278 gen_rtx_SCRATCH (Pmode));
6280 /* This is a mova needing a label. Create it. */
6281 else if (GET_CODE (src) == UNSPEC
6282 && XINT (src, 1) == UNSPEC_MOVA
6283 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6285 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6286 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6287 newsrc = gen_rtx_UNSPEC (SImode,
6288 gen_rtvec (1, newsrc),
6289 UNSPEC_MOVA);
6291 else if (GET_CODE (src) == UNSPEC_VOLATILE
6292 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6294 newsrc = XVECEXP (src, 0, 0);
6295 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6296 INSN_CODE (scan) = -1;
6297 continue;
6299 else
6301 lab = add_constant (src, mode, 0);
6302 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6303 newsrc = gen_const_mem (mode, newsrc);
6305 *patp = gen_rtx_SET (dst, newsrc);
6306 INSN_CODE (scan) = -1;
6309 dump_table (need_aligned_label ? insn : 0, barrier);
6310 insn = barrier;
6313 label_ref_list_d_pool.release ();
6314 for (insn = first; insn; insn = NEXT_INSN (insn))
6315 PUT_MODE (insn, VOIDmode);
6317 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6318 INSN_ADDRESSES_FREE ();
6319 split_branches (first);
6321 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6322 also has an effect on the register that holds the address of the sfunc.
6323 Insert an extra dummy insn in front of each sfunc that pretends to
6324 use this register. */
6325 if (flag_delayed_branch)
6327 for (insn = first; insn; insn = NEXT_INSN (insn))
6329 rtx reg = sfunc_uses_reg (insn);
6331 if (! reg)
6332 continue;
6333 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6336 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
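/* Summary of sh_reorg's phases, as implemented above: (1) under -mrelax,
   pair each call insn with the insn that loads its target address via
   REG_LABEL_OPERAND notes, so final_prescan_insn can emit the labels and
   .uses pseudo-ops the linker needs for jsr -> bsr relaxation; (2) on
   TARGET_SH2, re-base ADDR_DIFF_VEC tables for braf-style dispatch;
   (3) find out-of-range constant loads (broken_move) and movas, dump
   constant pool tables behind suitable barriers, and turn the affected
   moves into pc-relative loads; (4) split out-of-range branches via
   split_branches; (5) with delayed branches enabled, emit a dummy use of
   the sfunc address register in front of each sfunc call to work around
   INSN_REFERENCES_ARE_DELAYED.  */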
6339 /* Return the UID of the insn that follows the specified label. */
6341 get_dest_uid (rtx_insn *label, int max_uid)
6343 rtx_insn *dest = next_real_insn (label);
6345 if (! dest)
6346 /* This can happen for an undefined label. */
6347 return 0;
6348 int dest_uid = INSN_UID (dest);
6349 /* If this is a newly created branch redirection blocking instruction,
6350 we cannot index the branch_uid or insn_addresses arrays with its
6351 uid. But then, we won't need to, because the actual destination is
6352 the following branch. */
6353 while (dest_uid >= max_uid)
6355 dest = NEXT_INSN (dest);
6356 dest_uid = INSN_UID (dest);
6358 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6359 return 0;
6360 return dest_uid;
6363 /* Split condbranches that are out of range. Also add clobbers for
6364 scratch registers that are needed in far jumps.
6365 We do this before delay slot scheduling, so that it can take our
6366 newly created instructions into account. It also allows us to
6367 find branches with common targets more easily. */
6368 static void
6369 split_branches (rtx_insn *first)
6371 rtx_insn *insn;
6372 struct far_branch **uid_branch, *far_branch_list = 0;
6373 int max_uid = get_max_uid ();
6374 int ok;
6376 /* Find out which branches are out of range. */
6377 shorten_branches (first);
6379 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6380 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6382 for (insn = first; insn; insn = NEXT_INSN (insn))
6383 if (! INSN_P (insn))
6384 continue;
6385 else if (insn->deleted ())
6387 /* Shorten_branches would split this instruction again,
6388 so transform it into a note. */
6389 SET_INSN_DELETED (insn);
6391 else if (JUMP_P (insn))
6393 enum attr_type type = get_attr_type (insn);
6394 if (type == TYPE_CBRANCH)
6396 rtx_insn *next, *beyond;
6398 if (get_attr_length (insn) > 4)
6400 rtx src = SET_SRC (PATTERN (insn));
6401 rtx_insn *olabel = safe_as_a <rtx_insn *> (XEXP (XEXP (src, 1), 0));
6402 int addr = INSN_ADDRESSES (INSN_UID (insn));
6403 rtx_insn *label = 0;
6404 int dest_uid = get_dest_uid (olabel, max_uid);
6405 struct far_branch *bp = uid_branch[dest_uid];
6407 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6408 the label if the LABEL_NUSES count drops to zero. There is
6409 always a jump_optimize pass that sets these values, but it
6410 proceeds to delete unreferenced code, and then if not
6411 optimizing, to un-delete the deleted instructions, thus
6412 leaving labels with too low uses counts. */
6413 if (! optimize)
6415 JUMP_LABEL (insn) = olabel;
6416 LABEL_NUSES (olabel)++;
6418 if (! bp)
6420 bp = (struct far_branch *) alloca (sizeof *bp);
6421 uid_branch[dest_uid] = bp;
6422 bp->prev = far_branch_list;
6423 far_branch_list = bp;
6424 bp->far_label = as_a <rtx_insn *> (
6425 XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6426 0));
6427 LABEL_NUSES (bp->far_label)++;
6429 else
6431 label = bp->near_label;
6432 if (! label && bp->address - addr >= CONDJUMP_MIN)
6434 rtx_insn *block = bp->insert_place;
6436 if (GET_CODE (PATTERN (block)) == RETURN)
6437 block = PREV_INSN (block);
6438 else
6439 block = gen_block_redirect (block,
6440 bp->address, 2);
6441 label = emit_label_after (gen_label_rtx (),
6442 PREV_INSN (block));
6443 bp->near_label = label;
6445 else if (label && ! NEXT_INSN (label))
6447 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6448 bp->insert_place = insn;
6449 else
6450 gen_far_branch (bp);
6453 if (! label
6454 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6456 bp->near_label = label = gen_label_rtx ();
6457 bp->insert_place = insn;
6458 bp->address = addr;
6460 ok = redirect_jump (as_a <rtx_jump_insn *> (insn), label, 0);
6461 gcc_assert (ok);
6463 else
6465 /* get_attr_length (insn) == 2 */
6466 /* Check if we have a pattern where reorg wants to redirect
6467 the branch to a label from an unconditional branch that
6468 is too far away. */
6469 /* We can't use JUMP_LABEL here because it might be undefined
6470 when not optimizing. */
6471 /* A syntax error might cause beyond to be NULL_RTX. */
6472 rtx temp = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
6473 beyond = next_active_insn (as_a<rtx_insn *> (temp));
6475 if (beyond
6476 && (JUMP_P (beyond)
6477 || ((beyond = next_active_insn (beyond))
6478 && JUMP_P (beyond)))
6479 && GET_CODE (PATTERN (beyond)) == SET
6480 && recog_memoized (beyond) == CODE_FOR_jump_compact
6481 && ((INSN_ADDRESSES
6482 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6483 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6484 > 252 + 258 + 2))
6485 gen_block_redirect (beyond,
6486 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6489 next = next_active_insn (insn);
6491 if (next
6492 && (JUMP_P (next)
6493 || ((next = next_active_insn (next))
6494 && JUMP_P (next)))
6495 && GET_CODE (PATTERN (next)) == SET
6496 && recog_memoized (next) == CODE_FOR_jump_compact
6497 && ((INSN_ADDRESSES
6498 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6499 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6500 > 252 + 258 + 2))
6501 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6503 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6505 int addr = INSN_ADDRESSES (INSN_UID (insn));
6506 rtx_insn *far_label = 0;
6507 int dest_uid = 0;
6508 struct far_branch *bp;
6510 if (type == TYPE_JUMP)
6512 if (CROSSING_JUMP_P (insn))
6514 emit_insn_before (gen_block_branch_redirect (const0_rtx),
6515 insn);
6516 continue;
6519 far_label = as_a <rtx_insn *> (
6520 XEXP (SET_SRC (PATTERN (insn)), 0));
6521 dest_uid = get_dest_uid (far_label, max_uid);
6522 if (! dest_uid)
6524 /* Parse errors can lead to labels outside
6525 the insn stream. */
6526 if (! NEXT_INSN (far_label))
6527 continue;
6529 if (! optimize)
6531 JUMP_LABEL (insn) = far_label;
6532 LABEL_NUSES (far_label)++;
6534 redirect_jump (as_a <rtx_jump_insn *> (insn), ret_rtx, 1);
6535 far_label = 0;
6538 bp = uid_branch[dest_uid];
6539 if (! bp)
6541 bp = (struct far_branch *) alloca (sizeof *bp);
6542 uid_branch[dest_uid] = bp;
6543 bp->prev = far_branch_list;
6544 far_branch_list = bp;
6545 bp->near_label = 0;
6546 bp->far_label = far_label;
6547 if (far_label)
6548 LABEL_NUSES (far_label)++;
6550 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6551 if (addr - bp->address <= CONDJUMP_MAX)
6552 emit_label_after (bp->near_label, PREV_INSN (insn));
6553 else
6555 gen_far_branch (bp);
6556 bp->near_label = 0;
6558 else
6559 bp->near_label = 0;
6560 bp->address = addr;
6561 bp->insert_place = insn;
6562 if (! far_label)
6563 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6564 else
6565 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6568 /* Generate all pending far branches,
6569 and free our references to the far labels. */
6570 while (far_branch_list)
6572 if (far_branch_list->near_label
6573 && ! NEXT_INSN (far_branch_list->near_label))
6574 gen_far_branch (far_branch_list);
6575 if (optimize
6576 && far_branch_list->far_label
6577 && ! --LABEL_NUSES (far_branch_list->far_label))
6578 delete_insn (far_branch_list->far_label);
6579 far_branch_list = far_branch_list->prev;
6582 /* Instruction length information is no longer valid due to the new
6583 instructions that have been generated. */
6584 init_insn_lengths ();
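/* The bookkeeping above, in short: UID_BRANCH maps the uid of each branch
   destination to a far_branch record shared by every out-of-range branch
   targeting it, so a single near label / far jump pair can serve several
   conditional branches.  Conditional branches longer than 4 bytes are
   redirected to that near label (creating it on demand), unconditional
   jumps and returns update the record's address and insert_place, and any
   still-pending far branches are materialized by gen_far_branch once the
   whole function has been scanned.  */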
6587 /* Dump out instruction addresses, which is useful for debugging the
6588 constant pool table stuff.
6590 If relaxing, output the label and pseudo-ops used to link together
6591 calls and the instruction which set the registers.
6593 ??? The addresses printed by this routine for insns are nonsense for
6594 insns which are inside of a sequence where none of the inner insns have
6595 variable length. This is because the second pass of shorten_branches
6596 does not bother to update them. */
6597 void
6598 final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
6599 int noperands ATTRIBUTE_UNUSED)
6601 if (TARGET_DUMPISIZE)
6602 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6604 if (TARGET_RELAX)
6606 if (rtx note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX))
6608 rtx pattern = PATTERN (insn);
6609 if (GET_CODE (pattern) == PARALLEL)
6610 pattern = XVECEXP (pattern, 0, 0);
6611 switch (GET_CODE (pattern))
6613 case SET:
6614 if (GET_CODE (SET_SRC (pattern)) != CALL
6615 && get_attr_type (insn) != TYPE_SFUNC)
6617 targetm.asm_out.internal_label
6618 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6619 break;
6621 /* FALLTHROUGH */
6622 case CALL:
6623 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6624 CODE_LABEL_NUMBER (XEXP (note, 0)));
6625 break;
6627 default:
6628 gcc_unreachable ();
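/* For illustration, with -mrelax the notes planted by sh_reorg make this
   function emit something along these lines (label numbers are of course
   arbitrary):

   .L5:
           mov.l   .L7,r1          ! insn that sets the call address
           ...
           .uses   .L5
           jsr     @r1             ! the paired call
           nop

   which tells the linker which address load feeds which jsr, so that it
   may relax the pair into a bsr and drop the register load and constant
   pool entry.  */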
6634 /* Dump out any constants accumulated in the final pass. These will
6635 only be labels. */
6636 const char *
6637 output_jump_label_table (void)
6639 if (pool_size)
6641 fprintf (asm_out_file, "\t.align 2\n");
6642 for (int i = 0; i < pool_size; i++)
6644 pool_node *p = &pool_vector[i];
6646 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6647 CODE_LABEL_NUMBER (p->label));
6648 output_asm_insn (".long %O0", &p->value);
6650 pool_size = 0;
6653 return "";
6656 /* A full frame looks like:
6658 arg-5
6659 arg-4
6660 [ if current_function_anonymous_args
6661 arg-3
6662 arg-2
6663 arg-1
6664 arg-0 ]
6665 saved-fp
6666 saved-r10
6667 saved-r11
6668 saved-r12
6669 saved-pr
6670 local-n
6672 local-1
6673 local-0 <- fp points here.
6675 Number of bytes pushed for anonymous args, used to pass information
6676 between expand_prologue and expand_epilogue.
6678 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6679 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6680 for an epilogue and a negative value means that it's for a sibcall
6681 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6682 all the registers that are about to be restored, and hence dead. */
6683 static void
6684 output_stack_adjust (int size, rtx reg, int epilogue_p,
6685 HARD_REG_SET *live_regs_mask, bool frame_p)
6687 rtx_insn *(*emit_fn) (rtx) = frame_p ? &emit_frame_insn : &emit_insn;
6688 if (size)
6690 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6692 /* This test is bogus, as output_stack_adjust is used to re-align the
6693 stack. */
6694 #if 0
6695 gcc_assert (!(size % align));
6696 #endif
6698 if (CONST_OK_FOR_ADD (size))
6699 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6700 /* Try to do it with two partial adjustments; however, we must make
6701 sure that the stack is properly aligned at all times, in case
6702 an interrupt occurs between the two partial adjustments. */
6703 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6704 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6706 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6707 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6709 else
6711 rtx const_reg;
6712 rtx insn;
6713 int temp = epilogue_p ? 7 : 1;
6714 int i;
6716 /* If TEMP is invalid, we could temporarily save a general
6717 register to MACL. However, there is currently no need
6718 to handle this case, so just die when we see it. */
6719 if (epilogue_p < 0
6720 || current_function_interrupt
6721 || ! call_used_regs[temp] || fixed_regs[temp])
6722 temp = -1;
6723 if (temp < 0 && ! current_function_interrupt && epilogue_p >= 0)
6725 HARD_REG_SET temps = (regs_invalidated_by_call
6726 & ~fixed_reg_set
6727 & savable_regs);
6728 if (epilogue_p > 0)
6730 int nreg = 0;
6731 if (crtl->return_rtx)
6733 machine_mode mode;
6734 mode = GET_MODE (crtl->return_rtx);
6735 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6736 nreg = hard_regno_nregs (FIRST_RET_REG, mode);
6738 for (i = 0; i < nreg; i++)
6739 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6740 if (crtl->calls_eh_return)
6742 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6743 for (i = 0; i <= 3; i++)
6744 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6747 if (epilogue_p <= 0)
6749 for (i = FIRST_PARM_REG;
6750 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6751 CLEAR_HARD_REG_BIT (temps, i);
6752 if (cfun->static_chain_decl != NULL)
6753 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6755 temp = scavenge_reg (&temps);
6757 if (temp < 0 && live_regs_mask)
6759 HARD_REG_SET temps;
6761 temps = *live_regs_mask;
6762 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6763 temp = scavenge_reg (&temps);
6765 if (temp < 0)
6767 rtx adj_reg, tmp_reg, mem;
6769 /* If we reached here, the most likely case is the (sibcall)
6770 epilogue. Use a special push/pop sequence for such a case as
6771 the last resort. This looks lengthy, but it is not a problem
6772 because it seems to be very rare. */
6773 gcc_assert (epilogue_p);
6775 /* ??? There is still the slight possibility that r4 or
6776 r5 have been reserved as fixed registers or assigned
6777 as global registers, and they change during an
6778 interrupt. There are possible ways to handle this:
6780 - If we are adjusting the frame pointer (r14), we can do
6781 with a single temp register and an ordinary push / pop
6782 on the stack.
6783 - Grab any call-used or call-saved registers (i.e. not
6784 fixed or globals) for the temps we need. We might
6785 also grab r14 if we are adjusting the stack pointer.
6786 If we can't find enough available registers, issue
6787 a diagnostic and die - the user must have reserved
6788 way too many registers.
6789 But since all this is rather unlikely to happen and
6790 would require extra testing, we just die if r4 / r5
6791 are not available. */
6792 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6793 && !global_regs[4] && !global_regs[5]);
6795 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6796 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6797 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6798 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6799 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6800 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6801 emit_move_insn (mem, tmp_reg);
6802 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6803 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6804 emit_move_insn (mem, tmp_reg);
6805 emit_move_insn (reg, adj_reg);
6806 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6807 emit_move_insn (adj_reg, mem);
6808 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6809 emit_move_insn (tmp_reg, mem);
6810 /* Tell flow the insns that pop r4/r5 aren't dead. */
6811 emit_use (tmp_reg);
6812 emit_use (adj_reg);
6813 return;
6815 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6817 /* If SIZE is negative, subtract the positive value.
6818 This sometimes allows a constant pool entry to be shared
6819 between prologue and epilogue code. */
6820 if (size < 0)
6822 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6823 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6825 else
6827 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6828 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6830 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6831 gen_rtx_SET (reg, gen_rtx_PLUS (SImode, reg,
6832 GEN_INT (size))));
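/* Worked example for the partial-adjustment path above, assuming
   STACK_BOUNDARY / BITS_PER_UNIT == 4 and the usual signed 8-bit add
   immediate: for SIZE == -184, CONST_OK_FOR_ADD (-184) fails, but
   size / 2 & -align == -92 and the remainder -92 both fit, so two
   `add #-92,reg' adjustments are emitted and the stack stays 4-byte
   aligned between them.  Only when neither form fits does the code fall
   back to loading the constant into a scavenged temporary (or, as a last
   resort, the r4/r5 push/pop dance).  */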
6837 /* Emit the specified insn and mark it as frame related. */
6838 static rtx_insn *
6839 emit_frame_insn (rtx x)
6841 rtx_insn *insn = emit_insn (x);
6842 RTX_FRAME_RELATED_P (insn) = 1;
6843 return insn;
6846 /* Output RTL to push register RN onto the stack. */
6847 static rtx
6848 push (int rn)
6850 rtx x;
6851 if (rn == FPUL_REG)
6852 x = gen_push_fpul ();
6853 else if (rn == FPSCR_REG)
6854 x = gen_push_fpscr ();
6855 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD
6856 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6858 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6859 return NULL_RTX;
6860 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6862 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6863 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6864 else
6865 x = gen_push (gen_rtx_REG (SImode, rn));
6867 x = emit_frame_insn (x);
6868 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6869 return x;
6872 /* Output RTL to pop register RN from the stack. */
6873 static void
6874 pop (int rn)
6876 rtx x, sp_reg, reg;
6877 if (rn == FPUL_REG)
6878 x = gen_pop_fpul ();
6879 else if (rn == FPSCR_REG)
6880 x = gen_pop_fpscr ();
6881 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD
6882 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6884 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6885 return;
6886 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6888 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6889 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6890 else
6891 x = gen_pop (gen_rtx_REG (SImode, rn));
6893 x = emit_insn (x);
6895 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6896 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
6897 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
6898 : SET_DEST (PATTERN (x)));
6899 add_reg_note (x, REG_CFA_RESTORE, reg);
6900 add_reg_note (x, REG_CFA_ADJUST_CFA,
6901 gen_rtx_SET (sp_reg,
6902 plus_constant (SImode, sp_reg,
6903 GET_MODE_SIZE (GET_MODE (reg)))));
6904 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6905 RTX_FRAME_RELATED_P (x) = 1;
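/* Unlike push (), which relies on RTX_FRAME_RELATED_P plus a REG_INC note,
   pop () has to describe the unwind effect explicitly: REG_CFA_RESTORE
   records that the register's saved value has been reloaded, and
   REG_CFA_ADJUST_CFA records the implicit stack-pointer increment of the
   post-increment load, so the CFI emitted for the epilogue stays accurate
   even though the insn itself is just a move.  */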
6908 /* Generate code to push the regs specified in the mask. */
6909 static void
6910 push_regs (HARD_REG_SET *mask, bool interrupt_handler)
6912 bool skip_fpscr = false;
6914 /* Push PR last; this gives better latencies after the prologue, and
6915 candidates for the return delay slot when there are no general
6916 registers pushed. */
6917 for (int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6918 i < FIRST_PSEUDO_REGISTER; i++)
6920 /* If this is an interrupt handler, and the SZ bit varies,
6921 and we have to push any floating point register, we need
6922 to switch to the correct precision first. */
6923 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6924 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6926 push (FPSCR_REG);
6927 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), ~*mask);
6928 skip_fpscr = true;
6930 if (i != PR_REG
6931 && (i != FPSCR_REG || ! skip_fpscr)
6932 && TEST_HARD_REG_BIT (*mask, i))
6934 /* If the ISR has RESBANK attribute assigned, don't push any of
6935 the following registers - R0-R14, MACH, MACL and GBR. */
6936 if (! (sh_cfun_resbank_handler_p ()
6937 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6938 || i == MACH_REG
6939 || i == MACL_REG
6940 || i == GBR_REG)))
6941 push (i);
6945 /* Push banked registers last to improve delay slot opportunities. */
6946 if (interrupt_handler)
6948 bool use_movml = false;
6950 if (TARGET_SH2A)
6952 unsigned int count = 0;
6954 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6955 if (TEST_HARD_REG_BIT (*mask, i))
6956 count++;
6957 else
6958 break;
6960 /* Use movml when all banked registers are pushed. */
6961 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
6962 use_movml = true;
6965 if (sh_cfun_resbank_handler_p ())
6966 ; /* Do nothing. */
6967 else if (use_movml)
6969 rtx x, mem, reg, set;
6970 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6972 /* We must avoid scheduling the multiple-store insn together
6973 with other insns. */
6974 emit_insn (gen_blockage ());
6975 x = gen_movml_push_banked (sp_reg);
6976 x = emit_frame_insn (x);
6977 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6979 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
6980 reg = gen_rtx_REG (SImode, i);
6981 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
6984 set = gen_rtx_SET (sp_reg, plus_constant (Pmode, sp_reg, - 32));
6985 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
6986 emit_insn (gen_blockage ());
6988 else
6989 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6990 if (TEST_HARD_REG_BIT (*mask, i))
6991 push (i);
6994 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6995 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6996 push (PR_REG);
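/* Ordering rationale, as reflected above: ordinary registers are pushed
   first, while banked registers and finally PR are pushed last, which
   gives later passes better candidates for the return delay slot.  For
   SH2A interrupt handlers where every banked register needs saving, a
   single movml push (gen_movml_push_banked) replaces the individual
   pushes, bracketed by blockages so the multi-register store is not
   scheduled apart from its CFA notes.  */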
6999 /* Work out the registers which need to be saved, both as a mask and a
7000 count of saved bytes. Return the count.
7002 If doing a pragma interrupt function, then push all regs used by the
7003 function, and if we call another function (we can tell by looking at PR),
7004 make sure that all the regs it clobbers are safe too. */
7005 static int
7006 calc_live_regs (HARD_REG_SET *live_regs_mask)
7008 unsigned int reg;
7009 tree attrs;
7010 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
7011 bool nosave_low_regs;
7013 attrs = DECL_ATTRIBUTES (current_function_decl);
7014 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
7015 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
7016 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
7017 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
7019 CLEAR_HARD_REG_SET (*live_regs_mask);
7020 if (TARGET_FPU_DOUBLE && TARGET_FMOVD && interrupt_handler
7021 && df_regs_ever_live_p (FPSCR_REG))
7022 target_flags &= ~MASK_FPU_SINGLE;
7023 /* If we can avoid a lot of saves by switching to double mode, do that. */
7024 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD && TARGET_FPU_SINGLE)
7025 for (int count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
7026 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
7027 && (! call_used_regs[reg]
7028 || interrupt_handler)
7029 && ++count > 2)
7031 target_flags &= ~MASK_FPU_SINGLE;
7032 break;
7036 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7037 bool pr_live = (pr_initial
7038 ? (!REG_P (pr_initial)
7039 || REGNO (pr_initial) != (PR_REG))
7040 : df_regs_ever_live_p (PR_REG));
7041 /* For Shcompact, if not optimizing, we end up with a memory reference
7042 using the return address pointer for __builtin_return_address even
7043 though there is no actual need to put the PR register on the stack. */
7044 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7046 /* Force PR to be live if the prologue has to call the SHmedia
7047 argument decoder or register saver. */
7048 bool has_call = pr_live;
7050 int count;
7051 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7053 if (reg == PR_REG
7054 ? pr_live
7055 : interrupt_handler
7056 ? (/* Need to save all the regs ever live. */
7057 (df_regs_ever_live_p (reg)
7058 || (call_used_regs[reg]
7059 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7060 || reg == PIC_OFFSET_TABLE_REGNUM)
7061 && has_call))
7062 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7063 && reg != RETURN_ADDRESS_POINTER_REGNUM
7064 && reg != T_REG && reg != GBR_REG
7065 && reg != FPSCR_MODES_REG && reg != FPSCR_STAT_REG
7066 /* Push fpscr only on targets which have an FPU. */
7067 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7068 : (/* Only push those regs which are used and need to be saved. */
7069 (false)
7070 || (df_regs_ever_live_p (reg)
7071 && ((!call_used_regs[reg]
7072 && !(reg != PIC_OFFSET_TABLE_REGNUM
7073 && fixed_regs[reg]
7074 && call_used_or_fixed_reg_p (reg)))
7075 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7076 || (crtl->calls_eh_return
7077 && (reg == EH_RETURN_DATA_REGNO (0)
7078 || reg == EH_RETURN_DATA_REGNO (1)
7079 || reg == EH_RETURN_DATA_REGNO (2)
7080 || reg == EH_RETURN_DATA_REGNO (3)))
7081 || ((reg == MACL_REG || reg == MACH_REG)
7082 && df_regs_ever_live_p (reg)
7083 && sh_cfun_attr_renesas_p ())
7086 SET_HARD_REG_BIT (*live_regs_mask, reg);
7087 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7089 if (TARGET_FPU_DOUBLE && TARGET_FMOVD
7090 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7092 if (FP_REGISTER_P (reg))
7094 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7096 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7097 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7100 else if (XD_REGISTER_P (reg))
7102 /* Must switch to double mode to access these registers. */
7103 target_flags &= ~MASK_FPU_SINGLE;
7107 if (nosave_low_regs && reg == R8_REG)
7108 break;
7111 return count;
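/* Note that the value returned above is a byte count, not a register
   count: each saved register contributes GET_MODE_SIZE of its natural
   mode, which is what rounded_frame_size and the prologue/epilogue
   bookkeeping expect.  Interrupt handlers are the expensive case, since
   they must also save call-clobbered registers that are live anywhere or
   are implicitly clobbered by calls made from the handler.  */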
7114 /* Code to generate prologue and epilogue sequences */
7116 /* PUSHED is the number of bytes that are being pushed on the
7117 stack for register saves. Return the frame size, padded
7118 appropriately so that the stack stays properly aligned. */
7119 static HOST_WIDE_INT
7120 rounded_frame_size (int pushed)
7122 HOST_WIDE_INT size = get_frame_size ();
7123 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7125 if (ACCUMULATE_OUTGOING_ARGS)
7126 size += crtl->outgoing_args_size;
7128 return ((size + pushed + align - 1) & -align) - pushed;
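/* For example, with 20 bytes of locals, PUSHED == 12 bytes of register
   saves and ALIGN == 8 (a 64-bit stack boundary),
   ((20 + 12 + 7) & -8) - 12 == 20, i.e. the local area is padded so that
   register saves plus frame total a multiple of the boundary (32 bytes
   here).  */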
7131 /* Expand code for the function prologue. */
7132 void
7133 sh_expand_prologue (void)
7135 int save_flags = target_flags;
7136 tree sp_switch_attr
7137 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7139 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7141 /* We have pretend args if we had an object sent partially in registers
7142 and partially on the stack, e.g. a large structure. */
7143 int pretend_args = crtl->args.pretend_args_size;
7144 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7145 && (NPARM_REGS(SImode)
7146 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7147 pretend_args = 0;
7149 output_stack_adjust (-pretend_args, stack_pointer_rtx, 0, NULL, true);
7150 int stack_usage = pretend_args;
7152 /* Emit the code for SETUP_VARARGS. */
7153 if (cfun->stdarg)
7155 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7157 /* Push arg regs as if they'd been provided by caller in stack. */
7158 for (int i = 0; i < NPARM_REGS(SImode); i++)
7160 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7162 if (i >= (NPARM_REGS(SImode)
7163 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7165 break;
7166 push (rn);
7167 stack_usage += GET_MODE_SIZE (SImode);
7172 /* If we're supposed to switch stacks at function entry, do so now. */
7173 if (sp_switch_attr)
7175 rtx lab, newsrc;
7176 /* The argument specifies a variable holding the address of the
7177 stack the interrupt function should switch to/from at entry/exit. */
7178 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7179 const char* s = ggc_strdup (TREE_STRING_POINTER (arg));
7180 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7182 lab = add_constant (sp_switch, SImode, 0);
7183 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7185 emit_insn (gen_sp_switch_1 (newsrc));
7188 HARD_REG_SET live_regs_mask;
7189 int d = calc_live_regs (&live_regs_mask);
7190 /* ??? Maybe we could save some switching if we can move a mode switch
7191 that already happens to be at the function start into the prologue. */
7192 if (target_flags != save_flags && ! current_function_interrupt)
7193 emit_insn (gen_toggle_sz ());
7195 push_regs (&live_regs_mask, current_function_interrupt);
7196 stack_usage += d;
7198 if (flag_pic && !TARGET_FDPIC
7199 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7200 emit_insn (gen_GOTaddr2picreg (const0_rtx));
7202 if (target_flags != save_flags && ! current_function_interrupt)
7203 emit_insn (gen_toggle_sz ());
7205 target_flags = save_flags;
7207 output_stack_adjust (-rounded_frame_size (d),
7208 stack_pointer_rtx, 0, NULL, true);
7209 stack_usage += rounded_frame_size (d);
7211 if (frame_pointer_needed)
7212 emit_frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7214 /* If we are profiling, make sure no instructions are scheduled before
7215 the call to mcount. Similarly, if call instructions get scheduled
7216 ahead of frame-related insns, the unwinder gets confused, because
7217 SH currently has no unwind info for function epilogues. */
7218 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7219 emit_insn (gen_blockage ());
7221 if (flag_stack_usage_info)
7222 current_function_static_stack_size = stack_usage;
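/* The prologue sequence above, in order: allocate the pretend-arg area,
   spill the unnamed argument registers for varargs functions, optionally
   switch stacks (sp_switch attribute), push the live registers computed by
   calc_live_regs (toggling the FPU transfer-size mode around the pushes
   when calc_live_regs switched precision), materialize the PIC register if
   needed, allocate the rounded local frame, and finally set up the frame
   pointer.  A scheduling blockage is emitted when profiling or unwinding
   so that none of this is moved past the mcount call or reordered in ways
   the (epilogue-less) SH unwind info cannot describe.  */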
7225 /* Expand code for the function epilogue. */
7226 void
7227 sh_expand_epilogue (bool sibcall_p)
7229 int save_flags = target_flags;
7230 bool fpscr_deferred = false;
7231 int e = sibcall_p ? -1 : 1;
7233 HARD_REG_SET live_regs_mask;
7234 int d = calc_live_regs (&live_regs_mask);
7236 int save_size = d;
7237 int frame_size = rounded_frame_size (d);
7239 if (frame_pointer_needed)
7241 /* We must avoid scheduling the epilogue with previous basic blocks.
7242 See PR/18032 and PR/40313. */
7243 emit_insn (gen_blockage ());
7244 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7245 &live_regs_mask, true);
7247 /* We must avoid moving the stack pointer adjustment past code
7248 which reads from the local frame, else an interrupt could
7249 occur after the SP adjustment and clobber data in the local
7250 frame. */
7251 emit_insn (gen_blockage ());
7252 emit_frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7254 else if (frame_size)
7256 /* We must avoid moving the stack pointer adjustment past code
7257 which reads from the local frame, else an interrupt could
7258 occur after the SP adjustment and clobber data in the local
7259 frame. */
7260 emit_insn (gen_blockage ());
7261 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7262 &live_regs_mask, true);
7265 /* Pop all the registers. */
7267 if (target_flags != save_flags && ! current_function_interrupt)
7268 emit_insn (gen_toggle_sz ());
7271 int last_reg;
7273 save_size = 0;
7274 /* For an ISR with RESBANK attribute assigned, don't pop PR
7275 register. */
7276 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7277 && !sh_cfun_resbank_handler_p ())
7279 if (!frame_pointer_needed)
7280 emit_insn (gen_blockage ());
7281 pop (PR_REG);
7284 /* Banked registers are popped first to avoid being scheduled in the
7285 delay slot. RTE switches banks before the ds instruction. */
7286 if (current_function_interrupt)
7288 bool use_movml = false;
7290 if (TARGET_SH2A)
7292 unsigned int count = 0;
7294 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7295 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7296 count++;
7297 else
7298 break;
7300 /* Use movml when all banked registers are popped. */
7301 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7302 use_movml = true;
7305 if (sh_cfun_resbank_handler_p ())
7306 ; /* Do nothing. */
7307 else if (use_movml)
7309 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7311 /* We must avoid scheduling the multiple-load insn together
7312 with other insns. */
7313 emit_insn (gen_blockage ());
7314 emit_insn (gen_movml_pop_banked (sp_reg));
7315 emit_insn (gen_blockage ());
7317 else
7318 for (int i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7319 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7320 pop (i);
7322 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7324 else
7325 last_reg = FIRST_PSEUDO_REGISTER;
7327 for (int i = 0; i < last_reg; i++)
7329 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7331 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7332 && hard_reg_set_intersect_p (live_regs_mask,
7333 reg_class_contents[DF_REGS]))
7334 fpscr_deferred = true;
7335 /* For an ISR with RESBANK attribute assigned, don't pop
7336 following registers, R0-R14, MACH, MACL and GBR. */
7337 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7338 && ! (sh_cfun_resbank_handler_p ()
7339 && ((j >= FIRST_GENERAL_REG
7340 && j < LAST_GENERAL_REG)
7341 || j == MACH_REG
7342 || j == MACL_REG
7343 || j == GBR_REG)))
7344 pop (j);
7346 if (j == FIRST_FP_REG && fpscr_deferred)
7347 pop (FPSCR_REG);
7350 if (target_flags != save_flags && ! current_function_interrupt)
7351 emit_insn (gen_toggle_sz ());
7352 target_flags = save_flags;
7354 output_stack_adjust (crtl->args.pretend_args_size + save_size,
7355 stack_pointer_rtx, e, NULL, true);
7357 if (crtl->calls_eh_return)
7358 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7359 EH_RETURN_STACKADJ_RTX));
7361 /* Switch back to the normal stack if necessary. */
7362 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7363 emit_insn (gen_sp_switch_2 ());
7365 /* Tell flow the insn that pops PR isn't dead. */
7366 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7367 emit_use (gen_rtx_REG (SImode, PR_REG));
7370 /* Emit code to change the current function's return address to RA.
7371 TEMP is available as a scratch register, if needed. */
7372 void
7373 sh_set_return_address (rtx ra, rtx tmp)
7375 HARD_REG_SET live_regs_mask;
7376 int d = calc_live_regs (&live_regs_mask);
7378 /* If PR isn't live, we can set it directly. */
7379 if (! TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7381 rtx rr = gen_rtx_REG (SImode, PR_REG);
7382 emit_insn (GEN_MOV (rr, ra));
7383 /* Tell flow the register for return isn't dead. */
7384 emit_use (rr);
7385 return;
7388 int pr_offset = rounded_frame_size (d);
7390 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7392 if (frame_pointer_needed)
7393 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7394 else
7395 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
7397 tmp = gen_frame_mem (Pmode, tmp);
7398 emit_insn (GEN_MOV (tmp, ra));
7399 /* Tell flow this store isn't dead. */
7400 emit_use (tmp);
7403 /* Clear variables at function end. */
7404 static void
7405 sh_output_function_epilogue (FILE *)
7409 static rtx
7410 sh_builtin_saveregs (void)
7412 /* First unnamed integer register. */
7413 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7414 /* Number of integer registers we need to save. */
7415 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7416 /* First unnamed SFmode float reg */
7417 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7418 /* Number of SFmode float regs to save. */
7419 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7420 rtx regbuf, fpregs;
7421 int bufsize, regno;
7422 alias_set_type alias_set;
7424 if (!TARGET_FPU_ANY)
7426 error ("%<__builtin_saveregs%> not supported by this subtarget");
7427 return const0_rtx;
7430 /* Allocate block of memory for the regs. */
7431 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7432 Or can assign_stack_local accept a 0 SIZE argument? */
7433 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7435 if (n_floatregs & 1)
7437 rtx addr;
7439 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7440 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7441 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7442 regbuf = change_address (regbuf, BLKmode, addr);
7444 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7446 rtx addr, mask;
7448 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7449 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
7450 XEXP (regbuf, 0), 4));
7451 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7452 emit_insn (gen_andsi3 (addr, addr, mask));
7453 regbuf = change_address (regbuf, BLKmode, addr);
7455 else
7456 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7457 alias_set = get_varargs_alias_set ();
7458 set_mem_alias_set (regbuf, alias_set);
7460 /* Save int args.
7461 This is optimized to only save the regs that are necessary. Explicitly
7462 named args need not be saved. */
7463 if (n_intregs > 0)
7464 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7465 adjust_address (regbuf, BLKmode,
7466 n_floatregs * UNITS_PER_WORD),
7467 n_intregs);
7469 /* Save float args.
7470 This is optimized to only save the regs that are necessary. Explicitly
7471 named args need not be saved.
7472 We explicitly build a pointer to the buffer because it halves the insn
7473 count when not optimizing (otherwise the pointer is built for each reg
7474 saved).
7475 We emit the moves in reverse order so that we can use predecrement. */
7477 fpregs = copy_to_mode_reg (Pmode,
7478 plus_constant (Pmode, XEXP (regbuf, 0),
7479 n_floatregs * UNITS_PER_WORD));
7480 if (TARGET_FPU_DOUBLE)
7482 rtx mem;
7483 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7485 emit_insn (gen_addsi3 (fpregs, fpregs,
7486 GEN_INT (-2 * UNITS_PER_WORD)));
7487 mem = change_address (regbuf, DFmode, fpregs);
7488 emit_move_insn (mem,
7489 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7491 regno = first_floatreg;
7492 if (regno & 1)
7494 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7495 mem = change_address (regbuf, SFmode, fpregs);
7496 emit_move_insn (mem,
7497 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
7498 + regno - SH_REG_MSW_OFFSET));
7501 else
7502 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7504 rtx mem;
7506 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7507 mem = change_address (regbuf, SFmode, fpregs);
7508 emit_move_insn (mem,
7509 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7512 /* Return the address of the regbuf. */
7513 return XEXP (regbuf, 0);
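/* Illustratively (a sketch inferred from the code above), the buffer just
   built is laid out as

     regbuf + 0                              FP argument save area,
                                             n_floatregs words, filled from
                                             the top downwards
     regbuf + n_floatregs * UNITS_PER_WORD   integer argument save area,
                                             n_intregs words

   and the returned address is what sh_va_start below stores into
   __va_next_fp.  */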
7516 /* Define the `__builtin_va_list' type for the ABI. */
7517 static tree
7518 sh_build_builtin_va_list (void)
7520 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7521 tree record, type_decl;
7523 if ((! TARGET_SH2E && ! TARGET_SH4)
7524 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7525 return ptr_type_node;
7527 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7528 type_decl = build_decl (BUILTINS_LOCATION,
7529 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7531 f_next_o = build_decl (BUILTINS_LOCATION,
7532 FIELD_DECL, get_identifier ("__va_next_o"),
7533 ptr_type_node);
7534 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7535 FIELD_DECL,
7536 get_identifier ("__va_next_o_limit"),
7537 ptr_type_node);
7538 f_next_fp = build_decl (BUILTINS_LOCATION,
7539 FIELD_DECL, get_identifier ("__va_next_fp"),
7540 ptr_type_node);
7541 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7542 FIELD_DECL,
7543 get_identifier ("__va_next_fp_limit"),
7544 ptr_type_node);
7545 f_next_stack = build_decl (BUILTINS_LOCATION,
7546 FIELD_DECL, get_identifier ("__va_next_stack"),
7547 ptr_type_node);
7549 DECL_FIELD_CONTEXT (f_next_o) = record;
7550 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7551 DECL_FIELD_CONTEXT (f_next_fp) = record;
7552 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7553 DECL_FIELD_CONTEXT (f_next_stack) = record;
7555 TYPE_STUB_DECL (record) = type_decl;
7556 TYPE_NAME (record) = type_decl;
7557 TYPE_FIELDS (record) = f_next_o;
7558 DECL_CHAIN (f_next_o) = f_next_o_limit;
7559 DECL_CHAIN (f_next_o_limit) = f_next_fp;
7560 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
7561 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
7563 layout_type (record);
7565 return record;
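/* Illustratively, the record built above corresponds to something like

     struct __va_list_tag
     {
       void *__va_next_o;        // next integer arg in the register save area
       void *__va_next_o_limit;  // end of the integer register save area
       void *__va_next_fp;       // next FP arg in the register save area
       void *__va_next_fp_limit; // end of the FP register save area
       void *__va_next_stack;    // next argument passed on the stack
     };

   a sketch only; the fields are initialized by sh_va_start below.  */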
7568 /* Implement `va_start' for varargs and stdarg. */
7569 static void
7570 sh_va_start (tree valist, rtx nextarg)
7572 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7573 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7574 tree t, u;
7575 int nfp, nint;
7577 if ((! TARGET_SH2E && ! TARGET_SH4)
7578 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7580 std_expand_builtin_va_start (valist, nextarg);
7581 return;
7584 f_next_o = TYPE_FIELDS (va_list_type_node);
7585 f_next_o_limit = DECL_CHAIN (f_next_o);
7586 f_next_fp = DECL_CHAIN (f_next_o_limit);
7587 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7588 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7590 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7591 NULL_TREE);
7592 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7593 valist, f_next_o_limit, NULL_TREE);
7594 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7595 NULL_TREE);
7596 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7597 valist, f_next_fp_limit, NULL_TREE);
7598 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7599 valist, f_next_stack, NULL_TREE);
7601 /* Call __builtin_saveregs. */
7602 u = make_tree (sizetype, expand_builtin_saveregs ());
7603 u = fold_convert (ptr_type_node, u);
7604 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7605 TREE_SIDE_EFFECTS (t) = 1;
7606 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7608 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7609 if (nfp < 8)
7610 nfp = 8 - nfp;
7611 else
7612 nfp = 0;
7613 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
7614 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7615 TREE_SIDE_EFFECTS (t) = 1;
7616 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7618 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7619 TREE_SIDE_EFFECTS (t) = 1;
7620 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7622 nint = crtl->args.info.arg_count[SH_ARG_INT];
7623 if (nint < 4)
7624 nint = 4 - nint;
7625 else
7626 nint = 0;
7627 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
7628 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7629 TREE_SIDE_EFFECTS (t) = 1;
7630 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7632 u = make_tree (ptr_type_node, nextarg);
7633 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7634 TREE_SIDE_EFFECTS (t) = 1;
7635 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
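/* A worked example (illustrative, assuming 4-byte words): for a variadic
   function with one named integer argument and no named FP arguments,
   nfp == 8 and nint == 3, so the code above sets __va_next_fp to the
   register save buffer, __va_next_fp_limit and __va_next_o to that address
   plus 8 * 4 bytes, __va_next_o_limit to __va_next_o plus 3 * 4 bytes, and
   __va_next_stack to NEXTARG.  */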
7638 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7639 member, return it. */
7640 static tree
7641 find_sole_member (tree type)
7643 tree field, member = NULL_TREE;
7645 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7647 if (TREE_CODE (field) != FIELD_DECL)
7648 continue;
7649 if (!DECL_SIZE (field))
7650 return NULL_TREE;
7651 if (integer_zerop (DECL_SIZE (field)))
7652 continue;
7653 if (member)
7654 return NULL_TREE;
7655 member = field;
7657 return member;
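/* For example (illustrative), for 'struct A { double d; };' the sole
   member is 'd', while a zero-sized field such as 'int pad[0];' would be
   skipped by the loop above.  */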
7660 /* Implement `va_arg'. */
7661 static tree
7662 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7663 gimple_seq *post_p ATTRIBUTE_UNUSED)
7665 tree tmp;
7666 tree addr, lab_over = NULL, result = NULL;
7667 tree eff_type;
7669 const bool pass_by_ref
7670 = !VOID_TYPE_P (type) && must_pass_va_arg_in_stack (type);
7672 if (pass_by_ref)
7673 type = build_pointer_type (type);
7675 HOST_WIDE_INT size = int_size_in_bytes (type);
7676 HOST_WIDE_INT rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7677 tree pptr_type_node = build_pointer_type (ptr_type_node);
7679 if ((TARGET_SH2E || TARGET_SH4)
7680 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7682 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7683 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7684 tree lab_false;
7685 tree member;
7687 f_next_o = TYPE_FIELDS (va_list_type_node);
7688 f_next_o_limit = DECL_CHAIN (f_next_o);
7689 f_next_fp = DECL_CHAIN (f_next_o_limit);
7690 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7691 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7693 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7694 NULL_TREE);
7695 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7696 valist, f_next_o_limit, NULL_TREE);
7697 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7698 valist, f_next_fp, NULL_TREE);
7699 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7700 valist, f_next_fp_limit, NULL_TREE);
7701 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7702 valist, f_next_stack, NULL_TREE);
7704 /* Structures with a single member with a distinct mode are passed
7705 like their member. This is relevant if the latter has a REAL_TYPE
7706 or COMPLEX_TYPE type. */
7707 eff_type = type;
7708 while (TREE_CODE (eff_type) == RECORD_TYPE
7709 && (member = find_sole_member (eff_type))
7710 && (SCALAR_FLOAT_TYPE_P (TREE_TYPE (member))
7711 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7712 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7714 tree field_type = TREE_TYPE (member);
7716 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7717 eff_type = field_type;
7718 else
7720 gcc_assert ((TYPE_ALIGN (eff_type)
7721 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7722 || (TYPE_ALIGN (eff_type)
7723 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7724 break;
7728 bool pass_as_float;
7729 if (TARGET_FPU_DOUBLE)
7731 pass_as_float = ((SCALAR_FLOAT_TYPE_P (eff_type) && size <= 8)
7732 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7733 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (eff_type))
7734 && size <= 16));
7736 else
7738 pass_as_float = (SCALAR_FLOAT_TYPE_P (eff_type) && size == 4);
7741 addr = create_tmp_var (pptr_type_node);
7742 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7743 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7745 valist = build_simple_mem_ref (addr);
7747 if (pass_as_float)
7749 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp));
7750 tree cmp;
7751 bool is_double = size == 8 && SCALAR_FLOAT_TYPE_P (eff_type);
7753 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7754 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7756 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7757 tmp = next_fp_limit;
7758 if (size > 4 && !is_double)
7759 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
7760 tmp = build2 (GE_EXPR, boolean_type_node,
7761 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7762 cmp = build3 (COND_EXPR, void_type_node, tmp,
7763 build1 (GOTO_EXPR, void_type_node,
7764 unshare_expr (lab_false)), NULL_TREE);
7765 if (!is_double)
7766 gimplify_and_add (cmp, pre_p);
7768 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7769 || (is_double || size == 16))
7771 tmp = fold_convert (sizetype, next_fp_tmp);
7772 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7773 size_int (UNITS_PER_WORD));
7774 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
7775 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7777 if (is_double)
7778 gimplify_and_add (cmp, pre_p);
7780 #ifdef FUNCTION_ARG_SCmode_WART
7781 if (TYPE_MODE (eff_type) == SCmode
7782 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7784 tree subtype = TREE_TYPE (eff_type);
7785 tree real, imag;
7787 imag
7788 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7789 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7791 real
7792 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7793 real = get_initialized_tmp_var (real, pre_p, NULL);
7795 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7796 if (type != eff_type)
7797 result = build1 (VIEW_CONVERT_EXPR, type, result);
7798 result = get_initialized_tmp_var (result, pre_p, NULL);
7800 #endif /* FUNCTION_ARG_SCmode_WART */
7802 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7803 gimplify_and_add (tmp, pre_p);
7805 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7806 gimplify_and_add (tmp, pre_p);
7808 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7809 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7810 gimplify_assign (unshare_expr (next_fp_tmp),
7811 unshare_expr (valist), pre_p);
7813 gimplify_assign (unshare_expr (valist),
7814 unshare_expr (next_fp_tmp), post_p);
7815 valist = next_fp_tmp;
7817 else
7819 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
7820 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7821 unshare_expr (next_o_limit));
7822 tmp = build3 (COND_EXPR, void_type_node, tmp,
7823 build1 (GOTO_EXPR, void_type_node,
7824 unshare_expr (lab_false)),
7825 NULL_TREE);
7826 gimplify_and_add (tmp, pre_p);
7828 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7829 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7831 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7832 gimplify_and_add (tmp, pre_p);
7834 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7835 gimplify_and_add (tmp, pre_p);
7837 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7838 gimplify_assign (unshare_expr (next_o),
7839 unshare_expr (next_o_limit), pre_p);
7841 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7842 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7845 if (!result)
7847 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7848 gimplify_and_add (tmp, pre_p);
7852 /* ??? In va-sh.h, there had been code to make values larger than
7853 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7855 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7856 if (result)
7858 gimplify_assign (result, tmp, pre_p);
7859 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
7860 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7861 gimplify_and_add (tmp, pre_p);
7863 else
7864 result = tmp;
7866 if (pass_by_ref)
7867 result = build_va_arg_indirect_ref (result);
7869 return result;
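/* A rough sketch (illustrative, not literal gimple) of the logic emitted
   above for the non-Renesas SH2E/SH4 ABI:

     if (argument is an FP candidate)
       addr = (next_fp >= next_fp_limit) ? &next_stack : &next_fp;
     else
       addr = (next_o + rsize > next_o_limit) ? &next_stack : &next_o;

   the value is then fetched through *addr and *addr is advanced past the
   argument by std_gimplify_va_arg_expr.  */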
7872 /* 64 bit floating point memory transfers are paired single precision loads
7873 or stores. So DWARF information needs fixing in little endian (unless
7874 PR=SZ=1 in FPSCR). */
7876 sh_dwarf_register_span (rtx reg)
7878 unsigned regno = REGNO (reg);
7880 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
7881 return NULL_RTX;
7883 return
7884 gen_rtx_PARALLEL (VOIDmode,
7885 gen_rtvec (2,
7886 gen_rtx_REG (SFmode, regno + 1),
7887 gen_rtx_REG (SFmode, regno)));
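/* For example (illustrative), a DFmode value living in the pair fr0/fr1 is
   described to the DWARF consumer as the two SFmode registers fr1 and fr0
   on little endian targets.  */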
7890 static machine_mode
7891 sh_promote_function_mode (const_tree type, machine_mode mode,
7892 int *punsignedp, const_tree funtype,
7893 int for_return)
7895 if (sh_promote_prototypes (funtype))
7896 return promote_mode (type, mode, punsignedp);
7897 else
7898 return default_promote_function_mode (type, mode, punsignedp, funtype,
7899 for_return);
7902 static bool
7903 sh_promote_prototypes (const_tree type)
7905 if (TARGET_HITACHI)
7906 return false;
7907 if (! type)
7908 return true;
7909 return ! sh_attr_renesas_p (type);
7912 static bool
7913 sh_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
7915 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7917 if (targetm.calls.must_pass_in_stack (arg))
7918 return true;
7920 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7921 wants to know about pass-by-reference semantics for incoming
7922 arguments. */
7923 if (! cum)
7924 return false;
7926 return false;
7929 static bool
7930 sh_callee_copies (cumulative_args_t cum, const function_arg_info &arg)
7932 /* ??? How can it possibly be correct to return true only on the
7933 caller side of the equation? Is there someplace else in the
7934 sh backend that's magically producing the copies? */
7935 return (get_cumulative_args (cum)->outgoing
7936 && ((arg.mode == BLKmode
7937 ? TYPE_ALIGN (arg.type)
7938 : GET_MODE_ALIGNMENT (arg.mode))
7939 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7942 static sh_arg_class
7943 get_sh_arg_class (machine_mode mode)
7945 if (TARGET_FPU_ANY && mode == SFmode)
7946 return SH_ARG_FLOAT;
7948 if (TARGET_FPU_DOUBLE
7949 && (GET_MODE_CLASS (mode) == MODE_FLOAT
7950 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT))
7951 return SH_ARG_FLOAT;
7953 return SH_ARG_INT;
7956 /* Round a register number up to a proper boundary for an arg of mode
7957 MODE.
7958 The SH doesn't care about double alignment, so we only
7959 round doubles to even regs when asked to explicitly. */
7960 static int
7961 sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode)
7963 /* FIXME: This used to be a macro and has been copy pasted into this
7964 function as is. Make this more readable. */
7965 return
7966 (((TARGET_ALIGN_DOUBLE
7967 || (TARGET_FPU_DOUBLE
7968 && (mode == DFmode || mode == DCmode)
7969 && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode)))
7970 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD)
7971 ? (cum.arg_count[(int) get_sh_arg_class (mode)]
7972 + (cum.arg_count[(int) get_sh_arg_class (mode)] & 1))
7973 : cum.arg_count[(int) get_sh_arg_class (mode)]);
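/* E.g. (illustrative) with TARGET_FPU_DOUBLE, a DFmode argument arriving
   when cum.arg_count[(int) SH_ARG_FLOAT] == 1 is rounded up to slot 2, so
   the DF value starts in an even FP argument register.  */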
7976 /* Return true if arg of the specified mode should be passed in a register
7977 or false otherwise. */
7978 static bool
7979 sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode,
7980 const_tree type)
7982 /* FIXME: This used to be a macro and has been copy pasted into this
7983 function as is. Make this more readable. */
7984 return
7985 ((type == 0
7986 || (! TREE_ADDRESSABLE (type)
7987 && (! (TARGET_HITACHI || cum.renesas_abi)
7988 || ! (AGGREGATE_TYPE_P (type)
7989 || (!TARGET_FPU_ANY
7990 && (GET_MODE_CLASS (mode) == MODE_FLOAT
7991 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode)))))))
7992 && ! cum.force_mem
7993 && (TARGET_SH2E
7994 ? ((mode) == BLKmode
7995 ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD
7996 + int_size_in_bytes (type))
7997 <= NPARM_REGS (SImode) * UNITS_PER_WORD)
7998 : ((sh_round_reg (cum, mode)
7999 + sh_hard_regno_nregs (BASE_ARG_REG (mode), mode))
8000 <= NPARM_REGS (mode)))
8001 : sh_round_reg (cum, mode) < NPARM_REGS (mode)));
8004 static int
8005 sh_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
8007 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8008 int words = 0;
8010 if (sh_pass_in_reg_p (*cum, arg.mode, arg.type)
8011 && !TARGET_FPU_DOUBLE
8012 && (sh_round_reg (*cum, arg.mode)
8013 + CEIL (arg.promoted_size_in_bytes (), UNITS_PER_WORD)
8014 > NPARM_REGS (arg.mode)))
8015 words = NPARM_REGS (arg.mode) - sh_round_reg (*cum, arg.mode);
8017 return words * UNITS_PER_WORD;
8021 /* Define where to put the arguments to a function.
8022 Value is zero to push the argument on the stack,
8023 or a hard register in which to store the argument.
8025 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8026 the preceding args and about the function being called.
8027 ARG is a description of the argument.
8029 On SH the first args are normally in registers
8030 and the rest are pushed. Any arg that starts within the first
8031 NPARM_REGS words is at least partially passed in a register unless
8032 its data type forbids. */
8033 static rtx
8034 sh_function_arg (cumulative_args_t ca_v, const function_arg_info &arg)
8036 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8037 machine_mode mode = arg.mode;
8039 if (arg.end_marker_p ())
8040 return ca->renesas_abi ? const1_rtx : const0_rtx;
8042 if (sh_pass_in_reg_p (*ca, mode, arg.type)
8043 && (arg.named || ! (TARGET_HITACHI || ca->renesas_abi)))
8045 int regno;
8047 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8048 && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1)))
8050 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8051 gen_rtx_REG (SFmode,
8052 BASE_ARG_REG (mode)
8053 + (sh_round_reg (*ca, mode) ^ 1)),
8054 const0_rtx);
8055 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8056 gen_rtx_REG (SFmode,
8057 BASE_ARG_REG (mode)
8058 + ((sh_round_reg (*ca, mode) + 1) ^ 1)),
8059 GEN_INT (4));
8060 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8063 /* If the alignment of a DF value causes an SF register to be
8064 skipped, we will use that skipped register for the next SF
8065 value. */
8066 if ((TARGET_HITACHI || ca->renesas_abi)
8067 && ca->free_single_fp_reg
8068 && mode == SFmode)
8069 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8071 regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode))
8072 ^ (mode == SFmode && TARGET_SH4
8073 && TARGET_LITTLE_ENDIAN
8074 && ! TARGET_HITACHI && ! ca->renesas_abi);
8075 return gen_rtx_REG (mode, regno);
8079 return NULL_RTX;
8082 /* Update the data in CUM to advance over argument ARG. */
8083 static void
8084 sh_function_arg_advance (cumulative_args_t ca_v,
8085 const function_arg_info &arg)
8087 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8089 if (ca->force_mem)
8090 ca->force_mem = false;
8092 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8094 /* Note that we've used the skipped register. */
8095 if (arg.mode == SFmode && ca->free_single_fp_reg)
8097 ca->free_single_fp_reg = 0;
8098 return;
8100 /* When we have a DF after an SF, there's an SF register that gets
8101 skipped in order to align the DF value. We note this skipped
8102 register, because the next SF value will use it, and not the
8103 SF that follows the DF. */
8104 if (arg.mode == DFmode
8105 && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode))
8107 ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode)
8108 + BASE_ARG_REG (arg.mode));
8112 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8113 || sh_pass_in_reg_p (*ca, arg.mode, arg.type))
8114 (ca->arg_count[(int) get_sh_arg_class (arg.mode)]
8115 = (sh_round_reg (*ca, arg.mode)
8116 + CEIL (arg.promoted_size_in_bytes (), UNITS_PER_WORD)));
8119 /* The Renesas calling convention doesn't quite fit into this scheme since
8120 the address is passed like an invisible argument, but one that is always
8121 passed in memory. */
8122 static rtx
8123 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8125 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8126 return NULL_RTX;
8127 return gen_rtx_REG (Pmode, 2);
8130 /* Worker function for TARGET_FUNCTION_VALUE.
8132 For the SH, this is like LIBCALL_VALUE, except that we must change the
8133 mode like PROMOTE_MODE does.
8134 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8135 tested here has to be kept in sync with the one in
8136 explow.cc:promote_mode. */
8137 static rtx
8138 sh_function_value (const_tree valtype,
8139 const_tree fn_decl_or_type,
8140 bool outgoing ATTRIBUTE_UNUSED)
8142 if (fn_decl_or_type
8143 && !DECL_P (fn_decl_or_type))
8144 fn_decl_or_type = NULL;
8146 return gen_rtx_REG (
8147 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8148 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8149 && (TREE_CODE (valtype) == INTEGER_TYPE
8150 || TREE_CODE (valtype) == ENUMERAL_TYPE
8151 || TREE_CODE (valtype) == BOOLEAN_TYPE
8152 || SCALAR_FLOAT_TYPE_P (valtype)
8153 || TREE_CODE (valtype) == OFFSET_TYPE))
8154 && sh_promote_prototypes (fn_decl_or_type)
8155 ? SImode : TYPE_MODE (valtype)),
8156 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8159 /* Worker function for TARGET_LIBCALL_VALUE. */
8160 static rtx
8161 sh_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8163 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8166 /* Return true if N is a possible register number of function value. */
8167 static bool
8168 sh_function_value_regno_p (const unsigned int regno)
8170 return regno == FIRST_RET_REG || (TARGET_SH2E && regno == FIRST_FP_RET_REG);
8173 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8174 static bool
8175 sh_return_in_memory (const_tree type, const_tree fndecl)
8177 return TYPE_MODE (type) == BLKmode
8178 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8179 && TREE_CODE (type) == RECORD_TYPE);
8182 /* We actually emit the code in sh_expand_prologue. We used to use
8183 a static variable to flag that we need to emit this code, but that
8184 doesn't work when inlining, when functions are deferred and then emitted
8185 later. Fortunately, we already have two flags that are part of struct
8186 function that tell if a function uses varargs or stdarg. */
8187 static void
8188 sh_setup_incoming_varargs (cumulative_args_t ca,
8189 const function_arg_info &arg,
8190 int *pretend_arg_size,
8191 int second_time ATTRIBUTE_UNUSED)
8193 gcc_assert (cfun->stdarg);
8194 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8196 int named_parm_regs = 0, anon_parm_regs;
8198 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl)))
8199 named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), arg.mode)
8200 + CEIL (arg.promoted_size_in_bytes (),
8201 UNITS_PER_WORD));
8202 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8203 if (anon_parm_regs > 0)
8204 *pretend_arg_size = anon_parm_regs * 4;
8208 static bool
8209 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
8211 return false;
8214 static bool
8215 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
8217 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8219 return ! (TARGET_HITACHI || ca->renesas_abi);
8223 /* Define the offset between two registers, one to be eliminated, and
8224 the other its replacement, at the start of a routine. */
8226 initial_elimination_offset (int from, int to)
8228 const int regs_saved_rounding = 0;
8229 int save_flags = target_flags;
8230 HARD_REG_SET live_regs_mask;
8232 int regs_saved = calc_live_regs (&live_regs_mask);
8234 int total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8235 target_flags = save_flags;
8237 int total_saved_regs_space = regs_saved + regs_saved_rounding;
8239 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8240 return total_saved_regs_space + total_auto_space;
8242 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8243 return total_saved_regs_space + total_auto_space;
8245 /* Initial gap between fp and sp is 0. */
8246 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8247 return 0;
8249 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8250 return rounded_frame_size (0);
8252 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8253 return rounded_frame_size (0);
8255 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8256 && (to == HARD_FRAME_POINTER_REGNUM
8257 || to == STACK_POINTER_REGNUM));
8258 return total_auto_space;
8261 /* Parse the -mfixed-range= option string. */
8262 void
8263 sh_fix_range (const char *const_str)
8265 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
8266 REG2 are either register names or register numbers. The effect
8267 of this option is to mark the registers in the range from REG1 to
8268 REG2 as ``fixed'' so they won't be used by the compiler. */
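/* For example, "-mfixed-range=r10-r11,r13-r13" (an illustrative value)
   marks r10, r11 and r13 as fixed.  */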
8270 char* str = strcpy ((char*)alloca (strlen (const_str) + 1), const_str);
8272 while (1)
8274 char* dash = strchr (str, '-');
8275 if (!dash)
8277 warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
8278 return;
8280 *dash = '\0';
8281 char* comma = strchr (dash + 1, ',');
8282 if (comma)
8283 *comma = '\0';
8285 int first = decode_reg_name (str);
8286 if (first < 0)
8288 warning (0, "unknown register name: %s", str);
8289 return;
8292 int last = decode_reg_name (dash + 1);
8293 if (last < 0)
8295 warning (0, "unknown register name: %s", dash + 1);
8296 return;
8299 *dash = '-';
8301 if (first > last)
8303 warning (0, "%s-%s is an empty range", str, dash + 1);
8304 return;
8307 for (int i = first; i <= last; ++i)
8308 fixed_regs[i] = 1;
8310 if (!comma)
8311 break;
8313 *comma = ',';
8314 str = comma + 1;
8318 /* Insert any deferred function attributes from earlier pragmas. */
8319 static void
8320 sh_insert_attributes (tree node, tree *attributes)
8322 if (TREE_CODE (node) != FUNCTION_DECL)
8323 return;
8325 /* We are only interested in fields. */
8326 if (!DECL_P (node))
8327 return;
8329 /* Append the attributes to the deferred attributes. */
8330 *sh_deferred_function_attributes_tail = *attributes;
8331 tree attrs = sh_deferred_function_attributes;
8332 if (!attrs)
8333 return;
8335 /* Some attributes imply or require the interrupt attribute. */
8336 if (!lookup_attribute ("interrupt_handler", attrs)
8337 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8339 /* If we have a trapa_handler, but no interrupt_handler attribute,
8340 insert an interrupt_handler attribute. */
8341 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8342 /* We can't use sh_pr_interrupt here because that's not in the
8343 java frontend. */
8344 attrs
8345 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8346 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8347 if the interrupt attribute is missing, we ignore the attribute
8348 and warn. */
8349 else if (lookup_attribute ("sp_switch", attrs)
8350 || lookup_attribute ("trap_exit", attrs)
8351 || lookup_attribute ("nosave_low_regs", attrs)
8352 || lookup_attribute ("resbank", attrs))
8354 tree *tail;
8356 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8358 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8359 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8360 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8361 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8362 warning (OPT_Wattributes,
8363 "%qE attribute only applies to interrupt functions",
8364 TREE_PURPOSE (attrs));
8365 else
8367 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8368 NULL_TREE);
8369 tail = &TREE_CHAIN (*tail);
8372 attrs = *attributes;
8376 /* Install the processed list. */
8377 *attributes = attrs;
8379 /* Clear deferred attributes. */
8380 sh_deferred_function_attributes = NULL_TREE;
8381 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8383 return;
8386 /*------------------------------------------------------------------------------
8387 Target specific attributes
8388 Supported attributes are:
8390 * interrupt_handler
8391 Specifies this function is an interrupt handler.
8393 * trapa_handler
8394 Like interrupt_handler, but don't save all registers.
8396 * sp_switch
8397 Specifies an alternate stack for an interrupt handler to run on.
8399 * trap_exit
8400 Use a trapa to exit an interrupt function instead of rte.
8402 * nosave_low_regs
8403 Don't save r0..r7 in an interrupt handler function.
8404 This is useful on SH3* and SH4*, which have a separate set of low
8405 regs for user and privileged modes.
8406 This is mainly to be used for non-reentrant interrupt handlers (i.e.
8407 those that run with interrupts disabled and thus can't be
8408 interrupted themselves).
8410 * renesas
8411 Use Renesas calling/layout conventions (functions and structures).
8413 * resbank
8414 In case of an interrupt handler function, use a register bank to
8415 save registers R0-R14, MACH, MACL, GBR and PR.
8416 This is available only on SH2A targets.
8418 * function_vector
8419 Declares a function to be called using the TBR relative addressing
8420 mode. Takes an argument that specifies the slot number in the table
8421 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
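   Illustrative usage (a sketch of typical declarations; the attribute
   arguments shown here are made-up values):

     void isr (void) __attribute__ ((interrupt_handler));
     void isr2 (void) __attribute__ ((interrupt_handler,
                                      sp_switch ("alt_stack"),
                                      trap_exit (11)));
     void isr3 (void) __attribute__ ((interrupt_handler, resbank));
     void vec_fn (void) __attribute__ ((function_vector (12)));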
8424 /* Handle a 'resbank' attribute. */
8425 static tree
8426 sh_handle_resbank_handler_attribute (tree * node, tree name,
8427 tree args ATTRIBUTE_UNUSED,
8428 int flags ATTRIBUTE_UNUSED,
8429 bool * no_add_attrs)
8431 if (!TARGET_SH2A)
8433 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8434 name);
8435 *no_add_attrs = true;
8437 if (TREE_CODE (*node) != FUNCTION_DECL)
8439 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8440 name);
8441 *no_add_attrs = true;
8444 return NULL_TREE;
8447 /* Handle an "interrupt_handler" attribute; arguments as in
8448 struct attribute_spec.handler. */
8449 static tree
8450 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8451 tree args ATTRIBUTE_UNUSED,
8452 int flags ATTRIBUTE_UNUSED,
8453 bool *no_add_attrs)
8455 if (TREE_CODE (*node) != FUNCTION_DECL)
8457 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8458 name);
8459 *no_add_attrs = true;
8462 return NULL_TREE;
8465 /* Handle a 'function_vector' attribute; arguments as in
8466 struct attribute_spec.handler. */
8467 static tree
8468 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8469 tree args ATTRIBUTE_UNUSED,
8470 int flags ATTRIBUTE_UNUSED,
8471 bool * no_add_attrs)
8473 if (!TARGET_SH2A)
8475 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8476 name);
8477 *no_add_attrs = true;
8479 else if (TREE_CODE (*node) != FUNCTION_DECL)
8481 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8482 name);
8483 *no_add_attrs = true;
8485 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8487 /* The argument must be a constant integer. */
8488 warning (OPT_Wattributes,
8489 "%qE attribute argument not an integer constant",
8490 name);
8491 *no_add_attrs = true;
8493 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8495 /* The argument value must be between 0 and 255. */
8496 warning (OPT_Wattributes,
8497 "%qE attribute argument should be between 0 to 255",
8498 name);
8499 *no_add_attrs = true;
8501 return NULL_TREE;
8504 /* Returns true if the function referred to by the symbol X has been
8505 assigned the attribute 'function_vector'. */
8506 bool
8507 sh2a_is_function_vector_call (rtx x)
8509 if (GET_CODE (x) == SYMBOL_REF
8510 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8512 tree tr = SYMBOL_REF_DECL (x);
8514 if (sh2a_function_vector_p (tr))
8515 return true;
8518 return false;
8521 /* Returns the function vector number, if the attribute
8522 'function_vector' is assigned, otherwise returns zero. */
8524 sh2a_get_function_vector_number (rtx x)
8526 if ((GET_CODE (x) == SYMBOL_REF)
8527 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8529 tree t = SYMBOL_REF_DECL (x);
8531 if (TREE_CODE (t) != FUNCTION_DECL)
8532 return 0;
8534 for (tree list = SH_ATTRIBUTES (t); list; list = TREE_CHAIN (list))
8535 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8536 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8538 return 0;
8540 else
8541 return 0;
8544 /* Handle an "sp_switch" attribute; arguments as in
8545 struct attribute_spec.handler. */
8546 static tree
8547 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8548 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8550 if (TREE_CODE (*node) != FUNCTION_DECL)
8552 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8553 name);
8554 *no_add_attrs = true;
8556 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8558 /* The argument must be a constant string. */
8559 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
8560 name);
8561 *no_add_attrs = true;
8564 return NULL_TREE;
8567 /* Handle a "trap_exit" attribute; arguments as in
8568 struct attribute_spec.handler. */
8569 static tree
8570 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8571 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8573 if (TREE_CODE (*node) != FUNCTION_DECL)
8575 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8576 name);
8577 *no_add_attrs = true;
8579 /* The argument specifies a trap number to be used in a trapa instruction
8580 at function exit (instead of an rte instruction). */
8581 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8583 /* The argument must be a constant integer. */
8584 warning (OPT_Wattributes, "%qE attribute argument not an "
8585 "integer constant", name);
8586 *no_add_attrs = true;
8589 return NULL_TREE;
8592 static tree
8593 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8594 tree name ATTRIBUTE_UNUSED,
8595 tree args ATTRIBUTE_UNUSED,
8596 int flags ATTRIBUTE_UNUSED,
8597 bool *no_add_attrs ATTRIBUTE_UNUSED)
8599 return NULL_TREE;
8602 /* True if __attribute__((renesas)) or -mrenesas. */
8603 bool
8604 sh_attr_renesas_p (const_tree td)
8606 if (TARGET_HITACHI)
8607 return true;
8608 if (td == NULL_TREE)
8609 return false;
8610 if (DECL_P (td))
8611 td = TREE_TYPE (td);
8612 if (td == error_mark_node)
8613 return false;
8614 return lookup_attribute ("renesas", TYPE_ATTRIBUTES (td)) != NULL_TREE;
8617 /* True if __attribute__((renesas)) or -mrenesas, for the current
8618 function. */
8619 bool
8620 sh_cfun_attr_renesas_p (void)
8622 return sh_attr_renesas_p (current_function_decl);
8625 /* Returns true if the current function has the "interrupt_handler"
8626 attribute set. */
8627 bool
8628 sh_cfun_interrupt_handler_p (void)
8630 return (lookup_attribute ("interrupt_handler",
8631 DECL_ATTRIBUTES (current_function_decl))
8632 != NULL_TREE);
8635 /* Returns true if FUNC has been assigned the attribute
8636 "function_vector". */
8637 bool
8638 sh2a_function_vector_p (tree func)
8640 if (TREE_CODE (func) != FUNCTION_DECL)
8641 return false;
8643 for (tree list = SH_ATTRIBUTES (func); list; list = TREE_CHAIN (list))
8644 if (is_attribute_p ("function_vector", get_attribute_name (list)))
8645 return true;
8647 return false;
8650 /* Returns true if the current function has the "resbank" attribute set. */
8651 bool
8652 sh_cfun_resbank_handler_p (void)
8654 return ((lookup_attribute ("resbank",
8655 DECL_ATTRIBUTES (current_function_decl))
8656 != NULL_TREE)
8657 && (lookup_attribute ("interrupt_handler",
8658 DECL_ATTRIBUTES (current_function_decl))
8659 != NULL_TREE) && TARGET_SH2A);
8662 /* Returns true if the current function has a "trap_exit" attribute set. */
8663 bool
8664 sh_cfun_trap_exit_p (void)
8666 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
8667 != NULL_TREE;
8670 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8671 static const char *
8672 sh_check_pch_target_flags (int old_flags)
8674 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8675 | MASK_SH_E | MASK_HARD_SH4
8676 | MASK_FPU_SINGLE | MASK_SH4))
8677 return _("created and used with different architectures / ABIs");
8678 if ((old_flags ^ target_flags) & MASK_HITACHI)
8679 return _("created and used with different ABIs");
8680 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8681 return _("created and used with different endianness");
8682 return NULL;
8685 /* Predicates used by the templates. */
8687 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
8688 Used only in general_movsrc_operand. */
8689 bool
8690 system_reg_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
8692 switch (REGNO (op))
8694 case PR_REG:
8695 case MACL_REG:
8696 case MACH_REG:
8697 return true;
8699 return false;
8702 /* Returns true if OP is a floating point value with value 0.0. */
8703 bool
8704 fp_zero_operand (rtx op)
8706 if (GET_MODE (op) != SFmode)
8707 return false;
8709 const REAL_VALUE_TYPE* r = CONST_DOUBLE_REAL_VALUE (op);
8710 return real_equal (r, &dconst0) && ! REAL_VALUE_MINUS_ZERO (*r);
8713 /* Returns true if OP is a floating point value with value 1.0. */
8714 bool
8715 fp_one_operand (rtx op)
8717 if (GET_MODE (op) != SFmode)
8718 return false;
8720 return real_equal (CONST_DOUBLE_REAL_VALUE (op), &dconst1);
8723 /* Return the TLS type for TLS symbols. */
8724 enum tls_model
8725 tls_symbolic_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
8727 if (GET_CODE (op) != SYMBOL_REF)
8728 return TLS_MODEL_NONE;
8729 return SYMBOL_REF_TLS_MODEL (op);
8732 /* Return the destination address of a branch. */
8733 static int
8734 branch_dest (rtx branch)
8736 rtx dest = SET_SRC (PATTERN (branch));
8738 if (GET_CODE (dest) == IF_THEN_ELSE)
8739 dest = XEXP (dest, 1);
8741 return INSN_ADDRESSES (INSN_UID (XEXP (dest, 0)));
8744 /* Return nonzero if REG is not used after INSN.
8745 We assume REG is a reload reg, and therefore does
8746 not live past labels. It may live past calls or jumps though. */
8747 bool
8748 reg_unused_after (rtx reg, rtx_insn *insn)
8750 /* If the reg is set by this instruction, then it is safe for our
8751 case. Disregard the case where this is a store to memory, since
8752 we are checking a register used in the store address. */
8753 rtx set = single_set (insn);
8754 if (set && !MEM_P (SET_DEST (set))
8755 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8756 return true;
8758 while ((insn = NEXT_INSN (insn)))
8760 if (!INSN_P (insn))
8761 continue;
8763 rtx_code code = GET_CODE (insn);
8765 #if 0
8766 /* If this is a label that existed before reload, then the register
8767 is dead here. However, if this is a label added by reorg, then
8768 the register may still be live here. We can't tell the difference,
8769 so we just ignore labels completely. */
8770 if (code == CODE_LABEL)
8771 return 1;
8772 /* else */
8773 #endif
8775 if (code == JUMP_INSN)
8776 return false;
8778 /* If this is a sequence, we must handle them all at once.
8779 We could have for instance a call that sets the target register,
8780 and an insn in a delay slot that uses the register. In this case,
8781 we must return 0. */
8782 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8784 rtx_sequence *seq = as_a <rtx_sequence *> (PATTERN (insn));
8785 bool retval = false;
8787 for (int i = 0; i < seq->len (); i++)
8789 rtx_insn *this_insn = seq->insn (i);
8790 rtx set = single_set (this_insn);
8792 if (CALL_P (this_insn))
8793 code = CALL_INSN;
8794 else if (JUMP_P (this_insn))
8796 if (INSN_ANNULLED_BRANCH_P (this_insn))
8797 return false;
8798 code = JUMP_INSN;
8801 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8802 return false;
8803 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8805 if (!MEM_P (SET_DEST (set)))
8806 retval = true;
8807 else
8808 return false;
8810 if (set == NULL_RTX
8811 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8812 return false;
8814 if (retval)
8815 return true;
8816 else if (code == JUMP_INSN)
8817 return false;
8820 rtx set = single_set (insn);
8821 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8822 return false;
8823 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8824 return !MEM_P (SET_DEST (set));
8825 if (set == NULL && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8826 return false;
8828 if (code == CALL_INSN && call_used_regs[REGNO (reg)])
8829 return true;
8831 return true;
8835 static GTY(()) rtx t_reg_rtx;
8837 get_t_reg_rtx (void)
8839 if (! t_reg_rtx)
8840 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
8841 return t_reg_rtx;
8844 static GTY(()) tree fpscr_values;
8846 static void
8847 emit_fpu_switch (rtx scratch, int index)
8849 if (fpscr_values == NULL)
8851 tree t = build_index_type (integer_one_node);
8852 t = build_array_type (integer_type_node, t);
8853 t = build_decl (BUILTINS_LOCATION,
8854 VAR_DECL, get_identifier ("__fpscr_values"), t);
8855 DECL_ARTIFICIAL (t) = 1;
8856 DECL_IGNORED_P (t) = 1;
8857 DECL_EXTERNAL (t) = 1;
8858 TREE_STATIC (t) = 1;
8859 TREE_PUBLIC (t) = 1;
8860 TREE_USED (t) = 1;
8862 fpscr_values = t;
8865 rtx src = DECL_RTL (fpscr_values);
8866 if (!can_create_pseudo_p ())
8868 emit_move_insn (scratch, XEXP (src, 0));
8869 if (index != 0)
8870 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
8871 src = adjust_automodify_address (src, SImode, scratch, index * 4);
8873 else
8874 src = adjust_address (src, SImode, index * 4);
8876 emit_insn (gen_lds_fpscr (src));
8879 static rtx get_free_reg (HARD_REG_SET);
8881 /* This function returns a register to use for loading the address from
8882 which the fpscr is loaded. Currently it always returns r1 or r7, but when we are
8883 able to use pseudo registers after combine, or have a better mechanism
8884 for choosing a register, it should be done here. */
8885 /* REGS_LIVE is the liveness information for the point for which we
8886 need this allocation. In some bare-bones exit blocks, r1 is live at the
8887 start. We can even have all of r0..r3 being live:
8888 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8889 The insn before which the new insns are placed will clobber the register
8890 we return. If a basic block consists only of setting the return value
8891 register to a pseudo and using that register, the return value is not
8892 live before or after this block, yet we'll insert our insns right in
8893 the middle. */
8894 static rtx
8895 get_free_reg (HARD_REG_SET regs_live)
8897 if (! TEST_HARD_REG_BIT (regs_live, 1))
8898 return gen_rtx_REG (Pmode, 1);
8900 /* Hard reg 1 is live; since this is a small register classes target,
8901 there shouldn't be anything but a jump before the function end. */
8902 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8903 return gen_rtx_REG (Pmode, 7);
8906 /* This function will set the fpscr from memory.
8907 MODE is the mode we are setting it to. */
8908 void
8909 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8911 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
8912 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8914 rtx addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
8915 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8918 /* Is the given character a logical line separator for the assembler? */
8919 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8920 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
8921 #endif
8923 static bool
8924 sequence_insn_p (rtx_insn *insn)
8926 rtx_insn* prev = PREV_INSN (insn);
8927 if (prev == NULL)
8928 return false;
8930 rtx_insn* next = NEXT_INSN (prev);
8931 if (next == NULL)
8932 return false;
8934 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
8938 sh_insn_length_adjustment (rtx_insn *insn)
8940 /* Instructions with unfilled delay slots take up an extra two bytes for
8941 the nop in the delay slot. */
8942 if (((NONJUMP_INSN_P (insn)
8943 && GET_CODE (PATTERN (insn)) != USE
8944 && GET_CODE (PATTERN (insn)) != CLOBBER)
8945 || CALL_P (insn) || JUMP_P (insn))
8946 && ! sequence_insn_p (insn)
8947 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8948 return 2;
8950 /* Increase the insn length of a cbranch without a delay slot insn to
8951 force a delay slot which will be stuffed with a nop. */
8952 if (TARGET_CBRANCH_FORCE_DELAY_SLOT && TARGET_SH2
8953 && JUMP_P (insn) && get_attr_type (insn) == TYPE_CBRANCH
8954 && ! sequence_insn_p (insn))
8955 return 2;
8957 /* sh-dsp parallel processing insns take four bytes instead of two. */
8959 if (NONJUMP_INSN_P (insn))
8961 int sum = 0;
8962 rtx body = PATTERN (insn);
8963 const char *templ;
8964 char c;
8965 bool maybe_label = true;
8967 if (GET_CODE (body) == ASM_INPUT)
8968 templ = XSTR (body, 0);
8969 else if (asm_noperands (body) >= 0)
8970 templ
8971 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
8972 else
8973 return 0;
8976 int ppi_adjust = 0;
8979 c = *templ++;
8980 while (c == ' ' || c == '\t');
8981 /* all sh-dsp parallel-processing insns start with p.
8982 The only non-ppi sh insn starting with p is pref.
8983 The only ppi starting with pr is prnd. */
8984 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
8985 ppi_adjust = 2;
8986 /* The repeat pseudo-insn expands to three insns, a total of
8987 six bytes in size. */
8988 else if ((c == 'r' || c == 'R')
8989 && ! strncasecmp ("epeat", templ, 5))
8990 ppi_adjust = 4;
8991 while (c && c != '\n'
8992 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
8994 /* If this is a label, it is obviously not a ppi insn. */
8995 if (c == ':' && maybe_label)
8997 ppi_adjust = 0;
8998 break;
9000 else if (c == '\'' || c == '"')
9001 maybe_label = false;
9002 c = *templ++;
9004 sum += ppi_adjust;
9005 maybe_label = c != ':';
9007 while (c);
9008 return sum;
9010 return 0;
9013 /* Return TRUE for a valid displacement for the REG+disp addressing
9014 with MODE. */
9015 bool
9016 sh_legitimate_index_p (machine_mode mode, rtx op, bool consider_sh2a,
9017 bool allow_zero)
9019 if (! CONST_INT_P (op))
9020 return false;
9023 const HOST_WIDE_INT offset = INTVAL (op);
9024 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
9025 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
9027 /* If the mode does not support any displacement always return false.
9028 Even though an index of '0' is actually always valid, it will cause
9029 troubles when e.g. a DFmode move is split into two SFmode moves,
9030 where one SFmode move will have index '0' and the other move will
9031 have index '4'. */
9032 if (!allow_zero && max_disp < 1)
9033 return false;
9035 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
9039 /* Recognize an RTL expression that is a valid memory address for
9040 an instruction.
9041 The MODE argument is the machine mode for the MEM expression
9042 that wants to use this address.
9043 Allow REG
9044 REG+disp
9045 REG+r0
9046 REG++
9047 --REG
9049 GBR+disp */
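/* In SH assembler syntax these forms correspond to @Rn, @(disp,Rn),
   @(R0,Rn), @Rn+, @-Rn and @(disp,GBR) respectively.  */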
9050 static bool
9051 sh_legitimate_address_p (machine_mode mode, rtx x, bool strict, code_helper)
9053 if (REG_P (x) && REGNO (x) == GBR_REG)
9054 return true;
9056 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9057 return true;
9058 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9059 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9060 return true;
9061 else if (GET_CODE (x) == PLUS)
9063 rtx xop0 = XEXP (x, 0);
9064 rtx xop1 = XEXP (x, 1);
9066 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
9067 return gbr_displacement (xop1, mode);
9069 if (GET_MODE_SIZE (mode) <= 8
9070 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9071 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
9072 return true;
9074 if (GET_MODE_SIZE (mode) <= 4
9075 || (TARGET_FPU_DOUBLE && TARGET_FMOVD && mode == DFmode))
9077 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9078 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9079 return true;
9080 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9081 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9082 return true;
9086 return false;
9089 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9090 isn't protected by a PIC unspec. */
9091 bool
9092 nonpic_symbol_mentioned_p (rtx x)
9094 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9095 || GET_CODE (x) == PC)
9096 return true;
9098 /* We don't want to look into the possible MEM location of a
9099 CONST_DOUBLE, since we're not going to use it, in general. */
9100 if (GET_CODE (x) == CONST_DOUBLE)
9101 return false;
9103 if (GET_CODE (x) == UNSPEC
9104 && (XINT (x, 1) == UNSPEC_PIC
9105 || XINT (x, 1) == UNSPEC_GOT
9106 || XINT (x, 1) == UNSPEC_GOTOFF
9107 || XINT (x, 1) == UNSPEC_GOTPLT
9108 || XINT (x, 1) == UNSPEC_GOTTPOFF
9109 || XINT (x, 1) == UNSPEC_DTPOFF
9110 || XINT (x, 1) == UNSPEC_TPOFF
9111 || XINT (x, 1) == UNSPEC_PLT
9112 || XINT (x, 1) == UNSPEC_PCREL
9113 || XINT (x, 1) == UNSPEC_SYMOFF
9114 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF
9115 || XINT (x, 1) == UNSPEC_GOTFUNCDESC
9116 || XINT (x, 1) == UNSPEC_GOTOFFFUNCDESC))
9117 return false;
9119 const char* fmt = GET_RTX_FORMAT (GET_CODE (x));
9120 for (int i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9122 if (fmt[i] == 'E')
9124 for (int j = XVECLEN (x, i) - 1; j >= 0; j--)
9125 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9126 return true;
9128 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9129 return true;
9132 return false;
9135 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9136 @GOTOFF in `reg'. */
9138 legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED, rtx reg)
9140 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9141 return orig;
9143 if (GET_CODE (orig) == LABEL_REF
9144 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9146 if (reg == NULL_RTX)
9147 reg = gen_reg_rtx (Pmode);
9149 if (TARGET_FDPIC
9150 && GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (orig))
9152 /* Weak functions may be NULL which doesn't work with
9153 GOTOFFFUNCDESC because the runtime offset is not known. */
9154 if (SYMBOL_REF_WEAK (orig))
9155 emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
9156 else
9157 emit_insn (gen_symGOTOFFFUNCDESC2reg (reg, orig));
9159 else if (TARGET_FDPIC
9160 && (GET_CODE (orig) == LABEL_REF
9161 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_DECL (orig)
9162 && (TREE_READONLY (SYMBOL_REF_DECL (orig))
9163 || SYMBOL_REF_EXTERNAL_P (orig)
9164 || DECL_SECTION_NAME(SYMBOL_REF_DECL (orig))))))
9165 /* In FDPIC, GOTOFF can only be used for writable data. */
9166 emit_insn (gen_symGOT2reg (reg, orig));
9167 else
9168 emit_insn (gen_symGOTOFF2reg (reg, orig));
9169 return reg;
9171 else if (GET_CODE (orig) == SYMBOL_REF)
9173 if (reg == NULL_RTX)
9174 reg = gen_reg_rtx (Pmode);
9176 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (orig))
9177 emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
9178 else
9179 emit_insn (gen_symGOT2reg (reg, orig));
9180 return reg;
9182 return orig;
9185 /* Given a (logical) mode size and an offset in bytes, try to find the
9186 appropriate displacement value for a mov insn. On SH the displacements
9187 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
9188 15 bytes in QImode. To compensate for this we create a new base address by
9189 adding an adjustment value to it.
9191 If the originally requested offset is greater than 127 we prefer using
9192 values 124..127 over 128..131 to increase opportunities to use the
9193 add #imm, Rn insn.
9195 In some cases it is possible that a requested offset might seem unaligned
9196 or inappropriate for the mode size, like offset = 2 and mode size = 4.
9197 This is compensated by adjusting the base address so that the effective
9198 address of the displacement move insn will be aligned.
9200 This is not the best possible way of rebasing the base address, as it
9201 does not look at other present displacement addressings around it.
9202 In some cases this can create more base address adjustments than would
9203 actually be necessary. */
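/* A worked example (illustrative): for an SImode access at base + 100
   (mov_insn_sz 4, max_disp 60) the function below returns
   offset_adjust == 64 and mov_disp == 36, i.e. the base is rebased with
   add #64 and the move then uses displacement 36.  */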
9204 struct disp_adjust
9206 rtx offset_adjust;
9207 rtx mov_disp;
9210 static struct disp_adjust
9211 sh_find_mov_disp_adjust (machine_mode mode, HOST_WIDE_INT offset)
9213 struct disp_adjust res = { NULL_RTX, NULL_RTX };
9215 /* Do not try to use SH2A's large displacements here, because this would
9216 effectively disable the small displacement insns. */
9217 const int mode_sz = GET_MODE_SIZE (mode);
9218 const int mov_insn_sz = mov_insn_size (mode, false);
9219 const int max_disp = sh_max_mov_insn_displacement (mode, false);
9220 const int max_disp_next = max_disp + mov_insn_sz;
9221 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
9222 HOST_WIDE_INT offset_adjust;
9224 /* In some cases this actually does happen and we must check for it. */
9225 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
9226 return res;
9228 /* Keeps the previous behavior for QImode displacement addressing.
9229 This just decides how the offset is re-based. Removing this special
9230 case will result in slightly bigger code on average, but it's not that
9231 bad actually. */
9232 if (mov_insn_sz == 1)
9233 align_modifier = 0;
9235 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
9237 if (mode_sz + offset - offset_adjust <= max_disp_next)
9239 res.offset_adjust = GEN_INT (offset_adjust);
9240 res.mov_disp = GEN_INT (offset - offset_adjust);
9243 return res;
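/* Illustrative trace of the rebasing arithmetic above (added for clarity;
   not part of the original file).  Assuming the SImode limits
   mov_insn_sz == 4 and max_disp == 60:

     offset = 64   ->  align_modifier = 0, offset_adjust = 64,  mov_disp = 0
     offset = 130  ->  align_modifier = 4, offset_adjust = 126, mov_disp = 4

   i.e. the base register is advanced by an add #imm friendly amount and
   the remaining displacement fits the scaled 4-bit field of mov.l.  */
#if 0 /* Sketch with plain host types; kept out of the build.  */
static long long
sh_disp_adjust_example (long long offset)
{
  const long long mov_insn_sz = 4;  /* assumed SImode move size */
  const long long max_disp = 60;    /* assumed max. SImode displacement */
  long long align_modifier = offset > 127 ? mov_insn_sz : 0;
  long long offset_adjust
    = ((offset + align_modifier) & ~max_disp) - align_modifier;
  return offset - offset_adjust;    /* displacement that gets encoded */
}
#endif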
9246 /* Try to modify an illegitimate address and make it legitimate.
9247 If we find one, return the new, valid address.
9248 Otherwise, return the original address. */
9249 static rtx
9250 sh_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9252 if (flag_pic)
9253 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9255 if ((TARGET_FPU_DOUBLE && mode == DFmode)
9256 || (TARGET_SH2E && mode == SFmode))
9257 return x;
9259 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
9260 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
9262 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
9263 INTVAL (XEXP (x, 1)));
9265 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
9267 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9268 adj.offset_adjust, NULL_RTX, 0,
9269 OPTAB_LIB_WIDEN);
9270 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
9273 return x;
9276 /* Attempt to replace *p, which is an address that needs reloading, with
9277 a valid memory address for an operand of mode MODE.
9278 Like for sh_legitimize_address, for the SH we try to get a normal form
9279 of the address. That will allow inheritance of the address reloads. */
9280 bool
9281 sh_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
9282 int itype)
9284 enum reload_type type = (enum reload_type) itype;
9285 const int mode_sz = GET_MODE_SIZE (mode);
9287 if (sh_lra_p ())
9288 return false;
9290 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
9291 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true))
9293 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
9294 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
9296 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9298 push_reload (*p, NULL_RTX, p, NULL,
9299 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9300 return true;
9303 if (TARGET_SH2E && mode == SFmode)
9305 *p = copy_rtx (*p);
9306 push_reload (*p, NULL_RTX, p, NULL,
9307 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9308 return true;
9311 /* FIXME: Do not allow legitimizing QImode and HImode displacement
9312 moves, because reload then has a problem figuring out the constraint
9313 that the move insn's target/source reg must be R0.
9314 Or maybe some handling is wrong in sh_secondary_reload for this
9315 to work properly? */
9316 if ((mode_sz == 4 || mode_sz == 8)
9317 && ! (TARGET_SH4 && mode == DFmode)
9318 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
9320 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
9321 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
9322 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9323 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9324 return true;
9328 /* We must re-recognize what we created before. */
9329 if (GET_CODE (*p) == PLUS
9330 && (mode_sz == 4 || mode_sz == 8)
9331 && GET_CODE (XEXP (*p, 0)) == PLUS
9332 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9333 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
9334 && CONST_INT_P (XEXP (*p, 1))
9335 && ! (TARGET_SH2E && mode == SFmode))
9337 /* Because this address is so complex, we know it must have
9338 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9339 it is already unshared, and needs no further unsharing. */
9340 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9341 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9342 return true;
9345 return false;
9348 /* In the name of slightly smaller debug output, and to cater to
9349 general assembler lossage, recognize various UNSPEC sequences
9350 and turn them back into a direct symbol reference. */
9351 static rtx
9352 sh_delegitimize_address (rtx orig_x)
9354 orig_x = delegitimize_mem_from_attrs (orig_x);
9356 rtx x = orig_x;
9357 if (MEM_P (x))
9358 x = XEXP (x, 0);
9359 if (GET_CODE (x) == CONST)
9361 rtx y = XEXP (x, 0);
9362 if (GET_CODE (y) == UNSPEC)
9364 if (XINT (y, 1) == UNSPEC_GOT
9365 || XINT (y, 1) == UNSPEC_GOTOFF
9366 || XINT (y, 1) == UNSPEC_SYMOFF)
9367 return XVECEXP (y, 0, 0);
9368 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
9370 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
9372 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
9374 if (GET_CODE (symplt) == UNSPEC
9375 && (XINT (symplt, 1) == UNSPEC_PLT
9376 || XINT (symplt, 1) == UNSPEC_PCREL))
9377 return XVECEXP (symplt, 0, 0);
9383 return orig_x;
9386 /* Mark the use of a constant in the literal table. If the constant
9387 has multiple labels, make it unique. */
9388 static rtx
9389 mark_constant_pool_use (rtx x)
9391 if (x == NULL_RTX)
9392 return x;
9394 switch (GET_CODE (x))
9396 case LABEL_REF:
9397 x = XEXP (x, 0);
9398 case CODE_LABEL:
9399 break;
9400 default:
9401 return x;
9404 /* Get the first label in the list of labels for the same constant
9405 and delete the other labels in the list. */
9406 rtx_insn* lab = as_a <rtx_insn*> (x);
9407 for (rtx_insn* insn = PREV_INSN (lab); insn; insn = PREV_INSN (insn))
9409 if (!LABEL_P (insn)
9410 || LABEL_REFS (insn) != NEXT_INSN (insn))
9411 break;
9412 lab = insn;
9415 for (rtx insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9416 as_a<rtx_insn *> (insn)->set_deleted ();
9418 /* Mark constants in a window. */
9419 for (rtx_insn* insn = NEXT_INSN (as_a <rtx_insn *> (x)); insn;
9420 insn = NEXT_INSN (insn))
9422 if (!NONJUMP_INSN_P (insn))
9423 continue;
9425 rtx pattern = PATTERN (insn);
9426 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9427 continue;
9429 switch (XINT (pattern, 1))
9431 case UNSPECV_CONST2:
9432 case UNSPECV_CONST4:
9433 case UNSPECV_CONST8:
9434 XVECEXP (pattern, 0, 1) = const1_rtx;
9435 break;
9436 case UNSPECV_WINDOW_END:
9437 if (XVECEXP (pattern, 0, 0) == x)
9438 return lab;
9439 break;
9440 case UNSPECV_CONST_END:
9441 return lab;
9442 default:
9443 break;
9447 return lab;
9450 /* Return true if it's possible to redirect BRANCH1 to the destination
9451 of an unconditional jump BRANCH2. We only want to do this if the
9452 resulting branch will have a short displacement. */
9453 static bool
9454 sh_can_follow_jump (const rtx_insn *branch1, const rtx_insn *branch2)
9456 /* Don't follow if BRANCH2 might be a jump crossing between
9457 hot and cold partitions. */
9458 if (flag_reorder_blocks_and_partition
9459 && simplejump_p (branch2)
9460 && CROSSING_JUMP_P (branch2))
9461 return false;
9463 if (flag_expensive_optimizations && simplejump_p (branch2))
9465 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9466 rtx_insn *insn;
9467 int distance;
9469 for (distance = 0, insn = NEXT_INSN (branch1);
9470 insn && distance < 256;
9471 insn = PREV_INSN (insn))
9473 if (insn == dest)
9474 return true;
9475 else
9476 distance += get_attr_length (insn);
9478 for (distance = 0, insn = NEXT_INSN (branch1);
9479 insn && distance < 256;
9480 insn = NEXT_INSN (insn))
9482 if (insn == dest)
9483 return true;
9484 else
9485 distance += get_attr_length (insn);
9488 return false;
9491 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9492 bool
9493 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9494 unsigned int new_reg)
9496 /* Interrupt functions can only use registers that have already been
9497 saved by the prologue, even if they would normally be
9498 call-clobbered. */
9499 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9500 return false;
9502 return true;
9505 /* Function to update the integer COST
9506 based on the relationship between INSN that is dependent on
9507 DEP_INSN through the dependence LINK. The default is to make no
9508 adjustment to COST. This can be used for example to specify to
9509 the scheduler that an output- or anti-dependence does not incur
9510 the same cost as a data-dependence. The return value should be
9511 the new value for COST. */
9512 static int
9513 sh_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
9514 unsigned int)
9516 rtx reg, use_pat;
9518 if (dep_type == 0)
9520 if (recog_memoized (insn) < 0
9521 || recog_memoized (dep_insn) < 0)
9522 return cost;
9524 rtx dep_set = single_set (dep_insn);
9526 /* The latency that we specify in the scheduling description refers
9527 to the actual output, not to an auto-increment register; for that,
9528 the latency is one. */
9529 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9531 rtx set = single_set (insn);
9533 if (set
9534 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9535 && (!MEM_P (SET_DEST (set))
9536 || !reg_mentioned_p (SET_DEST (dep_set),
9537 XEXP (SET_DEST (set), 0))))
9538 cost = 1;
9540 /* The only input for a call that is timing-critical is the
9541 function's address. */
9542 if (CALL_P (insn))
9544 rtx call = get_call_rtx_from (insn);
9545 if (call
9546 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9547 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9548 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9549 cost -= TARGET_SH4_300 ? 3 : 6;
9551 /* Likewise, the most timing-critical input for an sfunc call
9552 is the function address. However, sfuncs typically start
9553 using their arguments pretty quickly.
9554 Assume a four cycle delay for SH4 before they are needed.
9555 Cached ST40-300 calls are quicker, so assume only a one
9556 cycle delay there.
9557 ??? Maybe we should encode the delays till input registers
9558 are needed by sfuncs into the sfunc call insn. */
9559 /* All sfunc calls are parallels with at least four components.
9560 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9561 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9562 && XVECLEN (PATTERN (insn), 0) >= 4
9563 && (reg = sfunc_uses_reg (insn)))
9565 if (! reg_set_p (reg, dep_insn))
9566 cost -= TARGET_SH4_300 ? 1 : 4;
9568 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
9570 attr_type dep_type = get_attr_type (dep_insn);
9571 attr_type type;
9572 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
9573 cost--;
9574 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
9575 && (type = get_attr_type (insn)) != TYPE_CALL
9576 && type != TYPE_SFUNC)
9577 cost--;
9578 /* When the preceding instruction loads the shift amount of
9579 the following SHAD/SHLD, the latency of the load is increased
9580 by 1 cycle. */
9581 if (get_attr_type (insn) == TYPE_DYN_SHIFT
9582 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
9583 && reg_overlap_mentioned_p (SET_DEST (dep_set),
9584 XEXP (SET_SRC (single_set (insn)),
9585 1)))
9586 cost++;
9587 /* When an LS group instruction with a latency of less than
9588 3 cycles is followed by a double-precision floating-point
9589 instruction, FIPR, or FTRV, the latency of the first
9590 instruction is increased to 3 cycles. */
9591 else if (cost < 3
9592 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
9593 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
9594 cost = 3;
9595 /* The lsw register of a double-precision computation is ready one
9596 cycle earlier. */
9597 else if (reload_completed
9598 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
9599 && (use_pat = single_set (insn))
9600 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
9601 SET_SRC (use_pat)))
9602 cost -= 1;
9604 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
9605 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
9606 cost -= 1;
9608 else if (TARGET_SH4_300)
9610 /* Stores need their input register two cycles later. */
9611 attr_type type;
9612 if (dep_set && cost >= 1
9613 && ((type = get_attr_type (insn)) == TYPE_STORE
9614 || type == TYPE_PSTORE
9615 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
9617 rtx set = single_set (insn);
9619 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
9620 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
9622 cost -= 2;
9623 /* But don't reduce the cost below 1 if the address depends
9624 on a side effect of dep_insn. */
9625 if (cost < 1
9626 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
9627 cost = 1;
9632 /* An anti-dependence penalty of two applies if the first insn is a double
9633 precision fadd / fsub / fmul. */
9634 else if (!TARGET_SH4_300
9635 && dep_type == REG_DEP_ANTI
9636 && recog_memoized (dep_insn) >= 0
9637 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
9638 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
9639 /* A lot of alleged anti-flow dependences are fake,
9640 so check this one is real. */
9641 && flow_dependent_p (dep_insn, insn))
9642 cost = 2;
9644 return cost;
9647 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9648 if DEP_INSN is anti-flow dependent on INSN. */
9649 static bool
9650 flow_dependent_p (rtx_insn *insn, rtx_insn *dep_insn)
9652 rtx tmp = PATTERN (insn);
9654 note_stores (dep_insn, flow_dependent_p_1, &tmp);
9655 return tmp == NULL_RTX;
9658 /* A helper function for flow_dependent_p called through note_stores. */
9659 static void
9660 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
9662 rtx * pinsn = (rtx *) data;
9664 if (*pinsn && reg_referenced_p (x, *pinsn))
9665 *pinsn = NULL_RTX;
9668 /* For use by sh_allocate_initial_value. Note that sh.md contains some
9669 'special function' patterns (type sfunc) that clobber pr, but that
9670 do not look like function calls to leaf_function_p. Hence we must
9671 do this extra check. */
9672 static int
9673 sh_pr_n_sets (void)
9675 return DF_REG_DEF_COUNT (PR_REG);
9678 /* Return where to allocate pseudo for a given hard register initial
9679 value. */
9680 static rtx
9681 sh_allocate_initial_value (rtx hard_reg)
9683 if (REGNO (hard_reg) == PR_REG)
9685 if (crtl->is_leaf && ! sh_pr_n_sets ())
9686 return hard_reg;
9687 else
9688 return gen_frame_mem (Pmode, return_address_pointer_rtx);
9691 return NULL_RTX;
9694 /* This function returns "2" to indicate dual issue for the SH4
9695 processor. To be used by the DFA pipeline description. */
9696 static int
9697 sh_issue_rate (void)
9699 if (TARGET_SUPERSCALAR)
9700 return 2;
9701 else
9702 return 1;
9705 /* Functions for ready queue reordering for sched1. */
9707 /* Get weight for mode for a set x. */
9708 static short
9709 find_set_regmode_weight (rtx x, machine_mode mode)
9711 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9712 return 1;
9713 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9715 if (REG_P (SET_DEST (x)))
9717 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9718 return 1;
9719 else
9720 return 0;
9722 return 1;
9724 return 0;
9727 /* Get regmode weight for insn. */
9728 static short
9729 find_insn_regmode_weight (rtx insn, machine_mode mode)
9731 /* Increment weight for each register born here. */
9732 rtx x = PATTERN (insn);
9733 short reg_weight = find_set_regmode_weight (x, mode);
9734 if (GET_CODE (x) == PARALLEL)
9736 int j;
9737 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9739 x = XVECEXP (PATTERN (insn), 0, j);
9740 reg_weight += find_set_regmode_weight (x, mode);
9743 /* Decrement weight for each register that dies here. */
9744 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9746 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9748 rtx note = XEXP (x, 0);
9749 if (REG_P (note) && GET_MODE (note) == mode)
9750 reg_weight--;
9753 return reg_weight;
9756 /* Calculate regmode weights for all insns of a basic block. */
9757 static void
9758 find_regmode_weight (basic_block b, machine_mode mode)
9760 rtx_insn *insn, *next_tail, *head, *tail;
9762 get_ebb_head_tail (b, b, &head, &tail);
9763 next_tail = NEXT_INSN (tail);
9765 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9767 /* Handle register life information. */
9768 if (!INSN_P (insn))
9769 continue;
9771 if (mode == SFmode)
9772 INSN_REGMODE_WEIGHT (insn, mode) =
9773 find_insn_regmode_weight (insn, mode)
9774 + 2 * find_insn_regmode_weight (insn, DFmode);
9775 else if (mode == SImode)
9776 INSN_REGMODE_WEIGHT (insn, mode) =
9777 find_insn_regmode_weight (insn, mode)
9778 + 2 * find_insn_regmode_weight (insn, DImode);
9782 /* Comparison function for ready queue sorting. */
9783 static int
9784 rank_for_reorder (const void *x, const void *y)
9786 rtx_insn *tmp = *(rtx_insn * const *) y;
9787 rtx_insn *tmp2 = *(rtx_insn * const *) x;
9789 /* The insn in a schedule group should be issued first. */
9790 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9791 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9793 /* If insns are equally good, sort by INSN_LUID (original insn order). This
9794 minimizes instruction movement, thus minimizing sched's effect on
9795 register pressure. */
9796 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9799 /* Resort the array A, in which only the element at index N may be out of order. */
9800 static void
9801 swap_reorder (rtx_insn **a, int n)
9803 rtx_insn *insn = a[n - 1];
9804 int i = n - 2;
9806 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9808 a[i + 1] = a[i];
9809 i -= 1;
9811 a[i + 1] = insn;
9814 /* Sort the ready list by ascending priority. */
9815 static void
9816 ready_reorder (rtx_insn **ready, int nready)
9818 if (nready == 2)
9819 swap_reorder (ready, nready);
9820 else if (nready > 2)
9821 qsort (ready, nready, sizeof (rtx_insn *), rank_for_reorder);
9824 /* Count life regions of r0 for a block. */
9825 static int
9826 find_r0_life_regions (basic_block b)
9828 bool live;
9829 int set;
9830 int death = 0;
9832 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
9834 set = 1;
9835 live = true;
9837 else
9839 set = 0;
9840 live = false;
9843 rtx_insn* insn = BB_HEAD (b);
9844 rtx_insn* end = BB_END (b);
9845 rtx r0_reg = gen_rtx_REG (SImode, R0_REG);
9846 while (1)
9848 if (INSN_P (insn))
9850 if (find_regno_note (insn, REG_DEAD, R0_REG))
9852 death++;
9853 live = false;
9856 rtx pset;
9857 if (!live
9858 && (pset = single_set (insn))
9859 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
9860 && !find_regno_note (insn, REG_UNUSED, R0_REG))
9862 set++;
9863 live = true;
9866 if (insn == end)
9867 break;
9868 insn = NEXT_INSN (insn);
9870 return set - death;
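/* Worked example (added for clarity; not part of the original file): in a
   block where r0 is not live on entry, is set once and later carries a
   REG_DEAD note, the walk above ends with set == 1 and death == 1, so the
   block contributes 0; if the value is still live at the block end (no
   REG_DEAD note), it contributes 1.  The caller sums these contributions
   into r0_life_regions and compares against R0_MAX_LIFE_REGIONS.  */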
9873 /* Calculate regmode weights for all insns of all basic blocks. */
9874 static void
9875 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9876 int verbose ATTRIBUTE_UNUSED,
9877 int old_max_uid)
9879 basic_block b;
9881 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9882 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9883 r0_life_regions = 0;
9885 FOR_EACH_BB_REVERSE_FN (b, cfun)
9887 find_regmode_weight (b, SImode);
9888 find_regmode_weight (b, SFmode);
9889 if (!reload_completed)
9890 r0_life_regions += find_r0_life_regions (b);
9893 CURR_REGMODE_PRESSURE (SImode) = 0;
9894 CURR_REGMODE_PRESSURE (SFmode) = 0;
9897 /* Cleanup. */
9898 static void
9899 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9900 int verbose ATTRIBUTE_UNUSED)
9902 if (regmode_weight[0])
9904 free (regmode_weight[0]);
9905 regmode_weight[0] = NULL;
9907 if (regmode_weight[1])
9909 free (regmode_weight[1]);
9910 regmode_weight[1] = NULL;
9914 /* Cache can_issue_more so that we can return it from sh_reorder2. Also
9915 keep count of register pressure for SImode and SFmode. */
9916 static int
9917 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9918 int sched_verbose ATTRIBUTE_UNUSED,
9919 rtx_insn *insn,
9920 int can_issue_more)
9922 if (GET_CODE (PATTERN (insn)) != USE
9923 && GET_CODE (PATTERN (insn)) != CLOBBER)
9924 cached_can_issue_more = can_issue_more - 1;
9925 else
9926 cached_can_issue_more = can_issue_more;
9928 if (reload_completed)
9929 return cached_can_issue_more;
9931 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9932 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9934 return cached_can_issue_more;
9937 static void
9938 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
9939 int verbose ATTRIBUTE_UNUSED,
9940 int veclen ATTRIBUTE_UNUSED)
9942 CURR_REGMODE_PRESSURE (SImode) = 0;
9943 CURR_REGMODE_PRESSURE (SFmode) = 0;
9946 /* Some magic numbers. */
9947 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9948 functions that already have high pressure on r0. */
9949 #define R0_MAX_LIFE_REGIONS 2
9950 /* Register Pressure thresholds for SImode and SFmode registers. */
9951 #define SIMODE_MAX_WEIGHT 5
9952 #define SFMODE_MAX_WEIGHT 10
9954 /* Return true if the pressure is high for MODE. */
9955 static bool
9956 high_pressure (machine_mode mode)
9958 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9959 functions that already have high pressure on r0. */
9960 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
9961 return true;
9963 if (mode == SFmode)
9964 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9965 else
9966 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
9969 /* Reorder ready queue if register pressure is high. */
9970 static int
9971 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9972 int sched_verbose ATTRIBUTE_UNUSED,
9973 rtx_insn **ready,
9974 int *n_readyp,
9975 int clock_var ATTRIBUTE_UNUSED)
9977 if (reload_completed)
9978 return sh_issue_rate ();
9980 if (high_pressure (SFmode) || high_pressure (SImode))
9982 ready_reorder (ready, *n_readyp);
9985 return sh_issue_rate ();
9988 /* Skip cycles if the current register pressure is high. */
9989 static int
9990 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
9991 int sched_verbose ATTRIBUTE_UNUSED,
9992 rtx_insn **ready ATTRIBUTE_UNUSED,
9993 int *n_readyp ATTRIBUTE_UNUSED,
9994 int clock_var ATTRIBUTE_UNUSED)
9996 if (reload_completed)
9997 return cached_can_issue_more;
9999 if (high_pressure (SFmode) || high_pressure (SImode))
10000 skip_cycles = 1;
10002 return cached_can_issue_more;
10005 /* Skip cycles without sorting the ready queue. This will move insns from
10006 Q->R. If this is the last cycle we are skipping, allow sorting of the
10007 ready queue by sh_reorder. */
10009 /* Generally, skipping this many cycles is sufficient for all insns to move
10010 from Q -> R. */
10011 #define MAX_SKIPS 8
10013 static int
10014 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10015 int sched_verbose ATTRIBUTE_UNUSED,
10016 rtx_insn *insn ATTRIBUTE_UNUSED,
10017 int last_clock_var,
10018 int clock_var,
10019 int *sort_p)
10021 if (reload_completed)
10022 return 0;
10024 if (skip_cycles)
10026 if ((clock_var - last_clock_var) < MAX_SKIPS)
10028 *sort_p = 0;
10029 return 1;
10031 /* If this is the last cycle we are skipping, allow reordering of R. */
10032 if ((clock_var - last_clock_var) == MAX_SKIPS)
10034 *sort_p = 1;
10035 return 1;
10039 skip_cycles = 0;
10041 return 0;
10044 static bool
10045 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10047 return TARGET_HITACHI || sh_attr_renesas_p (record_type);
10051 /* On the SH1..SH4, the trampoline looks like
10052 2 0002 D202 mov.l l2,r2
10053 1 0000 D301 mov.l l1,r3
10054 3 0004 422B jmp @r2
10055 4 0006 0009 nop
10056 5 0008 00000000 l1: .long area
10057 6 000c 00000000 l2: .long function
10059 FDPIC needs a form that includes a function descriptor and
10060 code to load the GOT register:
10061 0 0000 00000000 .long l0
10062 1 0004 00000000 .long gotval
10063 2 0008 D302 l0: mov.l l1,r3
10064 3 000a D203 mov.l l2,r2
10065 4 000c 6122 mov.l @r2,r1
10066 5 000e 5C21 mov.l @(4,r2),r12
10067 6 0010 412B jmp @r1
10068 7 0012 0009 nop
10069 8 0014 00000000 l1: .long area
10070 9 0018 00000000 l2: .long function
10072 SH5 (compact) uses r1 instead of r3 for the static chain. */
10074 /* Emit insns to store a value at memory address + offset. */
10075 static void
10076 sh_emit_storesi (rtx addr, HOST_WIDE_INT offset, rtx value)
10078 gcc_assert ((offset & 3) == 0);
10079 emit_move_insn (offset == 0
10080 ? change_address (addr, SImode, NULL_RTX)
10081 : adjust_address (addr, SImode, offset), value);
10084 /* Emit insns to store w0 at addr + offset and w1 at addr + offset + 2. */
10085 static void
10086 sh_emit_storehi (rtx addr, HOST_WIDE_INT offset, uint16_t w0, uint16_t w1)
10088 sh_emit_storesi (addr, offset, gen_int_mode (TARGET_LITTLE_ENDIAN
10089 ? (w0 | (w1 << 16))
10090 : (w1 | (w0 << 16)), SImode));
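/* Illustrative trace (added for clarity; not part of the original file):
   for the first non-FDPIC trampoline store in sh_trampoline_init below,
   w0 = 0xd202 and w1 = 0xd301 are packed as 0xd301d202 on a little-endian
   target (w0 | (w1 << 16)) and as 0xd202d301 on a big-endian one
   (w1 | (w0 << 16)); either way w0 ends up at the lower address.  */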
10093 /* Emit RTL insns to initialize the variable parts of a trampoline.
10094 FNADDR is an RTX for the address of the function's pure code.
10095 CXT is an RTX for the static chain value for the function. */
10096 static void
10097 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10099 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10100 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10102 if (TARGET_FDPIC)
10104 rtx a = force_reg (Pmode, plus_constant (Pmode, XEXP (tramp_mem, 0), 8));
10106 sh_emit_storesi (tramp_mem, 0, a);
10107 sh_emit_storesi (tramp_mem, 4, sh_get_fdpic_reg_initial_val ());
10109 sh_emit_storehi (tramp_mem, 8, 0xd302, 0xd203);
10110 sh_emit_storehi (tramp_mem, 12, 0x6122, 0x5c21);
10111 sh_emit_storehi (tramp_mem, 16, 0x412b, 0x0009);
10113 sh_emit_storesi (tramp_mem, 20, cxt);
10114 sh_emit_storesi (tramp_mem, 24, fnaddr);
10116 else
10118 sh_emit_storehi (tramp_mem, 0, 0xd202, 0xd301);
10119 sh_emit_storehi (tramp_mem, 4, 0x422b, 0x0009);
10121 sh_emit_storesi (tramp_mem, 8, cxt);
10122 sh_emit_storesi (tramp_mem, 12, fnaddr);
10124 if (TARGET_HARD_SH4)
10126 if (!TARGET_INLINE_IC_INVALIDATE
10127 || (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE))
10128 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10129 FUNCTION_ORDINARY).sym,
10130 LCT_NORMAL, VOIDmode, tramp, SImode);
10131 else
10132 emit_insn (gen_ic_invalidate_line (tramp));
10136 /* On SH5, trampolines were SHmedia code, so 1 had to be added to the
address; for the remaining targets the address is returned unchanged. */
10137 static rtx
10138 sh_trampoline_adjust_address (rtx tramp)
10140 return tramp;
10143 /* If PIC, we cannot make sibling calls to global functions
10144 because the PLT requires r12 to be live. */
10145 static bool
10146 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10148 return (1
10149 && ! sh_cfun_interrupt_handler_p ()
10150 && (! flag_pic || TARGET_FDPIC
10151 || (decl && ! (TREE_PUBLIC (decl) || DECL_WEAK (decl)))
10152 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10155 /* Expand to appropriate sym*_label2reg for SYM and SIBCALL_P. */
10156 void
10157 sh_expand_sym_label2reg (rtx reg, rtx sym, rtx lab, bool sibcall_p)
10159 const_tree decl = SYMBOL_REF_DECL (sym);
10160 bool is_weak = (decl && DECL_P (decl) && DECL_WEAK (decl));
10162 if (!is_weak && SYMBOL_REF_LOCAL_P (sym))
10163 emit_insn (gen_sym_label2reg (reg, sym, lab));
10164 else if (sibcall_p && SYMBOL_REF_LOCAL_P (sym))
10165 emit_insn (gen_symPCREL_label2reg (reg, sym, lab));
10166 else
10167 emit_insn (gen_symPLT_label2reg (reg, sym, lab));
10170 /* Machine specific built-in functions. */
10172 struct builtin_description
10174 bool (* const is_enabled) (void);
10175 const enum insn_code icode;
10176 const char *const name;
10177 int signature;
10178 tree fndecl;
10181 /* Enable predicate for built-ins that are available on any SH1 or
10182 later target. */
10183 static bool
10184 sh1_builtin_p (void)
10186 return TARGET_SH1;
10189 /* Describe number and signedness of arguments; arg[0] == result
10190 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
10191 /* 9: 64-bit pointer, 10: 32-bit pointer. */
10192 static const char signature_args[][4] =
10194 #define SH_BLTIN_V2SI2 0
10195 { 4, 4 },
10196 #define SH_BLTIN_V4HI2 1
10197 { 4, 4 },
10198 #define SH_BLTIN_V2SI3 2
10199 { 4, 4, 4 },
10200 #define SH_BLTIN_V4HI3 3
10201 { 4, 4, 4 },
10202 #define SH_BLTIN_V8QI3 4
10203 { 4, 4, 4 },
10204 #define SH_BLTIN_MAC_HISI 5
10205 { 1, 4, 4, 1 },
10206 #define SH_BLTIN_SH_HI 6
10207 { 4, 4, 1 },
10208 #define SH_BLTIN_SH_SI 7
10209 { 4, 4, 1 },
10210 #define SH_BLTIN_V4HI2V2SI 8
10211 { 4, 4, 4 },
10212 #define SH_BLTIN_V4HI2V8QI 9
10213 { 4, 4, 4 },
10214 #define SH_BLTIN_SISF 10
10215 { 4, 2 },
10216 #define SH_BLTIN_LDUA_L 11
10217 { 2, 10 },
10218 #define SH_BLTIN_LDUA_Q 12
10219 { 1, 10 },
10220 #define SH_BLTIN_STUA_L 13
10221 { 0, 10, 2 },
10222 #define SH_BLTIN_STUA_Q 14
10223 { 0, 10, 1 },
10224 #define SH_BLTIN_LDUA_L64 15
10225 { 2, 9 },
10226 #define SH_BLTIN_LDUA_Q64 16
10227 { 1, 9 },
10228 #define SH_BLTIN_STUA_L64 17
10229 { 0, 9, 2 },
10230 #define SH_BLTIN_STUA_Q64 18
10231 { 0, 9, 1 },
10232 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10233 #define SH_BLTIN_2 19
10234 #define SH_BLTIN_SU 19
10235 { 1, 2 },
10236 #define SH_BLTIN_3 20
10237 #define SH_BLTIN_SUS 20
10238 { 2, 2, 1 },
10239 #define SH_BLTIN_PSSV 21
10240 { 0, 8, 2, 2 },
10241 #define SH_BLTIN_XXUU 22
10242 #define SH_BLTIN_UUUU 22
10243 { 1, 1, 1, 1 },
10244 #define SH_BLTIN_PV 23
10245 { 0, 8 },
10246 #define SH_BLTIN_VP 24
10247 { 8, 0 },
10248 #define SH_BLTIN_UV 25
10249 { 1, 0 },
10250 #define SH_BLTIN_VU 26
10251 { 0, 1 },
10253 /* mcmv: operands considered unsigned. */
10254 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10255 /* mperm: control value considered unsigned int. */
10256 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10257 /* mshards_q: returns signed short. */
10258 /* nsb: takes long long arg, returns unsigned char. */
10259 static struct builtin_description bdesc[] =
10261 { sh1_builtin_p,
10262 CODE_FOR_sts_fpscr, "__builtin_sh_get_fpscr", SH_BLTIN_UV, 0 },
10263 { sh1_builtin_p,
10264 CODE_FOR_set_fpscr, "__builtin_sh_set_fpscr", SH_BLTIN_VU, 0 },
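/* Illustrative usage (added for clarity; not part of the original file,
   and assumes an SH target with an FPU): the two table entries above
   become machine-dependent builtins callable from C, typed according to
   SH_BLTIN_UV (unsigned result, no argument) and SH_BLTIN_VU (void
   result, one unsigned argument):

     unsigned int old_fpscr = __builtin_sh_get_fpscr ();
     __builtin_sh_set_fpscr (old_fpscr);
*/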
10267 static tree sh_builtin_get_fpscr;
10268 static tree sh_builtin_set_fpscr;
10270 static void
10271 sh_init_builtins (void)
10273 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10274 memset (shared, 0, sizeof shared);
10276 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
10278 builtin_description* d = &bdesc[di];
10280 if (!d->is_enabled ())
10281 continue;
10283 tree type, arg_type = NULL_TREE;
10284 int signature = d->signature;
10286 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10287 type = shared[signature];
10288 else
10290 int has_result = signature_args[signature][0] != 0;
10291 tree args[3];
10293 if (! TARGET_FPU_ANY
10294 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10295 continue;
10296 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
10297 args[i] = NULL_TREE;
10298 for (int i = 3; ; i--)
10300 int arg = signature_args[signature][i];
10301 int opno = i - 1 + has_result;
10303 if (arg & 8)
10304 arg_type = ptr_type_node;
10305 else if (arg)
10306 arg_type = (*lang_hooks.types.type_for_mode)
10307 (insn_data[d->icode].operand[opno].mode, (arg & 1));
10308 else if (i)
10309 continue;
10310 else
10311 arg_type = void_type_node;
10312 if (i == 0)
10313 break;
10314 args[i-1] = arg_type;
10316 type = build_function_type_list (arg_type, args[0], args[1],
10317 args[2], NULL_TREE);
10318 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10319 shared[signature] = type;
10321 d->fndecl =
10322 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10323 NULL, NULL_TREE);
10324 /* Record the {sts,set}_fpscr decls for sh_atomic_assign_expand_fenv. */
10325 if (d->icode == CODE_FOR_sts_fpscr)
10326 sh_builtin_get_fpscr = d->fndecl;
10327 else if (d->icode == CODE_FOR_set_fpscr)
10328 sh_builtin_set_fpscr = d->fndecl;
10332 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
10334 static void
10335 sh_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
10337 const unsigned SH_FE_INVALID = 64;
10338 const unsigned SH_FE_DIVBYZERO = 32;
10339 const unsigned SH_FE_OVERFLOW = 16;
10340 const unsigned SH_FE_UNDERFLOW = 8;
10341 const unsigned SH_FE_INEXACT = 4;
10342 const unsigned HOST_WIDE_INT SH_FE_ALL_EXCEPT = (SH_FE_INVALID
10343 | SH_FE_DIVBYZERO
10344 | SH_FE_OVERFLOW
10345 | SH_FE_UNDERFLOW
10346 | SH_FE_INEXACT);
10347 const unsigned HOST_WIDE_INT SH_FE_EXCEPT_SHIFT = 5;
10348 tree fenv_var, mask, ld_fenv, masked_fenv;
10349 tree new_fenv_var, reload_fenv, restore_fnenv;
10350 tree update_call, atomic_feraiseexcept, hold_fnclex;
10352 if (! TARGET_FPU_ANY)
10353 return;
10355 /* Generate the equivalent of :
10356 unsigned int fenv_var;
10357 fenv_var = __builtin_sh_get_fpscr ();
10359 unsigned int masked_fenv;
10360 masked_fenv = fenv_var & mask;
10362 __builtin_sh_set_fpscr (masked_fenv); */
10364 fenv_var = create_tmp_var_raw (unsigned_type_node);
10365 mask = build_int_cst (unsigned_type_node,
10366 ~((SH_FE_ALL_EXCEPT << SH_FE_EXCEPT_SHIFT)
10367 | SH_FE_ALL_EXCEPT));
10368 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
10369 fenv_var, build_call_expr (sh_builtin_get_fpscr, 0));
10370 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
10371 hold_fnclex = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
10372 fenv_var = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
10373 build2 (COMPOUND_EXPR, void_type_node, masked_fenv,
10374 ld_fenv),
10375 NULL_TREE, NULL_TREE);
10376 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var, hold_fnclex);
10378 /* Store the value of masked_fenv to clear the exceptions:
10379 __builtin_sh_set_fpscr (masked_fenv); */
10381 *clear = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
10383 /* Generate the equivalent of :
10384 unsigned int new_fenv_var;
10385 new_fenv_var = __builtin_sh_get_fpscr ();
10387 __builtin_sh_set_fpscr (fenv_var);
10389 __atomic_feraiseexcept (new_fenv_var); */
10391 new_fenv_var = create_tmp_var_raw (unsigned_type_node);
10392 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
10393 build_call_expr (sh_builtin_get_fpscr, 0));
10394 restore_fnenv = build_call_expr (sh_builtin_set_fpscr, 1, fenv_var);
10395 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
10396 update_call = build_call_expr (atomic_feraiseexcept, 1,
10397 fold_convert (integer_type_node,
10398 new_fenv_var));
10399 *update = build2 (COMPOUND_EXPR, void_type_node,
10400 build2 (COMPOUND_EXPR, void_type_node,
10401 reload_fenv, restore_fnenv), update_call);
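/* Illustrative sketch (added for clarity; not part of the original file):
   the hold/clear/update trees built above bracket the compare-and-swap
   loop that the middle end emits for a C11 atomic compound assignment on
   a floating-point object, e.g.

     _Atomic float a;
     a += 1.0f;

   FPSCR exception flags raised by discarded loop iterations are cleared,
   and only those raised by the final, successful iteration are re-raised
   through __atomic_feraiseexcept.  */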
10404 /* Implements target hook vector_mode_supported_p. */
10405 bool
10406 sh_vector_mode_supported_p (machine_mode mode ATTRIBUTE_UNUSED)
10408 return false;
10411 bool
10412 sh_frame_pointer_required (void)
10414 /* If needed override this in other tm.h files to cope with various OS
10415 lossage requiring a frame pointer. */
10416 if (SUBTARGET_FRAME_POINTER_REQUIRED)
10417 return true;
10419 if (crtl->profile)
10420 return true;
10422 return false;
10425 /* Implements target hook dwarf_calling_convention. Return an enum
10426 of dwarf_calling_convention. */
10428 sh_dwarf_calling_convention (const_tree func)
10430 if (sh_attr_renesas_p (func))
10431 return DW_CC_GNU_renesas_sh;
10433 return DW_CC_normal;
10436 /* Returns the sh builtin decl for CODE. */
10437 static tree
10438 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10440 if (code >= ARRAY_SIZE (bdesc))
10441 return error_mark_node;
10443 if (!bdesc[code].is_enabled ())
10444 return error_mark_node;
10446 return bdesc[code].fndecl;
10449 /* Expand an expression EXP that calls a built-in function,
10450 with result going to TARGET if that's convenient
10451 (and in mode MODE if that's convenient).
10452 SUBTARGET may be used as the target for computing one of EXP's operands.
10453 IGNORE is nonzero if the value is to be ignored. */
10454 static rtx
10455 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10456 machine_mode mode ATTRIBUTE_UNUSED, int ignore)
10458 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10459 unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
10460 const struct builtin_description *d = &bdesc[fcode];
10461 enum insn_code icode = d->icode;
10462 int signature = d->signature;
10463 int nop = 0;
10464 rtx op[4];
10466 if (signature_args[signature][0])
10468 if (ignore)
10469 return NULL_RTX;
10471 machine_mode tmode = insn_data[icode].operand[0].mode;
10472 if (! target || GET_MODE (target) != tmode
10473 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10474 target = gen_reg_rtx (tmode);
10475 op[nop++] = target;
10477 else
10478 target = NULL_RTX;
10480 for (int i = 1; i <= 3; i++, nop++)
10482 if (! signature_args[signature][i])
10483 break;
10484 tree arg = CALL_EXPR_ARG (exp, i - 1);
10485 if (arg == error_mark_node)
10486 return const0_rtx;
10488 machine_mode opmode;
10489 tree optype;
10490 if (signature_args[signature][i] & 8)
10492 opmode = ptr_mode;
10493 optype = ptr_type_node;
10495 else
10497 opmode = insn_data[icode].operand[nop].mode;
10498 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
10501 machine_mode argmode = TYPE_MODE (TREE_TYPE (arg));
10502 if (argmode != opmode)
10503 arg = build1 (NOP_EXPR, optype, arg);
10504 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
10505 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
10506 op[nop] = copy_to_mode_reg (opmode, op[nop]);
10509 rtx pat = NULL_RTX;
10511 switch (nop)
10513 case 1:
10514 pat = (*insn_data[d->icode].genfun) (op[0]);
10515 break;
10516 case 2:
10517 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
10518 break;
10519 case 3:
10520 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
10521 break;
10522 case 4:
10523 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
10524 break;
10525 default:
10526 gcc_unreachable ();
10528 if (! pat)
10529 return NULL_RTX;
10530 emit_insn (pat);
10531 return target;
10534 /* Implement TARGET_HARD_REGNO_NREGS. On the SH all but the XD regs are
10535 UNITS_PER_WORD bytes wide. */
10537 static unsigned int
10538 sh_hard_regno_nregs (unsigned int regno, machine_mode mode)
10540 if (XD_REGISTER_P (regno))
10541 return CEIL (GET_MODE_SIZE (mode), 2 * UNITS_PER_WORD);
10542 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
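/* Example (added for clarity; not part of the original file): with
   UNITS_PER_WORD == 4, a DFmode value needs CEIL (8, 8) == 1 XD register
   but CEIL (8, 4) == 2 ordinary registers, while SImode takes a single
   register in either case.  */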
10545 /* Implement TARGET_HARD_REGNO_MODE_OK.
10547 We can allow any mode in any general register. The special registers
10548 only allow SImode. Don't allow any mode in the PR.
10550 We cannot hold DCmode values in the XD registers because alter_reg
10551 handles subregs of them incorrectly. We could work around this by
10552 spacing the XD registers like the DR registers, but this would require
10553 additional memory in every compilation to hold larger register vectors.
10554 We could hold SFmode / SCmode values in XD registers, but that
10555 would require a tertiary reload when reloading from / to memory,
10556 and a secondary reload to reload from / to general regs; that
10557 seems to be a losing proposition.
10559 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
10560 it won't be ferried through GP registers first. */
10561 static bool
10562 sh_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10564 if (SPECIAL_REGISTER_P (regno))
10565 return mode == SImode;
10567 if (regno == FPUL_REG)
10568 return (mode == SImode || mode == SFmode);
10570 if (FP_REGISTER_P (regno) && mode == SFmode)
10571 return true;
10573 if (mode == V2SFmode)
10575 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
10576 || GENERAL_REGISTER_P (regno)))
10577 return true;
10578 else
10579 return false;
10582 if (mode == V4SFmode)
10584 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
10585 || GENERAL_REGISTER_P (regno))
10586 return true;
10587 else
10588 return false;
10591 if (mode == V16SFmode)
10592 return regno == FIRST_XD_REG;
10594 if (FP_REGISTER_P (regno))
10596 if (mode == SFmode
10597 || mode == SImode
10598 || ((TARGET_SH2E) && mode == SCmode)
10599 || (((TARGET_FPU_DOUBLE && mode == DFmode) || mode == DCmode)
10600 && ((regno - FIRST_FP_REG) & 1) == 0)
10601 || (TARGET_SH4 && mode == TImode
10602 && ((regno - FIRST_FP_REG) & 3) == 0))
10603 return true;
10604 else
10605 return false;
10608 if (XD_REGISTER_P (regno))
10609 return mode == DFmode;
10611 if (regno == PR_REG)
10612 return mode == SImode;
10614 if (regno == FPSCR_REG)
10615 return mode == SImode;
10617 return true;
10620 /* Implement TARGET_MODES_TIEABLE_P.
10622 If TARGET_HARD_REGNO_MODE_OK could produce different values for MODE1
10623 and MODE2, for any hard reg, then this must be false for correct output.
10624 That's the case for xd registers: we don't hold SFmode values in
10625 them, so we can't tie an SFmode pseudo with one in another
10626 floating-point mode. */
10628 static bool
10629 sh_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10631 return (mode1 == mode2
10632 || (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)
10633 && (mode1 != SFmode && mode2 != SFmode)));
10636 /* Specify the modes required to caller save a given hard regno.
10637 choose_hard_reg_mode chooses mode based on TARGET_HARD_REGNO_MODE_OK
10638 and returns ?Imode for float regs when sh_hard_regno_mode_ok
10639 permits integer modes on them. That makes LRA's split process
10640 unhappy. See PR55212. */
10642 machine_mode
10643 sh_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
10644 machine_mode mode)
10646 if (FP_REGISTER_P (regno)
10647 && (mode == SFmode
10648 || mode == SCmode
10649 || ((mode == DFmode || mode == DCmode)
10650 && ((regno - FIRST_FP_REG) & 1) == 0)))
10651 return mode;
10653 return choose_hard_reg_mode (regno, nregs, NULL);
10656 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10657 static bool
10658 sh_can_change_mode_class (machine_mode from, machine_mode to,
10659 reg_class_t rclass)
10661 /* We want to enable the use of SUBREGs as a means to
10662 VEC_SELECT a single element of a vector. */
10664 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
10665 This can be problematic when SFmode vector subregs need to be accessed
10666 on the stack with displacement addressing, as it happens with -O0.
10667 Thus we disallow the mode change for -O0. */
10668 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
10669 return optimize ? !reg_classes_intersect_p (GENERAL_REGS, rclass) : true;
10671 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
10673 if (TARGET_LITTLE_ENDIAN)
10675 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
10676 return !reg_classes_intersect_p (DF_REGS, rclass);
10678 else
10680 if (GET_MODE_SIZE (from) < 8)
10681 return !reg_classes_intersect_p (DF_REGS, rclass);
10684 return true;
10687 /* Implement TARGET_C_MODE_FOR_FLOATING_TYPE. Return SFmode or DFmode
10688 for TI_DOUBLE_TYPE which is for double type, go with the default one
10689 for the others. */
10691 static machine_mode
10692 sh_c_mode_for_floating_type (enum tree_index ti)
10694 /* Since the SH2e has only `float' support, it is desirable to make all
10695 floating point types equivalent to `float'. */
10696 if (ti == TI_DOUBLE_TYPE)
10697 return TARGET_FPU_SINGLE_ONLY ? SFmode : DFmode;
10698 return default_mode_for_floating_type (ti);
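/* Illustrative consequence (added for clarity; not part of the original
   file): on a single-precision-only configuration such as -m2e, a C
   'double' is therefore given SFmode, so 'double d = 1.0;' occupies four
   bytes and has the same representation as 'float'; configurations with a
   double-precision FPU keep the default DFmode.  */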
10701 /* Return true if registers in machine mode MODE will likely be
10702 allocated to registers in small register classes. */
10703 bool
10704 sh_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
10706 return true;
10709 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
10710 that label is used. */
10711 void
10712 sh_mark_label (rtx address, int nuses)
10714 if (GOTOFF_P (address))
10716 /* Extract the label or symbol. */
10717 address = XEXP (address, 0);
10718 if (GET_CODE (address) == PLUS)
10719 address = XEXP (address, 0);
10720 address = XVECEXP (address, 0, 0);
10722 if (GET_CODE (address) == LABEL_REF
10723 && LABEL_P (XEXP (address, 0)))
10724 LABEL_NUSES (XEXP (address, 0)) += nuses;
10727 /* Compute extra cost of moving data between one register class
10728 and another.
10730 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
10731 uses this information. Hence, the general register <-> floating point
10732 register information here is not used for SFmode. */
10733 static int
10734 sh_register_move_cost (machine_mode mode,
10735 reg_class_t srcclass, reg_class_t dstclass)
10737 if (dstclass == T_REGS || dstclass == PR_REGS)
10738 return 10;
10740 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
10741 return 4;
10743 if (mode == SImode && TARGET_FMOVD
10744 && REGCLASS_HAS_FP_REG (srcclass)
10745 && REGCLASS_HAS_FP_REG (dstclass))
10746 return 4;
10748 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
10749 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
10751 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
10752 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
10753 return 9;
10755 if ((REGCLASS_HAS_FP_REG (dstclass)
10756 && REGCLASS_HAS_GENERAL_REG (srcclass))
10757 || (REGCLASS_HAS_GENERAL_REG (dstclass)
10758 && REGCLASS_HAS_FP_REG (srcclass)))
10760 /* Discourage trying to use fp regs for a pointer. This also
10761 discourages fp regs with SImode because Pmode is an alias
10762 of SImode on this target. See PR target/48596. */
10763 int addend = (mode == Pmode) ? 40 : 0;
10765 return ((TARGET_FMOVD ? 8 : 12) + addend)
10766 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10769 if ((dstclass == FPUL_REGS
10770 && REGCLASS_HAS_GENERAL_REG (srcclass))
10771 || (srcclass == FPUL_REGS
10772 && REGCLASS_HAS_GENERAL_REG (dstclass)))
10773 return 5;
10775 if ((dstclass == FPUL_REGS
10776 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
10777 || (srcclass == FPUL_REGS
10778 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
10779 return 7;
10781 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10782 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10783 return 4;
10785 if (TARGET_FMOVD
10786 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
10787 && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10788 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10790 if (((dstclass == FP_REGS || dstclass == DF_REGS)
10791 && (srcclass == PR_REGS))
10792 || ((srcclass == FP_REGS || srcclass == DF_REGS)
10793 && (dstclass == PR_REGS)))
10794 return 7;
10796 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
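/* Cost examples (added for clarity; not part of the original file): with
   TARGET_FMOVD, moving a DFmode value between general and FP registers
   costs (8 + 0) * 1 == 8; moving an SImode value, which is Pmode here and
   thus a potential pointer, costs (12 + 40) * 1 == 52 without FMOVD, which
   is what steers pointers away from FP registers as the PR target/48596
   note explains.  */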
10799 static rtx
10800 emit_load_ptr (rtx reg, rtx addr)
10802 rtx mem = gen_const_mem (ptr_mode, addr);
10804 if (Pmode != ptr_mode)
10805 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10806 return emit_move_insn (reg, mem);
10809 static void
10810 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10811 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10812 tree function)
10814 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
10815 CUMULATIVE_ARGS cum;
10816 int structure_value_byref = 0;
10817 rtx this_rtx, this_value, sibcall, funexp;
10818 rtx_insn *insns;
10819 tree funtype = TREE_TYPE (function);
10820 int simple_add = CONST_OK_FOR_ADD (delta);
10821 int did_load = 0;
10822 rtx scratch0, scratch1, scratch2;
10824 reload_completed = 1;
10825 epilogue_completed = 1;
10826 crtl->uses_only_leaf_regs = 1;
10828 emit_note (NOTE_INSN_PROLOGUE_END);
10830 /* Find the "this" pointer. We have such a wide range of ABIs for the
10831 SH that it's best to do this completely machine independently.
10832 "this" is passed as first argument, unless a structure return pointer
10833 comes first, in which case "this" comes second. */
10834 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
10835 #ifndef PCC_STATIC_STRUCT_RETURN
10836 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10837 structure_value_byref = 1;
10838 #endif /* not PCC_STATIC_STRUCT_RETURN */
10839 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
10841 tree ptype = build_pointer_type (TREE_TYPE (funtype));
10843 function_arg_info ptr_arg (ptype, Pmode, /*named=*/true);
10844 sh_function_arg_advance (pack_cumulative_args (&cum), ptr_arg);
10846 function_arg_info ptr_arg (ptr_type_node, Pmode, /*named=*/true);
10847 this_rtx = sh_function_arg (pack_cumulative_args (&cum), ptr_arg);
10849 /* For SHcompact, we only have r0 for a scratch register: r1 is the
10850 static chain pointer (even if you can't have nested virtual functions
10851 right now, someone might implement them sometime), and the rest of the
10852 registers are used for argument passing, are callee-saved, or reserved. */
10853 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
10854 -ffixed-reg has been used. */
10855 if (! call_used_or_fixed_reg_p (0) || fixed_regs[0])
10856 error ("r0 needs to be available as a call-clobbered register");
10857 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
10860 if (call_used_or_fixed_reg_p (1) && ! fixed_regs[1])
10861 scratch1 = gen_rtx_REG (ptr_mode, 1);
10862 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
10863 pointing to where struct values are to be returned. */
10864 if (call_used_or_fixed_reg_p (3) && ! fixed_regs[3])
10865 scratch2 = gen_rtx_REG (Pmode, 3);
10868 this_value = plus_constant (Pmode, this_rtx, delta);
10869 if (vcall_offset
10870 && (simple_add || scratch0 != scratch1)
10871 && strict_memory_address_p (ptr_mode, this_value))
10873 emit_load_ptr (scratch0, this_value);
10874 did_load = 1;
10877 if (!delta)
10878 ; /* Do nothing. */
10879 else if (simple_add)
10880 emit_move_insn (this_rtx, this_value);
10881 else
10883 emit_move_insn (scratch1, GEN_INT (delta));
10884 emit_insn (gen_add2_insn (this_rtx, scratch1));
10887 if (vcall_offset)
10889 rtx offset_addr;
10891 if (!did_load)
10892 emit_load_ptr (scratch0, this_rtx);
10894 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
10895 if (strict_memory_address_p (ptr_mode, offset_addr))
10896 ; /* Do nothing. */
10897 else if (scratch0 != scratch1)
10899 /* scratch0 != scratch1, and we have indexed loads. Get better
10900 schedule by loading the offset into r1 and using an indexed
10901 load - then the load of r1 can issue before the load from
10902 (this_rtx + delta) finishes. */
10903 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10904 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
10906 else if (CONST_OK_FOR_ADD (vcall_offset))
10908 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
10909 offset_addr = scratch0;
10911 else
10912 gcc_unreachable (); /* FIXME */
10913 emit_load_ptr (scratch0, offset_addr);
10915 if (Pmode != ptr_mode)
10916 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
10917 emit_insn (gen_add2_insn (this_rtx, scratch0));
10920 /* Generate a tail call to the target function. */
10921 if (! TREE_USED (function))
10923 assemble_external (function);
10924 TREE_USED (function) = 1;
10926 funexp = XEXP (DECL_RTL (function), 0);
10927 /* If the function is overridden, so is the thunk, hence we don't
10928 need GOT addressing even if this is a public symbol. */
10929 #if 0
10930 if (TARGET_SH1 && ! flag_weak)
10931 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
10932 else
10933 #endif
10934 if (TARGET_SH2 && flag_pic)
10936 if (TARGET_FDPIC)
10938 sibcall = gen_sibcall_pcrel_fdpic (funexp, const0_rtx);
10939 XEXP (XVECEXP (sibcall, 0, 3), 0) = scratch2;
10941 else
10943 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
10944 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
10947 else
10949 emit_move_insn (scratch2, funexp);
10950 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
10951 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
10953 sibcall = emit_call_insn (sibcall);
10954 SIBLING_CALL_P (sibcall) = 1;
10955 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
10956 emit_barrier ();
10958 /* Run just enough of rest_of_compilation to do scheduling and get
10959 the insns emitted. */
10961 insns = get_insns ();
10963 if (optimize > 0)
10965 if (! cfun->cfg)
10966 init_flow (cfun);
10967 split_all_insns_noflow ();
10970 sh_reorg ();
10971 shorten_branches (insns);
10972 assemble_start_function (thunk_fndecl, fnname);
10973 final_start_function (insns, file, 1);
10974 final (insns, file, 1);
10975 final_end_function ();
10976 assemble_end_function (thunk_fndecl, fnname);
10978 reload_completed = 0;
10979 epilogue_completed = 0;
10982 /* Return an RTX pair for the address and call site label of a function
10983 NAME of kind KIND, placing the result in TARGET if not NULL. For
10984 SFUNC_STATIC, if FDPIC, the LAB member of result will be set to
10985 (const_int 0) if jsr should be used, or a label_ref if bsrf should
10986 be used. For FDPIC, both SFUNC_GOT and SFUNC_STATIC will return the
10987 address of the function itself, not a function descriptor, so they
10988 can only be used with functions not using the FDPIC register that
10989 are known to be called directly without a PLT entry. */
10991 function_symbol_result
10992 function_symbol (rtx target, const char *name, sh_function_kind kind)
10994 /* If this is not an ordinary function, the name usually comes from a
10995 string literal or an sprintf buffer. Make sure we use the same
10996 string consistently, so that cse will be able to unify address loads. */
10997 if (kind != FUNCTION_ORDINARY)
10998 name = IDENTIFIER_POINTER (get_identifier (name));
10999 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
11000 rtx lab = const0_rtx;
11001 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
11002 if (flag_pic)
11003 switch (kind)
11005 case FUNCTION_ORDINARY:
11006 break;
11007 case SFUNC_GOT:
11009 rtx reg = target ? target : gen_reg_rtx (Pmode);
11011 emit_insn (gen_symGOT2reg (reg, sym));
11012 sym = reg;
11013 break;
11015 case SFUNC_STATIC:
11017 rtx reg = target ? target : gen_reg_rtx (Pmode);
11019 if (TARGET_FDPIC)
11021 /* We use PC-relative calls, since GOTOFF can only refer
11022 to writable data. This works along with sh_sfunc_call. */
11023 lab = PATTERN (gen_call_site ());
11024 emit_insn (gen_sym_label2reg (reg, sym, lab));
11026 else
11028 /* ??? To allow cse to work, we use GOTOFF relocations.
11029 We could add combiner patterns to transform this into
11030 straight pc-relative calls with sym2PIC / bsrf when
11031 label load and function call are still 1:1 and in the
11032 same basic block during combine. */
11033 emit_insn (gen_symGOTOFF2reg (reg, sym));
11036 sym = reg;
11037 break;
11040 if (target && sym != target)
11042 emit_move_insn (target, sym);
11043 return function_symbol_result (target, lab);
11045 return function_symbol_result (sym, lab);
11048 /* Find the number of the first general purpose register whose bit is
11049 set in S. */
11050 static int
11051 scavenge_reg (HARD_REG_SET *s)
11053 for (int r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11054 if (TEST_HARD_REG_BIT (*s, r))
11055 return r;
11056 return -1;
11060 sh_get_pr_initial_val (void)
11062 /* If we haven't finished rtl generation, there might be a nonlocal label
11063 that we haven't seen yet.
11064 ??? get_hard_reg_initial_val fails if it is called after register
11065 allocation has started, unless it has been called before for the
11066 same register. And even then, we end up in trouble if we didn't use
11067 the register in the same basic block before. So call
11068 get_hard_reg_initial_val now and wrap it in an unspec if we might
11069 need to replace it. */
11070 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11071 combine can put the pseudo returned by get_hard_reg_initial_val into
11072 instructions that need a general purpose register, which will fail to
11073 be recognized when the pseudo becomes allocated to PR. */
11074 rtx val = get_hard_reg_initial_val (Pmode, PR_REG);
11075 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11078 bool
11079 sh_expand_t_scc (rtx operands[])
11081 enum rtx_code code = GET_CODE (operands[1]);
11082 rtx target = operands[0];
11083 rtx op0 = operands[2];
11084 rtx op1 = operands[3];
11085 rtx result = target;
11087 if (!REG_P (op0) || REGNO (op0) != T_REG
11088 || !CONST_INT_P (op1))
11089 return false;
11090 if (!REG_P (result))
11091 result = gen_reg_rtx (SImode);
11092 HOST_WIDE_INT val = INTVAL (op1);
11093 if ((code == EQ && val == 1) || (code == NE && val == 0))
11094 emit_insn (gen_movt (result, get_t_reg_rtx ()));
11095 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11096 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
11097 else if (code == EQ || code == NE)
11098 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11099 else
11100 return false;
11101 if (result != target)
11102 emit_move_insn (target, result);
11103 return true;
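/* Worked example (added for clarity; not part of the original file):
   'reg = (T == 1)' expands to a single movt and 'reg = (T == 0)' to
   movnegt; comparing T against any other constant is decided at compile
   time (EQ is always false, NE always true) and becomes a plain constant
   move.  Any other operand shape returns false and the caller falls back
   to the generic expansion.  */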
11106 /* INSN is an sfunc; return the rtx that describes the address used. */
11107 static rtx
11108 extract_sfunc_addr (rtx insn)
11110 rtx pattern = PATTERN (insn);
11111 const int len = XVECLEN (pattern, 0);
11112 for (int i = 0; i < len; i++)
11114 rtx part = XVECEXP (pattern, 0, i);
11115 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11116 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11117 return XEXP (part, 0);
11119 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11120 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11123 /* Verify that the register in use_sfunc_addr still agrees with the address
11124 used in the sfunc. This prevents fill_slots_from_thread from changing
11125 use_sfunc_addr.
11126 INSN is the use_sfunc_addr instruction, and REG is the register it
11127 guards. */
11128 bool
11129 check_use_sfunc_addr (rtx_insn *insn, rtx reg)
11131 /* Search for the sfunc. It should really come right after INSN. */
11132 while ((insn = NEXT_INSN (insn)))
11134 if (LABEL_P (insn) || JUMP_P (insn))
11135 break;
11136 if (! INSN_P (insn))
11137 continue;
11139 if (rtx_sequence *seq = dyn_cast<rtx_sequence *> (PATTERN (insn)))
11140 insn = seq->insn (0);
11141 if (GET_CODE (PATTERN (insn)) != PARALLEL
11142 || get_attr_type (insn) != TYPE_SFUNC)
11143 continue;
11144 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11146 gcc_unreachable ();
11149 /* This function returns a constant rtx that represents 2**15 / pi in
11150 SFmode. It's used to scale SFmode angles, in radians, to a fixed-point
11151 signed 16.16-bit fraction of a full circle, i.e. 2*pi maps to 0x10000. */
11152 static GTY(()) rtx sh_fsca_sf2int_rtx;
11155 sh_fsca_sf2int (void)
11157 if (! sh_fsca_sf2int_rtx)
11159 REAL_VALUE_TYPE rv;
11161 real_from_string (&rv, "10430.378350470453");
11162 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11165 return sh_fsca_sf2int_rtx;
11168 /* This function returns a constant rtx that represents pi / 2**15 in
11169 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
11170 of a full circle back to an SFmode value in radians, i.e. 0x10000
11171 maps to 2*pi. */
11172 static GTY(()) rtx sh_fsca_int2sf_rtx;
11175 sh_fsca_int2sf (void)
11177 if (! sh_fsca_int2sf_rtx)
11179 REAL_VALUE_TYPE rv;
11181 real_from_string (&rv, "9.587379924285257e-5");
11182 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11185 return sh_fsca_int2sf_rtx;
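/* A worked example for the two constants above: they are reciprocals of
   each other within SFmode precision,
     2**15 / pi = 32768 / 3.14159... ~= 10430.378350470453
     pi / 2**15 = 3.14159... / 32768 ~= 9.587379924285257e-5
   so an angle of pi/2 radians scales to about 16384 = 0x4000, a quarter
   of the 0x10000 full circle expected by fsca. */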
11188 /* Initialize the CUMULATIVE_ARGS structure. */
11189 void
11190 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11191 tree fntype,
11192 rtx libname ATTRIBUTE_UNUSED,
11193 tree fndecl,
11194 signed int n_named_args,
11195 machine_mode mode)
11197 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11198 pcum->free_single_fp_reg = 0;
11199 pcum->outgoing = n_named_args != -1;
11201 /* FIXME: Should we check TARGET_HITACHI here ??? */
11202 pcum->renesas_abi = sh_attr_renesas_p (fntype);
11204 if (fntype)
11206 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11207 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11208 pcum->prototype_p = prototype_p (fntype);
11209 pcum->arg_count [(int) SH_ARG_INT] = false;
11211 else
11213 pcum->arg_count [(int) SH_ARG_INT] = 0;
11214 pcum->prototype_p = false;
11215 if (mode != VOIDmode)
11217 /* If the default ABI is the Renesas ABI then all library
11218 calls must assume that the library will be using the
11219 Renesas ABI. So if the function would return its result
11220 in memory then we must force the address of this memory
11221 block onto the stack. Ideally we would like to call
11222 targetm.calls.return_in_memory() here but we do not have
11223 the TYPE or the FNDECL available so we synthesize the
11224 contents of that function as best we can. */
11225 pcum->force_mem =
11226 (TARGET_DEFAULT & MASK_HITACHI)
11227 && (mode == BLKmode
11228 || (GET_MODE_SIZE (mode) > 4
11229 && !(mode == DFmode
11230 && TARGET_FPU_DOUBLE)));
11232 else
11233 pcum->force_mem = false;
11238 sh_gen_truncate (machine_mode mode, rtx x, int need_sign_ext)
11240 enum rtx_code code = TRUNCATE;
11242 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11244 rtx inner = XEXP (x, 0);
11245 machine_mode inner_mode = GET_MODE (inner);
11247 if (inner_mode == mode)
11248 return inner;
11249 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11250 x = inner;
11251 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11252 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11254 code = GET_CODE (x);
11255 x = inner;
11258 return gen_rtx_fmt_e (code, mode, x);
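/* Some illustrative inputs and results for sh_gen_truncate:
     (HImode, (sign_extend:DI (reg:HI a)), 1) -> (reg:HI a)
     (HImode, (zero_extend:DI (reg:SI a)), 0) -> (truncate:HI (reg:SI a))
     (SImode, (sign_extend:DI (reg:HI a)), 1) -> (sign_extend:SI (reg:HI a))
   where 'a' stands for an arbitrary register. */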
11261 /* Load and store depend on the highpart of the address. However,
11262 set_attr_alternative does not give well-defined results before reload,
11263 so we must look at the rtl ourselves to see if any of the feeding
11264 registers is used in a memref.
11266 Return true iff INSN contains a MEM. */
11267 bool
11268 sh_contains_memref_p (rtx insn)
11270 subrtx_iterator::array_type array;
11271 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
11272 if (MEM_P (*iter))
11273 return true;
11274 return false;
11277 /* Return true iff INSN loads a banked register. */
11278 bool
11279 sh_loads_bankedreg_p (rtx insn)
11281 if (GET_CODE (PATTERN (insn)) == SET)
11283 rtx op = SET_DEST (PATTERN(insn));
11284 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
11285 return true;
11288 return false;
11291 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
11292 static reg_class_t
11293 sh_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
11295 return rclass;
11298 /* Implement TARGET_SECONDARY_RELOAD. */
11299 static reg_class_t
11300 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
11301 machine_mode mode, secondary_reload_info *sri)
11303 enum reg_class rclass = (enum reg_class) rclass_i;
11305 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
11306 && REG_P (XEXP (XEXP (x, 0), 0))
11307 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
11308 return rclass == R0_REGS ? NO_REGS : R0_REGS;
11310 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
11311 return rclass == R0_REGS ? NO_REGS : R0_REGS;
11313 if (REG_P (x) && REGNO (x) == GBR_REG)
11314 return NO_REGS;
11316 if (in_p)
11318 if (REGCLASS_HAS_FP_REG (rclass)
11319 && immediate_operand ((x), mode)
11320 && ! ((fp_zero_operand (x) || fp_one_operand (x)) && mode == SFmode))
11321 switch (mode)
11323 case E_SFmode:
11324 sri->icode = CODE_FOR_reload_insf__frn;
11325 return NO_REGS;
11326 case E_DFmode:
11327 sri->icode = CODE_FOR_reload_indf__frn;
11328 return NO_REGS;
11329 case E_SImode:
11330 /* ??? If we knew that we are in the appropriate mode -
11331 single precision - we could use a reload pattern directly. */
11332 return FPUL_REGS;
11333 default:
11334 abort ();
11336 if (rclass == FPUL_REGS
11337 && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
11338 || REGNO (x) == T_REG))
11339 || GET_CODE (x) == PLUS))
11340 return GENERAL_REGS;
11341 if (rclass == FPUL_REGS && immediate_operand (x, mode))
11343 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
11344 return GENERAL_REGS;
11345 else if (mode == SFmode)
11346 return FP_REGS;
11347 sri->icode = CODE_FOR_reload_insi__i_fpul;
11348 return NO_REGS;
11350 if (rclass == FPSCR_REGS
11351 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
11352 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
11353 return GENERAL_REGS;
11354 } /* end of input-only processing. */
11356 if (((REGCLASS_HAS_FP_REG (rclass)
11357 && (REG_P (x)
11358 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
11359 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
11360 && TARGET_FMOVD))))
11361 || (REGCLASS_HAS_GENERAL_REG (rclass)
11362 && REG_P (x)
11363 && FP_REGISTER_P (REGNO (x))))
11364 && (mode == SFmode || mode == SImode))
11365 return FPUL_REGS;
11366 if ((rclass == FPUL_REGS
11367 || (REGCLASS_HAS_FP_REG (rclass) && mode == SImode))
11368 && (MEM_P (x)
11369 || (REG_P (x)
11370 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
11371 || REGNO (x) == T_REG
11372 || system_reg_operand (x, VOIDmode)))))
11374 if (rclass == FPUL_REGS)
11375 return GENERAL_REGS;
11376 return NO_REGS; // LRA wants NO_REGS here, it used to be FPUL_REGS;
11379 if ((rclass == MAC_REGS || rclass == PR_REGS)
11380 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
11381 && rclass != REGNO_REG_CLASS (REGNO (x)))
11382 return GENERAL_REGS;
11384 /* If we get here, fall back to loading the FPUL register through general registers.
11385 This case can happen when movsi_ie insn is picked initially to
11386 load/store the FPUL register from/to another register, and then the
11387 other register is allocated on the stack. */
11388 if (rclass == FPUL_REGS && true_regnum (x) == -1)
11389 return GENERAL_REGS;
11391 /* Force mov.b / mov.w displacement addressing insn to use R0 as
11392 the other operand.
11393 On SH2A we could also just leave it alone here, which would result in a
11394 4 byte move insn being generated instead. However, for this to work
11395 the insns must have the appropriate alternatives. */
11396 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
11397 && satisfies_constraint_Sdd (x)
11398 && sh_disp_addr_displacement (x)
11399 <= sh_max_mov_insn_displacement (mode, false))
11400 return R0_REGS;
11402 /* When reload is trying to address a QImode or HImode subreg on the stack,
11403 force any subreg byte into R0_REGS, as this is going to become a
11404 displacement address.
11405 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
11406 is on the stack, the memref to it might already require a displacement
11407 and that has to be added to the final address. At this point we don't
11408 know the cumulative displacement so we assume the worst case. */
11409 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
11410 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
11411 return R0_REGS;
11413 return NO_REGS;
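/* As an example of the R0 cases above: the QImode / HImode displacement
   addressing insns (mov.b / mov.w with @(disp,Rn)) only exist with R0 as
   the data operand, so whenever such an address is involved and the reload
   class is not already R0_REGS, R0_REGS is requested as the secondary
   reload class. */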
11416 /* Return true if SUBST can't safely replace its equivalent during RA. */
11417 static bool
11418 sh_cannot_substitute_mem_equiv_p (rtx)
11420 /* If SUBST is mem[base+index] or QI/HImode mem[base+disp], the insn
11421 uses R0 and may cause spill failure when R0 is already used.
11422 We have to return true for that case at least.
11423 Moreover, SH relies heavily on R0 and does not have enough hard
11424 registers to make the equiv substitution win in size or speed on
11425 average working sets. The pseudos produced to hold the equiv values
11426 can't get good hard registers in the bad cases and end up with memory
11427 save/restore insns which make the code worse. */
11428 return true;
11431 /* Implement TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT. */
11432 static bool
11433 sh_legitimize_address_displacement (rtx *offset1, rtx *offset2,
11434 poly_int64 orig_offset,
11435 machine_mode mode)
11437 if ((TARGET_FPU_DOUBLE && mode == DFmode)
11438 || (TARGET_SH2E && mode == SFmode))
11439 return false;
11441 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, orig_offset);
11442 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
11444 *offset1 = adj.offset_adjust;
11445 *offset2 = adj.mov_disp;
11446 return true;
11449 return false;
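/* Note that the native displacement ranges are small (0..15 bytes for
   mov.b, 0..30 for mov.w, 0..60 for mov.l), so an access at a larger
   offset is split by sh_find_mov_disp_adjust into a base register
   adjustment plus a residual displacement that fits the insn. */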
11452 /* Return true if the movsf insn should be split with an additional
11453 register. */
11454 bool
11455 sh_movsf_ie_ra_split_p (rtx op0, rtx op1, rtx op2)
11457 /* op0 == op1 */
11458 if (rtx_equal_p (op0, op1))
11459 return true;
11460 /* fy, FQ, reg */
11461 if (GET_CODE (op1) == CONST_DOUBLE
11462 && ! satisfies_constraint_G (op1)
11463 && ! satisfies_constraint_H (op1)
11464 && REG_P (op0)
11465 && REG_P (op2))
11466 return true;
11467 /* f, r, y */
11468 if (REG_P (op0) && FP_REGISTER_P (REGNO (op0))
11469 && REG_P (op1) && GENERAL_REGISTER_P (REGNO (op1))
11470 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
11471 return true;
11472 /* r, f, y */
11473 if (REG_P (op1) && FP_REGISTER_P (REGNO (op1))
11474 && REG_P (op0) && GENERAL_REGISTER_P (REGNO (op0))
11475 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
11476 return true;
11478 return false;
11481 static void
11482 sh_conditional_register_usage (void)
11484 for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
11485 if (! VALID_REGISTER_P (regno))
11486 fixed_regs[regno] = 1;
11487 /* When generating PIC code, the PIC offset table register must be kept fixed. */
11488 if (flag_pic)
11489 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11490 if (TARGET_FDPIC)
11492 fixed_regs[PIC_REG] = 1;
11493 call_used_regs[PIC_REG] = 1;
11495 /* Renesas saves and restores mac registers on call. */
11496 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
11498 call_used_regs[MACH_REG] = 0;
11499 call_used_regs[MACL_REG] = 0;
11502 for (int regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
11503 if (! fixed_regs[regno] && call_used_regs[regno])
11504 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
11506 call_used_regs[FPSCR_MODES_REG] = 0;
11507 call_used_regs[FPSCR_STAT_REG] = 0;
11510 /* Implement TARGET_LEGITIMATE_CONSTANT_P
11512 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
11513 static bool
11514 sh_legitimate_constant_p (machine_mode mode, rtx x)
11516 if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
11518 rtx base, offset;
11519 split_const (x, &base, &offset);
11521 if (GET_CODE (base) == SYMBOL_REF
11522 && !offset_within_block_p (base, INTVAL (offset)))
11523 return false;
11526 if (TARGET_FDPIC
11527 && (SYMBOLIC_CONST_P (x)
11528 || (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
11529 && SYMBOLIC_CONST_P (XEXP (XEXP (x, 0), 0)))))
11530 return false;
11532 return GET_CODE (x) != CONST_DOUBLE
11533 || mode == DFmode || mode == SFmode
11534 || mode == DImode || GET_MODE (x) == VOIDmode;
11537 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
11539 static void
11540 sh_init_sync_libfuncs (void)
11542 init_sync_libfuncs (UNITS_PER_WORD);
11545 /* Return true if it is appropriate to emit `ret' instructions in the
11546 body of a function. */
11547 bool
11548 sh_can_use_simple_return_p (void)
11550 if (! reload_completed || frame_pointer_needed)
11551 return false;
11553 /* Moving the prologue around doesn't reduce the size. */
11554 if (optimize_function_for_size_p (cfun))
11555 return false;
11557 /* Finally, allow for pr save. */
11558 HARD_REG_SET live_regs_mask;
11559 int d = calc_live_regs (&live_regs_mask);
11561 if (rounded_frame_size (d) > 4)
11562 return false;
11564 return true;
11567 /*------------------------------------------------------------------------------
11568 Address mode optimization support code
11571 typedef HOST_WIDE_INT disp_t;
11572 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
11573 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
11574 static const disp_t INVALID_DISP = MAX_DISP;
11576 /* A memory reference which is described by a base register and a
11577 displacement. */
11578 class base_reg_disp
11580 public:
11581 base_reg_disp (rtx br, disp_t d);
11583 bool is_reg (void) const;
11584 bool is_disp (void) const;
11585 rtx reg (void) const;
11586 disp_t disp (void) const;
11588 private:
11589 rtx reg_;
11590 disp_t disp_;
11593 inline
11594 base_reg_disp::base_reg_disp (rtx br, disp_t d)
11595 : reg_ (br), disp_ (d)
11599 inline bool
11600 base_reg_disp::is_reg (void) const
11602 return reg_ != NULL_RTX && disp_ != INVALID_DISP;
11605 inline bool
11606 base_reg_disp::is_disp (void) const
11608 return reg_ == NULL_RTX && disp_ != INVALID_DISP;
11611 inline rtx
11612 base_reg_disp::reg (void) const
11614 return reg_;
11617 inline disp_t
11618 base_reg_disp::disp (void) const
11620 return disp_;
11623 /* Find the base register and calculate the displacement for a given
11624 address rtx 'x'. */
11625 static base_reg_disp
11626 sh_find_base_reg_disp (rtx_insn* insn, rtx x, disp_t disp = 0,
11627 rtx base_reg = NULL)
11629 if (REG_P (x))
11631 if (REGNO (x) == GBR_REG)
11632 return base_reg_disp (x, disp);
11634 /* We've reached a hard-reg. This is probably the point where
11635 function args are copied to pseudos. Do not go any further and
11636 stick to the pseudo. If the original mem addr was in a hard reg
11637 from the beginning, it will become the base reg. */
11638 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
11639 return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
11641 /* Find the def of the reg and trace it. If there is more than one
11642 def and they are not all the same, assume it's not safe to proceed. */
11643 rtx_insn* last_i = NULL;
11644 rtx last_set = NULL;
11645 for (df_ref d = DF_REG_DEF_CHAIN (REGNO (x)); d != NULL;
11646 d = DF_REF_NEXT_REG (d))
11648 rtx set = const_cast<rtx> (set_of (x, DF_REF_INSN (d)));
11650 /* Accept multiple defs, as long as they are equal. */
11651 if (last_set == NULL || rtx_equal_p (last_set, set))
11653 last_i = DF_REF_INSN (d);
11654 last_set = set;
11656 else
11658 last_i = NULL;
11659 last_set = NULL;
11660 break;
11664 if (last_set != NULL && last_i != NULL)
11665 return sh_find_base_reg_disp (last_i, XEXP (last_set, 1), disp,
11666 XEXP (last_set, 0));
11668 /* If we get here, no previous insn was found that sets the reg.
11669 The input reg is already the base reg. */
11670 return base_reg_disp (x, disp);
11673 else if (GET_CODE (x) == PLUS)
11675 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
11676 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
11678 /* Either left or right val must be a reg.
11679 We don't handle the case of 'reg + reg' here. */
11680 if (left_val.is_reg () && right_val.is_disp ())
11681 return base_reg_disp (left_val.reg (), left_val.disp ()
11682 + right_val.disp () + disp);
11683 else if (right_val.is_reg () && left_val.is_disp ())
11684 return base_reg_disp (right_val.reg (), right_val.disp ()
11685 + left_val.disp () + disp);
11686 else
11687 return base_reg_disp (base_reg, disp);
11690 else if (CONST_INT_P (x))
11691 return base_reg_disp (NULL, disp + INTVAL (x));
11693 /* Didn't find anything useful. */
11694 return base_reg_disp (base_reg, disp);
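/* An illustrative trace, using a hypothetical pseudo r130: for the address
   (plus (reg r130) (const_int 4)) where r130 was set from
   (plus (reg GBR) (const_int 8)), the recursion above yields the base
   register GBR with a total displacement of 12. */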
11697 /* Given an insn and a memory operand, try to find an equivalent GBR
11698 based memory address and return the corresponding new memory address.
11699 Return NULL_RTX if not found. */
11701 sh_find_equiv_gbr_addr (rtx_insn* insn, rtx mem)
11703 if (!MEM_P (mem) || gbr_address_mem (mem, GET_MODE (mem)))
11704 return NULL_RTX;
11706 /* Leave post/pre inc/dec or any other side effect addresses alone. */
11707 if (side_effects_p (XEXP (mem, 0)))
11708 return NULL_RTX;
11710 /* When not optimizing there might be no dataflow available. */
11711 if (df == NULL)
11712 return NULL_RTX;
11714 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
11716 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
11718 /* If GBR is marked as call clobbered we bail out if we see a call.
11719 FIXME: Actually we should check if this mem refers to the gbr value
11720 before or after the call. If there is a store_gbr preceding this
11721 mem, it's safe to use GBR for this mem.
11723 If GBR is not marked as call clobbered, but there is some other
11724 def than a call, it's probably a load_gbr upon which we also
11725 bail out to be on the safe side.
11726 FIXME: Should check if we have a use-after-def case, such as
11727 the call case above. */
11728 for (df_ref d = DF_REG_DEF_CHAIN (GBR_REG); d != NULL;
11729 d = DF_REF_NEXT_REG (d))
11731 if (CALL_P (DF_REF_INSN (d)))
11733 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, GBR_REG))
11734 return NULL_RTX;
11735 else
11736 continue;
11738 else
11739 return NULL_RTX;
11742 rtx disp = GEN_INT (gbr_disp.disp ());
11743 if (gbr_displacement (disp, GET_MODE (mem)))
11744 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
11747 return NULL_RTX;
11750 /*------------------------------------------------------------------------------
11751 Manual insn combine support code.
11754 /* Return true if the specified insn contains any UNSPECs or
11755 UNSPEC_VOLATILEs. */
11756 static bool
11757 sh_unspec_insn_p (rtx x)
11759 subrtx_iterator::array_type array;
11760 FOR_EACH_SUBRTX (i, array, x, ALL)
11761 if (*i != NULL
11762 && (GET_CODE (*i) == UNSPEC || GET_CODE (*i) == UNSPEC_VOLATILE))
11763 return true;
11765 return false;
11768 /* Return true if the register operands of the specified insn are modified
11769 between the specified from and to insns (exclusive of those two). */
11770 bool
11771 sh_insn_operands_modified_between_p (rtx_insn* operands_insn,
11772 const rtx_insn* from,
11773 const rtx_insn* to)
11775 /* FIXME: Return true for multiple sets for now. */
11776 rtx s = single_set (operands_insn);
11777 if (s == NULL_RTX)
11778 return true;
11780 subrtx_iterator::array_type array;
11781 FOR_EACH_SUBRTX (i, array, SET_SRC (s), ALL)
11782 if (*i != NULL &&
11783 ((REG_P (*i) || SUBREG_P (*i)) && reg_set_between_p (*i, from, to)))
11784 return true;
11786 return false;
11789 /* Given an insn, determine whether it's a 'nott' insn, i.e. an insn that
11790 negates the T bit and stores the result in the T bit. */
11791 bool
11792 sh_is_nott_insn (const rtx_insn* i)
11794 return i != NULL_RTX && PATTERN (i) != NULL_RTX
11795 && GET_CODE (PATTERN (i)) == SET
11796 && t_reg_operand (XEXP (PATTERN (i), 0), VOIDmode)
11797 && negt_reg_operand (XEXP (PATTERN (i), 1), VOIDmode);
11801 sh_movt_set_dest (const rtx_insn* i)
11803 return i == NULL ? NULL : sh_movt_set_dest (PATTERN (i));
11807 sh_movt_set_dest (const_rtx pat)
11809 return GET_CODE (pat) == SET
11810 && arith_reg_dest (XEXP (pat, 0), SImode)
11811 && t_reg_operand (XEXP (pat, 1), VOIDmode) ? XEXP (pat, 0) : NULL;
11814 /* Given an insn, check whether it's a 'movrt' kind of insn, i.e. an insn
11815 that stores the negated T bit in a register, and return the destination
11816 register rtx, or null. */
11818 sh_movrt_set_dest (const rtx_insn* i)
11820 return i == NULL ? NULL : sh_movrt_set_dest (PATTERN (i));
11824 sh_movrt_set_dest (const_rtx pat)
11826 /* The negc movrt replacement is inside a parallel. */
11827 if (GET_CODE (pat) == PARALLEL)
11828 pat = XVECEXP (pat, 0, 0);
11830 return GET_CODE (pat) == SET
11831 && arith_reg_dest (XEXP (pat, 0), SImode)
11832 && negt_reg_operand (XEXP (pat, 1), VOIDmode) ? XEXP (pat, 0) : NULL;
11836 /* Given an insn and a reg number, tell whether the reg dies or is unused
11837 after the insn. */
11838 bool
11839 sh_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno)
11841 return find_regno_note (i, REG_DEAD, regno) != NULL
11842 || find_regno_note (i, REG_UNUSED, regno) != NULL;
11845 /* Given an insn and a reg number, remove reg dead or reg unused notes to
11846 mark it as being used after the insn. */
11847 void
11848 sh_remove_reg_dead_or_unused_notes (rtx_insn* i, int regno)
11850 if (rtx n = find_regno_note (i, REG_DEAD, regno))
11851 remove_note (i, n);
11852 if (rtx n = find_regno_note (i, REG_UNUSED, regno))
11853 remove_note (i, n);
11856 /* Given an insn, check if it contains any post/pre inc/dec mem operands and
11857 add the REG_INC notes accordingly.
11858 FIXME: This function is very similar to lra.cc (add_auto_inc_notes).
11859 FIXME: This function is currently used by peephole2 patterns because
11860 the peephole2 pass does not preserve REG_INC notes. If the notes
11861 are dropped the following passes will do wrong things. */
11862 rtx_insn*
11863 sh_check_add_incdec_notes (rtx_insn* i)
11865 struct for_each_inc_dec_clb
11867 static int func (rtx mem ATTRIBUTE_UNUSED, rtx op ATTRIBUTE_UNUSED,
11868 rtx dest, rtx src ATTRIBUTE_UNUSED,
11869 rtx srcoff ATTRIBUTE_UNUSED, void* arg)
11871 gcc_assert (REG_P (dest));
11873 rtx_insn* i = (rtx_insn*)arg;
11874 if (find_regno_note (i, REG_INC, REGNO (dest)) == NULL)
11875 add_reg_note (i, REG_INC, dest);
11877 return 0;
11881 for_each_inc_dec (PATTERN (i), for_each_inc_dec_clb::func, i);
11882 return i;
11885 /* Given a move insn destination and a source, make sure that the move source
11886 operand is not a post-inc mem load with the same address reg as the
11887 destination. Returns the modified source operand with the post-inc removed
11888 if necessary. */
11890 sh_remove_overlapping_post_inc (rtx dst, rtx src)
11892 if (!MEM_P (src))
11893 return src;
11895 rtx addr = XEXP (src, 0);
11897 if (GET_CODE (addr) == POST_INC
11898 && reg_overlap_mentioned_p (XEXP (addr, 0), dst))
11899 return replace_equiv_address (src, XEXP (addr, 0));
11901 gcc_assert (GET_CODE (addr) != POST_MODIFY);
11902 return src;
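/* For example, with dst = (reg r1) and src = (mem (post_inc (reg r1)))
   the post-increment is dropped and (mem (reg r1)) is returned, because
   the post-increment of r1 would conflict with the move overwriting r1.
   (r1 is just an illustrative register here.) */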
11905 /* Emit a move insn that is safe to be used in peephole patterns. */
11906 rtx_insn*
11907 sh_peephole_emit_move_insn (rtx dst, rtx src)
11909 return sh_check_add_incdec_notes (
11910 emit_move_insn (dst, sh_remove_overlapping_post_inc (dst, src)));
11913 /* Given an op rtx and an insn, try to find out whether the result of the
11914 specified op consists only of logical operations on T bit stores. */
11915 bool
11916 sh_is_logical_t_store_expr (rtx op, rtx_insn* insn)
11918 if (!logical_operator (op, SImode))
11919 return false;
11921 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
11922 int op_is_t_count = 0;
11924 for (int i = 0; i < 2; ++i)
11926 if (t_reg_operand (ops[i], VOIDmode)
11927 || negt_reg_operand (ops[i], VOIDmode))
11928 op_is_t_count++;
11930 else
11932 set_of_reg op_set = sh_find_set_of_reg
11933 (ops[i], insn, prev_nonnote_nondebug_insn_bb);
11934 if (op_set.set_src == NULL_RTX)
11935 continue;
11937 if (t_reg_operand (op_set.set_src, VOIDmode)
11938 || negt_reg_operand (op_set.set_src, VOIDmode)
11939 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
11940 op_is_t_count++;
11944 return op_is_t_count == 2;
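/* For example, an op such as (ior (reg a) (reg b)) where both a and b were
   previously loaded from the T bit (or its negation) makes this function
   return true; the whole expression can then only evaluate to 0 or 1. */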
11947 /* Given the operand that is extended in a sign/zero extend insn, and the
11948 insn, try to figure out whether the sign/zero extension can be replaced
11949 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
11950 NULL_RTX otherwise. */
11952 sh_try_omit_signzero_extend (rtx extended_op, rtx_insn* insn)
11954 if (REG_P (extended_op))
11955 extended_op = extended_op;
11956 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
11957 extended_op = SUBREG_REG (extended_op);
11958 else
11959 return NULL_RTX;
11961 /* Reg moves must be of the same mode. */
11962 if (GET_MODE (extended_op) != SImode)
11963 return NULL_RTX;
11965 set_of_reg s = sh_find_set_of_reg (extended_op, insn,
11966 prev_nonnote_nondebug_insn_bb);
11967 if (s.set_src == NULL_RTX)
11968 return NULL_RTX;
11970 if (t_reg_operand (s.set_src, VOIDmode)
11971 || negt_reg_operand (s.set_src, VOIDmode))
11972 return extended_op;
11974 /* If the zero extended reg was formed by a logical operation, check the
11975 operands of the logical operation. If both originated from T bit
11976 stores the zero extension can be eliminated. */
11977 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
11978 return extended_op;
11980 return NULL_RTX;
11983 /* Given the current insn, which is assumed to be a movrt_negc insn, try to
11984 figure out whether it should be converted into a movt-xor sequence in
11985 the movrt_negc splitter.
11986 Returns true if insns have been modified and the splitter has succeeded. */
11987 bool
11988 sh_split_movrt_negc_to_movt_xor (rtx_insn* curr_insn, rtx operands[])
11990 /* In cases such as
11991 tst r4,r4
11992 mov #-1,r1
11993 negc r1,r1
11994 tst r4,r4
11995 we can replace the T bit clobbering negc with a movt-xor sequence and
11996 eliminate the redundant comparison.
11997 Because the xor insn depends on register allocation results, allow this
11998 only before reload. */
11999 if (!can_create_pseudo_p ())
12000 return false;
12002 set_of_reg t_before_negc = sh_find_set_of_reg
12003 (get_t_reg_rtx (), curr_insn, prev_nonnote_nondebug_insn_bb);
12004 set_of_reg t_after_negc = sh_find_set_of_reg
12005 (get_t_reg_rtx (), curr_insn, next_nonnote_nondebug_insn_bb);
12007 if (t_before_negc.set_rtx != NULL_RTX && t_after_negc.set_rtx != NULL_RTX
12008 && rtx_equal_p (t_before_negc.set_rtx, t_after_negc.set_rtx)
12009 && !reg_used_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
12010 && !sh_insn_operands_modified_between_p (t_before_negc.insn,
12011 t_before_negc.insn,
12012 t_after_negc.insn)
12013 && !modified_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
12014 && !sh_unspec_insn_p (t_after_negc.insn)
12015 && !volatile_insn_p (PATTERN (t_after_negc.insn))
12016 && !side_effects_p (PATTERN (t_after_negc.insn))
12017 && !may_trap_or_fault_p (PATTERN (t_after_negc.insn)))
12019 emit_insn (gen_movrt_xor (operands[0], get_t_reg_rtx ()));
12020 set_insn_deleted (t_after_negc.insn);
12021 return true;
12023 else
12024 return false;
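/* The movt-xor form computes !T without clobbering the T bit itself,
   unlike negc which consumes and overwrites T; that is what makes the
   second, redundant tst in the example above removable. */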
12027 /* Given a reg and the current insn, see if the value of the reg originated
12028 from a sign or zero extension and return the discovered information. */
12029 sh_extending_set_of_reg
12030 sh_find_extending_set_of_reg (rtx reg, rtx_insn* curr_insn)
12032 if (reg == NULL)
12033 return sh_extending_set_of_reg (curr_insn);
12035 if (SUBREG_P (reg))
12036 reg = SUBREG_REG (reg);
12038 if (!REG_P (reg))
12039 return sh_extending_set_of_reg (curr_insn);
12041 /* FIXME: Also search the predecessor basic blocks. It seems that checking
12042 only the adjacent predecessor blocks would cover most of the cases.
12043 Also try to look through the first extension that we hit. There are some
12044 cases where a zero_extend is followed by an (implicit) sign_extend, and it
12045 fails to see the sign_extend. */
12046 sh_extending_set_of_reg result = sh_find_set_of_reg
12047 (reg, curr_insn, prev_nonnote_nondebug_insn_bb, true);
12049 if (result.set_src != NULL)
12051 if (GET_CODE (result.set_src) == SIGN_EXTEND
12052 || GET_CODE (result.set_src) == ZERO_EXTEND)
12054 if (dump_file)
12055 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
12056 "explicitly sign/zero extended in insn %d\n",
12057 REGNO (reg), INSN_UID (result.insn));
12058 result.from_mode = GET_MODE (XEXP (result.set_src, 0));
12059 result.ext_code = GET_CODE (result.set_src);
12061 else if (MEM_P (result.set_src)
12062 && (GET_MODE (result.set_src) == QImode
12063 || GET_MODE (result.set_src) == HImode)
12064 && !sh_unspec_insn_p (result.insn))
12066 /* On SH QIHImode memory loads always sign extend. However, in
12067 some cases where it seems that the higher bits are not
12068 interesting, the loads will not be expanded as sign extending
12069 insns, but as QIHImode loads into QIHImode regs. We report that
12070 the reg has been sign extended by the mem load. When it is used
12071 as such, we must convert the mem load into a sign extending insn,
12072 see also sh_extending_set_of_reg::use_as_extended_reg. */
12073 if (dump_file)
12074 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
12075 "implicitly sign extended in insn %d\n",
12076 REGNO (reg), INSN_UID (result.insn));
12077 result.from_mode = GET_MODE (result.set_src);
12078 result.ext_code = SIGN_EXTEND;
12082 return result;
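/* For example, a QImode load such as mov.b @r4,r1 always produces a
   sign-extended value in the destination register on SH, even when the
   RTL is written as a plain QImode load into a QImode reg. Such a reg is
   reported here as SIGN_EXTEND from QImode, and use_as_extended_reg below
   turns the load into an explicit extendqisi2 when the extended value is
   actually needed. */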
12085 /* Given a reg that is known to be sign or zero extended at some insn,
12086 take the appropriate measures so that the extended value can be used as
12087 a reg at the specified insn and return the resulting reg rtx. */
12089 sh_extending_set_of_reg::use_as_extended_reg (rtx_insn* use_at_insn) const
12091 gcc_assert (insn != NULL && set_src != NULL && set_rtx != NULL);
12092 gcc_assert (ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND);
12093 gcc_assert (from_mode == QImode || from_mode == HImode);
12095 if (MEM_P (set_src) && ext_code == SIGN_EXTEND)
12097 if (dump_file)
12098 fprintf (dump_file,
12099 "use_as_extended_reg: converting non-extending mem load in "
12100 "insn %d into sign-extending load\n", INSN_UID (insn));
12102 rtx r = gen_reg_rtx (SImode);
12103 rtx_insn* i0;
12104 if (from_mode == QImode)
12105 i0 = sh_check_add_incdec_notes (
12106 emit_insn_after (gen_extendqisi2 (r, set_src), insn));
12107 else if (from_mode == HImode)
12108 i0 = sh_check_add_incdec_notes (
12109 emit_insn_after (gen_extendhisi2 (r, set_src), insn));
12110 else
12111 gcc_unreachable ();
12113 emit_insn_after (
12114 gen_move_insn (XEXP (set_rtx, 0),
12115 gen_lowpart (GET_MODE (set_src), r)), i0);
12116 set_insn_deleted (insn);
12117 return r;
12119 else
12121 rtx extension_dst = XEXP (set_rtx, 0);
12122 if (GET_MODE (extension_dst) != SImode)
12123 extension_dst = simplify_gen_subreg (SImode, extension_dst,
12124 GET_MODE (extension_dst), 0);
12125 if (modified_between_p (extension_dst, insn, use_at_insn))
12127 if (dump_file)
12128 fprintf (dump_file,
12129 "use_as_extended_reg: dest reg %d of extending insn %d is "
12130 "modified, inserting a reg-reg copy\n",
12131 REGNO (extension_dst), INSN_UID (insn));
12133 rtx r = gen_reg_rtx (SImode);
12134 emit_insn_after (gen_move_insn (r, extension_dst), insn);
12135 return r;
12137 else
12139 sh_remove_reg_dead_or_unused_notes (insn, REGNO (extension_dst));
12140 return extension_dst;
12145 bool
12146 sh_extending_set_of_reg::can_use_as_unextended_reg (void) const
12148 if ((ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND)
12149 && (from_mode == QImode || from_mode == HImode)
12150 && set_src != NULL)
12151 return arith_reg_operand (XEXP (set_src, 0), from_mode);
12152 else
12153 return false;
12157 sh_extending_set_of_reg::use_as_unextended_reg (rtx_insn* use_at_insn) const
12159 gcc_assert (can_use_as_unextended_reg ());
12161 rtx r = XEXP (set_src, 0);
12162 rtx r0 = simplify_gen_subreg (SImode, r, from_mode, 0);
12164 if (modified_between_p (r, insn, use_at_insn))
12166 rtx r1 = gen_reg_rtx (SImode);
12167 emit_insn_after (gen_move_insn (r1, r0), insn);
12168 return r1;
12170 else
12172 sh_remove_reg_dead_or_unused_notes (insn, SUBREG_P (r)
12173 ? REGNO (SUBREG_REG (r))
12174 : REGNO (r));
12175 return r0;
12179 /* Given the current insn, which is assumed to be the *tst<mode>_t_subregs insn,
12180 perform the necessary checks on the operands and split it accordingly. */
12181 void
12182 sh_split_tst_subregs (rtx_insn* curr_insn, machine_mode subreg_mode,
12183 int subreg_offset, rtx operands[])
12185 gcc_assert (subreg_mode == QImode || subreg_mode == HImode);
12187 sh_extending_set_of_reg eop0 = sh_find_extending_set_of_reg (operands[0],
12188 curr_insn);
12189 sh_extending_set_of_reg eop1 = sh_find_extending_set_of_reg (operands[1],
12190 curr_insn);
12192 /* If one of the operands is known to be zero extended, that's already
12193 sufficient to mask out the unwanted high bits. */
12194 if (eop0.ext_code == ZERO_EXTEND && eop0.from_mode == subreg_mode)
12196 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
12197 operands[1]));
12198 return;
12200 if (eop1.ext_code == ZERO_EXTEND && eop1.from_mode == subreg_mode)
12202 emit_insn (gen_tstsi_t (operands[0],
12203 eop1.use_as_extended_reg (curr_insn)));
12204 return;
12207 /* None of the operands seem to be zero extended.
12208 If both are sign extended it's OK, too. */
12209 if (eop0.ext_code == SIGN_EXTEND && eop1.ext_code == SIGN_EXTEND
12210 && eop0.from_mode == subreg_mode && eop1.from_mode == subreg_mode)
12212 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
12213 eop1.use_as_extended_reg (curr_insn)));
12214 return;
12217 /* Otherwise we have to insert a zero extension on one of the operands to
12218 mask out the unwanted high bits.
12219 Prefer the operand that has no known extension. */
12220 if (eop0.ext_code != UNKNOWN && eop1.ext_code == UNKNOWN)
12221 std::swap (operands[0], operands[1]);
12223 rtx tmp0 = gen_reg_rtx (SImode);
12224 rtx tmp1 = simplify_gen_subreg (subreg_mode, operands[0],
12225 GET_MODE (operands[0]), subreg_offset);
12226 emit_insn (subreg_mode == QImode
12227 ? gen_zero_extendqisi2 (tmp0, tmp1)
12228 : gen_zero_extendhisi2 (tmp0, tmp1));
12229 emit_insn (gen_tstsi_t (tmp0, operands[1]));
12232 /* A helper class to increment/decrement a counter variable each time a
12233 function is entered/left. */
12234 class scope_counter
12236 public:
12237 scope_counter (int& counter) : m_counter (counter) { ++m_counter; }
12239 ~scope_counter (void)
12241 --m_counter;
12242 gcc_assert (m_counter >= 0);
12245 int count (void) const { return m_counter; }
12247 private:
12248 int& m_counter;
12251 /* Given an rtx x, determine whether the expression can be used to create
12252 an insn that calculates x and stores the result in the T bit.
12253 This is used by the 'treg_set_expr' predicate to construct insn sequences
12254 where T bit results are fed into other insns, such as addc, subc, negc
12255 insns.
12257 FIXME: The patterns that expand 'treg_set_expr' operands tend to
12258 distinguish between 'positive' and 'negative' forms. For now this has to
12259 be done in the preparation code. We could also introduce
12260 'pos_treg_set_expr' and 'neg_treg_set_expr' predicates for that and write
12261 two different patterns for the 'positive' and 'negative' forms. However,
12262 the total number of lines of code seems to be about the same and the
12263 '{pos|neg}_treg_set_expr' predicates would be more expensive, because the
12264 recog function would need to look inside the expression by temporarily
12265 splitting it. */
12266 static int sh_recog_treg_set_expr_reent_count = 0;
12268 bool
12269 sh_recog_treg_set_expr (rtx op, machine_mode mode)
12271 scope_counter recursion (sh_recog_treg_set_expr_reent_count);
12273 /* Limit the recursion count to avoid nested expressions which we can't
12274 resolve to a single treg set insn. */
12275 if (recursion.count () > 1)
12276 return false;
12278 /* Early accept known possible operands before doing recog. */
12279 if (op == const0_rtx || op == const1_rtx || t_reg_operand (op, mode)
12280 || negt_reg_operand (op, mode))
12281 return true;
12283 /* Early reject impossible operands before doing recog.
12284 There are some (set ((t) (subreg ...))) patterns, but we must be careful
12285 not to allow any invalid reg-reg or mem-reg moves, or else other passes
12286 such as lower-subreg will bail out. Some insns such as SH4A movua are
12287 done with UNSPEC, so must reject those, too, or else it would result
12288 in an invalid reg -> treg move. */
12289 if (CONST_INT_P (op) || register_operand (op, mode)
12290 || memory_operand (op, mode) || sh_unspec_insn_p (op))
12291 return false;
12293 if (!can_create_pseudo_p ())
12294 return false;
12296 /* expand_debug_locations may call this to compute rtx costs at
12297 very early stage. In that case, don't make new insns here to
12298 avoid codegen differences with -g. */
12299 if (currently_expanding_to_rtl)
12300 return false;
12302 /* We are going to invoke recog in a re-entrant way and thus
12303 have to capture its current state and restore it afterwards. */
12304 recog_data_d prev_recog_data = recog_data;
12306 /* Note we can't use insn_raw here since that increases the uid
12307 and could cause debug compare differences; this insn never leaves
12308 this function so create a dummy one. */
12309 rtx_insn* i = as_a <rtx_insn *> (rtx_alloc (INSN));
12311 INSN_UID (i) = 1;
12312 PATTERN (i) = gen_rtx_SET (get_t_reg_rtx (), op);
12313 INSN_CODE (i) = -1;
12314 REG_NOTES (i) = NULL;
12315 INSN_LOCATION (i) = curr_insn_location ();
12316 BLOCK_FOR_INSN (i) = NULL;
12317 SET_PREV_INSN (i) = NULL;
12318 SET_NEXT_INSN (i) = NULL;
12320 /* If the comparison op doesn't have a result mode, set it to SImode. */
12321 machine_mode prev_op_mode = GET_MODE (op);
12322 if (COMPARISON_P (op) && prev_op_mode == VOIDmode)
12323 PUT_MODE (op, SImode);
12325 int result = recog (PATTERN (i), i, 0);
12327 /* It seems there is no insn like that. Create a negated version and
12328 try again. If we hit a negated form, we'll allow that and append a
12329 nott sequence when splitting out the insns. Insns that do the split
12330 can then remove the trailing nott if they know how to deal with it. */
12331 if (result < 0 && COMPARISON_P (op))
12333 machine_mode cmp_mode = GET_MODE (XEXP (op, 0));
12334 if (cmp_mode == VOIDmode)
12335 cmp_mode = GET_MODE (XEXP (op, 1));
12337 rtx_code prev_code = GET_CODE (op);
12338 PUT_CODE (op, reverse_condition (GET_CODE (op)));
12339 result = recog (PATTERN (i), i, 0);
12340 PUT_CODE (op, prev_code);
12343 PUT_MODE (op, prev_op_mode);
12344 recog_data = prev_recog_data;
12345 return result >= 0;
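/* For example, an operand like (eq (reg:SI a) (const_int 0)) is accepted
   here because (set (reg T) (eq ...)) matches an existing T bit setting
   insn, whereas plain registers, memory operands and constants other than
   0 and 1 are rejected early above, as they would turn into invalid
   reg -> treg or mem -> treg moves. */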
12348 /* Returns true when recog of a 'treg_set_expr' is currently in progress.
12349 This can be used as a condition for insn/split patterns to allow certain
12350 T bit setting patterns only to be matched as sub-expressions of other
12351 patterns. */
12352 bool
12353 sh_in_recog_treg_set_expr (void)
12355 return sh_recog_treg_set_expr_reent_count > 0;
12358 /* Given an rtx x, which is assumed to be some expression that has been
12359 matched by the 'treg_set_expr' predicate before, split and emit the
12360 insns that are necessary to calculate the expression and store the result
12361 in the T bit.
12362 The splitting is done recursively, similar to 'try_split' in emit-rtl.cc.
12363 Unfortunately we can't use 'try_split' here directly, as it tries to invoke
12364 'delete_insn' which then causes the DF parts to bail out, because we
12365 currently are inside another gen_split* function and would invoke
12366 'try_split' in a reentrant way. */
12367 static std::pair<rtx_insn*, rtx_insn*>
12368 sh_try_split_insn_simple (rtx_insn* i, rtx_insn* curr_insn, int n = 0)
12370 if (dump_file)
12372 fprintf (dump_file, "sh_try_split_insn_simple n = %d i = \n", n);
12373 print_rtl_single (dump_file, i);
12374 fprintf (dump_file, "\n");
12377 rtx_insn* seq = split_insns (PATTERN (i), curr_insn);
12379 if (seq == NULL)
12380 return std::make_pair (i, i);
12382 /* Avoid infinite splitter loops if any insn of the result matches
12383 the original pattern. */
12384 for (rtx_insn* s = seq; s != NULL; s = NEXT_INSN (s))
12385 if (INSN_P (s) && rtx_equal_p (PATTERN (s), PATTERN (i)))
12386 return std::make_pair (i, i);
12388 unshare_all_rtl_in_chain (seq);
12390 /* 'seq' is now a replacement for 'i'. Assuming that 'i' is an insn in
12391 a linked list, replace the single insn with the new insns. */
12392 rtx_insn* seqlast = seq;
12393 while (NEXT_INSN (seqlast) != NULL)
12394 seqlast = NEXT_INSN (seqlast);
12396 if (rtx_insn* iprev = PREV_INSN (i))
12397 SET_NEXT_INSN (iprev) = seq;
12398 if (rtx_insn* inext = NEXT_INSN (i))
12399 SET_PREV_INSN (inext) = seqlast;
12401 SET_PREV_INSN (seq) = PREV_INSN (i);
12402 SET_NEXT_INSN (seqlast) = NEXT_INSN (i);
12404 SET_PREV_INSN (i) = NULL;
12405 SET_NEXT_INSN (i) = NULL;
12407 /* Recursively split all insns. */
12408 for (i = seq; ; i = NEXT_INSN (i))
12410 std::pair<rtx_insn*, rtx_insn*> ii =
12411 sh_try_split_insn_simple (i, curr_insn, n + 1);
12412 if (i == seq)
12413 seq = ii.first;
12414 if (i == seqlast)
12416 seqlast = ii.second;
12417 break;
12419 i = ii.first;
12422 return std::make_pair (seq, seqlast);
12425 sh_treg_insns
12426 sh_split_treg_set_expr (rtx x, rtx_insn* curr_insn)
12428 if (t_reg_operand (x, VOIDmode))
12429 return sh_treg_insns ();
12431 scope_counter in_treg_set_expr (sh_recog_treg_set_expr_reent_count);
12433 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), x));
12434 SET_PREV_INSN (i) = NULL;
12435 SET_NEXT_INSN (i) = NULL;
12437 if (dump_file)
12439 fprintf (dump_file, "split_treg_set_expr insn:\n");
12440 print_rtl (dump_file, i);
12441 fprintf (dump_file, "\n");
12444 /* If the insn is not found, we will try a negated form and append
12445 a nott. */
12446 bool append_nott = false;
12448 /* We are going to invoke recog/split_insns in a re-entrant way and thus
12449 have to capture its current state and restore it afterwards. */
12450 recog_data_d prev_recog_data = recog_data;
12452 if (negt_reg_operand (x, GET_MODE (x)))
12454 /* This is a normal movt followed by a nott. It will be converted
12455 into a movrt after initial expansion. */
12456 XEXP (PATTERN (i), 1) = get_t_reg_rtx ();
12457 append_nott = true;
12459 else
12461 /* If the comparison op doesn't have a mode set, set it to SImode. */
12462 if (COMPARISON_P (x) && GET_MODE (x) == VOIDmode)
12463 PUT_MODE (x, SImode);
12465 int insn_code = recog (PATTERN (i), i, 0);
12467 if (insn_code < 0 && COMPARISON_P (x))
12469 machine_mode cmp_mode = GET_MODE (XEXP (x, 0));
12470 if (cmp_mode == VOIDmode)
12471 cmp_mode = GET_MODE (XEXP (x, 1));
12473 PUT_CODE (x, reverse_condition (GET_CODE (x)));
12474 insn_code = recog (PATTERN (i), i, 0);
12475 append_nott = true;
12478 gcc_assert (insn_code >= 0);
12481 /* Try to recursively split the insn. Some insns might refuse to split
12482 any further while we are in the treg_set_expr splitting phase. They
12483 will be emitted as part of the outer insn and then split again. */
12484 std::pair<rtx_insn*, rtx_insn*> insnlist =
12485 sh_try_split_insn_simple (i, curr_insn);
12487 /* Restore recog state. */
12488 recog_data = prev_recog_data;
12490 rtx_insn* nott_insn = sh_is_nott_insn (insnlist.second)
12491 ? insnlist.second
12492 : NULL;
12493 if (dump_file)
12495 fprintf (dump_file, "split_treg_set_expr insnlist:\n");
12496 print_rtl (dump_file, insnlist.first);
12497 fprintf (dump_file, "\n");
12499 if (nott_insn != NULL)
12500 fprintf (dump_file, "trailing nott insn %d\n", INSN_UID (nott_insn));
12503 emit_insn (insnlist.first);
12505 if (nott_insn != NULL && append_nott)
12507 if (dump_file)
12508 fprintf (dump_file, "removing trailing nott\n");
12509 remove_insn (nott_insn);
12510 nott_insn = NULL;
12511 append_nott = false;
12514 if (append_nott)
12515 nott_insn = emit_insn (gen_nott (get_t_reg_rtx ()));
12517 rtx_insn* first_insn = get_insns ();
12519 if (dump_file)
12521 fprintf (dump_file, "resulting insns:\n");
12522 print_rtl (dump_file, first_insn);
12523 fprintf (dump_file, "\n");
12526 return sh_treg_insns (first_insn, nott_insn);
12529 /*------------------------------------------------------------------------------
12530 Mode switching support code.
12533 static void
12534 sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
12535 int prev_mode, HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
12537 if ((TARGET_SH4A_FP || TARGET_FPU_SH4_300)
12538 && prev_mode != FP_MODE_NONE && prev_mode != mode)
12540 emit_insn (gen_toggle_pr ());
12541 if (TARGET_FMOVD)
12542 emit_insn (gen_toggle_sz ());
12544 else if (mode != FP_MODE_NONE)
12546 rtx tmp = gen_reg_rtx (SImode);
12547 emit_insn (gen_sts_fpscr (tmp));
12548 rtx i = NULL;
12550 const unsigned HOST_WIDE_INT fpbits =
12551 TARGET_FMOVD ? (FPSCR_PR | FPSCR_SZ) : FPSCR_PR;
12553 if (prev_mode != FP_MODE_NONE && prev_mode != mode)
12554 i = gen_xorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
12555 else if (mode == FP_MODE_SINGLE)
12556 i = gen_andsi3 (tmp, tmp, force_reg (SImode, GEN_INT (~fpbits)));
12557 else if (mode == FP_MODE_DOUBLE)
12558 i = gen_iorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
12559 else
12560 gcc_unreachable ();
12562 emit_insn (i);
12563 emit_insn (gen_lds_fpscr (tmp));
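/* In summary: on FPUs where toggle_pr is available the PR bit (and SZ
   when double moves are enabled) is simply toggled; otherwise FPSCR is
   read with sts, the PR/SZ bits are adjusted with xor (toggle), and
   (clear, for single precision) or ior (set, for double precision), and
   the result is written back with lds. */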
12567 static int
12568 sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn, HARD_REG_SET)
12570 return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
12573 static int
12574 sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn,
12575 HARD_REG_SET)
12577 if (TARGET_HITACHI && recog_memoized (insn) >= 0 &&
12578 get_attr_fp_set (insn) != FP_SET_NONE)
12579 return (int) get_attr_fp_set (insn);
12580 else
12581 return mode;
12584 static int
12585 sh_mode_entry (int entity ATTRIBUTE_UNUSED)
12587 return NORMAL_MODE (entity);
12590 static int
12591 sh_mode_exit (int entity ATTRIBUTE_UNUSED)
12593 return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity);
12596 static int
12597 sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
12599 return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE);
12602 /*------------------------------------------------------------------------------
12603 Misc
12606 /* Return true if we use LRA instead of reload pass. */
12607 bool
12608 sh_lra_p (void)
12610 return sh_lra_flag;
12613 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
12615 static bool
12616 sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
12617 unsigned int align,
12618 enum by_pieces_operation op,
12619 bool speed_p)
12621 switch (op)
12623 case MOVE_BY_PIECES:
12624 return by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1, op)
12625 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
12626 case STORE_BY_PIECES:
12627 case SET_BY_PIECES:
12628 return by_pieces_ninsns (size, align, STORE_MAX_PIECES + 1, op)
12629 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
12630 default:
12631 return default_use_by_pieces_infrastructure_p (size, align,
12632 op, speed_p);
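/* The thresholds above mean: when optimizing for size a by-pieces move or
   store is only used if it can be done in a single insn, and when
   optimizing for speed up to 15 insns are allowed for blocks aligned to
   at least 32 bits, otherwise again only a single insn. */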
12636 bool
12637 sh_cannot_force_const_mem_p (machine_mode mode ATTRIBUTE_UNUSED,
12638 rtx x ATTRIBUTE_UNUSED)
12640 return TARGET_FDPIC;
12643 /* Emit insns to load the function address from FUNCDESC (an FDPIC
12644 function descriptor) into r1 and the GOT address into r12,
12645 returning an rtx for r1. */
12648 sh_load_function_descriptor (rtx funcdesc)
12650 rtx r1 = gen_rtx_REG (Pmode, R1_REG);
12651 rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
12652 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
12653 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
12655 emit_move_insn (r1, fnaddr);
12656 /* The ABI requires the entry point address to be loaded first, so
12657 prevent the load from being moved after that of the GOT
12658 address. */
12659 emit_insn (gen_blockage ());
12660 emit_move_insn (pic_reg, gotaddr);
12661 return r1;
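/* An FDPIC function descriptor is two words: the function entry point at
   offset 0 and the function's GOT address at offset 4, which is exactly
   what the two loads above fetch. */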
12664 /* Return an rtx holding the initial value of the FDPIC register (the
12665 FDPIC pointer passed in from the caller). */
12668 sh_get_fdpic_reg_initial_val (void)
12670 return get_hard_reg_initial_val (Pmode, PIC_REG);
12673 #include "gt-sh.h"