gnu/dist/gcc4/gcc/config/sh/sh.c
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
22 Boston, MA 02110-1301, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "function.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "toplev.h"
40 #include "recog.h"
41 #include "c-pragma.h"
42 #include "integrate.h"
43 #include "dwarf2.h"
44 #include "tm_p.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "real.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "ggc.h"
54 #include "tree-gimple.h"
55 #include "cfgloop.h"
58 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
60 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
61 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
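/* Editor's note (illustrative, not part of the original source): with
   TARGET_LITTLE_ENDIAN, LSW is 0 and MSW is 1, so the %R / %S handling in
   print_operand below ends up using byte offset 4 * LSW == 0 for the least
   significant word and 4 * MSW == 4 for the most significant one; on a
   big-endian target the two offsets swap.  */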
63 /* These are some macros to abstract register modes. */
64 #define CONST_OK_FOR_ADD(size) \
65 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
66 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
67 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
68 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
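/* Editor's note (hedged assumption): CONST_OK_FOR_I10 and CONST_OK_FOR_I08
   appear to correspond to 10-bit and 8-bit signed add immediates, so
   CONST_OK_FOR_ADD would accept roughly -512..511 on SHmedia and -128..127
   elsewhere; likewise GEN_MOV/GEN_ADD3/GEN_SUB3 select the DImode
   generators only for SHmedia64, presumably because pointers are 64 bits
   wide there.  */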
70 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
71 int current_function_interrupt;
73 tree sh_deferred_function_attributes;
74 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
76 /* Global variables for machine-dependent things. */
78 /* Which CPU we are scheduling for. */
79 enum processor_type sh_cpu;
81 /* Definitions used in ready queue reordering for first scheduling pass. */
83 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
84 static short *regmode_weight[2];
86 /* Total SFmode and SImode weights of scheduled insns. */
87 static int curr_regmode_pressure[2];
89 /* If true, skip cycles for Q -> R movement. */
90 static int skip_cycles = 0;
92 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
93 and returned from sh_reorder2. */
94 static short cached_can_issue_more;
96 /* Saved operands from the last compare to use when we generate an scc
97 or bcc insn. */
99 rtx sh_compare_op0;
100 rtx sh_compare_op1;
102 /* Provides the class number of the smallest class containing
103 reg number. */
105 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
107 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
108 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
109 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
110 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
111 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
112 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
113 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
114 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
115 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
116 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
117 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
118 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
124 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
125 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
126 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
127 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
128 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
129 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
130 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
131 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
132 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
133 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
134 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
140 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
141 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
142 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
143 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
144 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
145 GENERAL_REGS, GENERAL_REGS,
148 char sh_register_names[FIRST_PSEUDO_REGISTER] \
149 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
151 char sh_additional_register_names[ADDREGNAMES_SIZE] \
152 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
153 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
155 /* Provide reg_class from a letter such as appears in the machine
156 description. *: target independently reserved letter.
157 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
159 enum reg_class reg_class_from_letter[] =
161 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
162 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
163 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
164 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
165 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
166 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
167 /* y */ FPUL_REGS, /* z */ R0_REGS
170 int assembler_dialect;
172 static bool shmedia_space_reserved_for_target_registers;
174 static bool sh_handle_option (size_t, const char *, int);
175 static void split_branches (rtx);
176 static int branch_dest (rtx);
177 static void force_into (rtx, rtx);
178 static void print_slot (rtx);
179 static rtx add_constant (rtx, enum machine_mode, rtx);
180 static void dump_table (rtx, rtx);
181 static int hi_const (rtx);
182 static int broken_move (rtx);
183 static int mova_p (rtx);
184 static rtx find_barrier (int, rtx, rtx);
185 static int noncall_uses_reg (rtx, rtx, rtx *);
186 static rtx gen_block_redirect (rtx, int, int);
187 static void sh_reorg (void);
188 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
189 static rtx frame_insn (rtx);
190 static rtx push (int);
191 static void pop (int);
192 static void push_regs (HARD_REG_SET *, int);
193 static int calc_live_regs (HARD_REG_SET *);
194 static void mark_use (rtx, rtx *);
195 static HOST_WIDE_INT rounded_frame_size (int);
196 static rtx mark_constant_pool_use (rtx);
197 const struct attribute_spec sh_attribute_table[];
198 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
199 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
200 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
201 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
202 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
203 static void sh_insert_attributes (tree, tree *);
204 static const char *sh_check_pch_target_flags (int);
205 static int sh_adjust_cost (rtx, rtx, rtx, int);
206 static int sh_issue_rate (void);
207 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
208 static short find_set_regmode_weight (rtx, enum machine_mode);
209 static short find_insn_regmode_weight (rtx, enum machine_mode);
210 static void find_regmode_weight (int, enum machine_mode);
211 static void sh_md_init_global (FILE *, int, int);
212 static void sh_md_finish_global (FILE *, int);
213 static int rank_for_reorder (const void *, const void *);
214 static void swap_reorder (rtx *, int);
215 static void ready_reorder (rtx *, int);
216 static short high_pressure (enum machine_mode);
217 static int sh_reorder (FILE *, int, rtx *, int *, int);
218 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
219 static void sh_md_init (FILE *, int, int);
220 static int sh_variable_issue (FILE *, int, rtx, int);
222 static bool sh_function_ok_for_sibcall (tree, tree);
224 static bool sh_cannot_modify_jumps_p (void);
225 static int sh_target_reg_class (void);
226 static bool sh_optimize_target_register_callee_saved (bool);
227 static bool sh_ms_bitfield_layout_p (tree);
229 static void sh_init_builtins (void);
230 static void sh_media_init_builtins (void);
231 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
232 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
233 static void sh_file_start (void);
234 static int flow_dependent_p (rtx, rtx);
235 static void flow_dependent_p_1 (rtx, rtx, void *);
236 static int shiftcosts (rtx);
237 static int andcosts (rtx);
238 static int addsubcosts (rtx);
239 static int multcosts (rtx);
240 static bool unspec_caller_rtx_p (rtx);
241 static bool sh_cannot_copy_insn_p (rtx);
242 static bool sh_rtx_costs (rtx, int, int, int *);
243 static int sh_address_cost (rtx);
244 #ifdef TARGET_ADJUST_UNROLL_MAX
245 static int sh_adjust_unroll_max (struct loop *, int, int, int, int);
246 #endif
247 static int sh_pr_n_sets (void);
248 static rtx sh_allocate_initial_value (rtx);
249 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
250 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
251 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
252 static int scavenge_reg (HARD_REG_SET *s);
253 struct save_schedule_s;
254 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
255 struct save_schedule_s *, int);
257 static rtx sh_struct_value_rtx (tree, int);
258 static bool sh_return_in_memory (tree, tree);
259 static rtx sh_builtin_saveregs (void);
260 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
261 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
262 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
263 static tree sh_build_builtin_va_list (void);
264 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
265 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
266 tree, bool);
267 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
268 tree, bool);
269 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
270 tree, bool);
271 static int sh_dwarf_calling_convention (tree);
272 static int hard_regs_intersect_p (HARD_REG_SET *, HARD_REG_SET *);
275 /* Initialize the GCC target structure. */
276 #undef TARGET_ATTRIBUTE_TABLE
277 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
279 /* The next two are used for debug info when compiling with -gdwarf. */
280 #undef TARGET_ASM_UNALIGNED_HI_OP
281 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
282 #undef TARGET_ASM_UNALIGNED_SI_OP
283 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
285 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
286 #undef TARGET_ASM_UNALIGNED_DI_OP
287 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
288 #undef TARGET_ASM_ALIGNED_DI_OP
289 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
291 #undef TARGET_ASM_FUNCTION_EPILOGUE
292 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
294 #undef TARGET_ASM_OUTPUT_MI_THUNK
295 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
297 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
298 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
300 #undef TARGET_ASM_FILE_START
301 #define TARGET_ASM_FILE_START sh_file_start
302 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
303 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
305 #undef TARGET_DEFAULT_TARGET_FLAGS
306 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
307 #undef TARGET_HANDLE_OPTION
308 #define TARGET_HANDLE_OPTION sh_handle_option
310 #undef TARGET_INSERT_ATTRIBUTES
311 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
313 #undef TARGET_SCHED_ADJUST_COST
314 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
316 #undef TARGET_SCHED_ISSUE_RATE
317 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
319 /* The next 5 hooks have been implemented to re-enable sched1. With the
320 help of these macros we limit the movement of insns in sched1 in order to
321 reduce register pressure. The overall idea is to keep count of the SImode
322 and SFmode regs required by already scheduled insns. When these counts
323 cross certain threshold values, give priority to insns that free registers.
324 The insn that frees registers is most likely to be the insn with the lowest
325 LUID (original insn order), but such an insn might be sitting in the stalled
326 queue (Q) instead of the ready queue (R). To solve this, we skip cycles,
327 up to a maximum of 8, so that such insns may move from Q -> R.
329 The hooks are described below (an illustrative sketch follows this comment):
331 TARGET_SCHED_INIT_GLOBAL: A new target hook added to the generic
332 scheduler; it is called inside the sched_init function just after the
333 find_insn_reg_weights function call. It is used to calculate the SImode
334 and SFmode weights of the insns of basic blocks, much like what
335 find_insn_reg_weights does.
336 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
338 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
339 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
340 (Q)->(R).
342 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
343 high, reorder the ready queue so that the insn with the lowest LUID will be
344 issued next.
346 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
347 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
349 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
350 can be returned from TARGET_SCHED_REORDER2.
352 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
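/* Editor's note: a minimal, self-contained sketch of the pressure/LUID idea
   described above, using made-up names and thresholds (model_insn,
   MODEL_PRESSURE_LIMIT); it is not the actual hook implementation and is
   excluded from compilation.  */
#if 0
#include <stdlib.h>

struct model_insn { int luid; };        /* stand-in for an rtx insn */

static int model_pressure[2];           /* SImode / SFmode counters  */
#define MODEL_PRESSURE_LIMIT 12         /* illustrative threshold    */

static int
model_cmp_luid (const void *a, const void *b)
{
  /* Sort so that the lowest LUID (earliest original insn) comes first.  */
  return ((const struct model_insn *) a)->luid
         - ((const struct model_insn *) b)->luid;
}

static void
model_reorder (struct model_insn *ready, int n_ready)
{
  /* Only interfere with the scheduler's order when pressure is high.  */
  if (model_pressure[0] > MODEL_PRESSURE_LIMIT
      || model_pressure[1] > MODEL_PRESSURE_LIMIT)
    qsort (ready, n_ready, sizeof *ready, model_cmp_luid);
}
#endif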
354 #undef TARGET_SCHED_DFA_NEW_CYCLE
355 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
357 #undef TARGET_SCHED_INIT_GLOBAL
358 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
360 #undef TARGET_SCHED_FINISH_GLOBAL
361 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
363 #undef TARGET_SCHED_VARIABLE_ISSUE
364 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
366 #undef TARGET_SCHED_REORDER
367 #define TARGET_SCHED_REORDER sh_reorder
369 #undef TARGET_SCHED_REORDER2
370 #define TARGET_SCHED_REORDER2 sh_reorder2
372 #undef TARGET_SCHED_INIT
373 #define TARGET_SCHED_INIT sh_md_init
375 #undef TARGET_CANNOT_MODIFY_JUMPS_P
376 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
377 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
378 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
379 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
380 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
381 sh_optimize_target_register_callee_saved
383 #undef TARGET_MS_BITFIELD_LAYOUT_P
384 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
386 #undef TARGET_INIT_BUILTINS
387 #define TARGET_INIT_BUILTINS sh_init_builtins
388 #undef TARGET_EXPAND_BUILTIN
389 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
391 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
392 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
394 #undef TARGET_CANNOT_COPY_INSN_P
395 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
396 #undef TARGET_RTX_COSTS
397 #define TARGET_RTX_COSTS sh_rtx_costs
398 #undef TARGET_ADDRESS_COST
399 #define TARGET_ADDRESS_COST sh_address_cost
400 #undef TARGET_ALLOCATE_INITIAL_VALUE
401 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
403 #undef TARGET_MACHINE_DEPENDENT_REORG
404 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
406 #ifdef HAVE_AS_TLS
407 #undef TARGET_HAVE_TLS
408 #define TARGET_HAVE_TLS true
409 #endif
411 #undef TARGET_PROMOTE_PROTOTYPES
412 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
413 #undef TARGET_PROMOTE_FUNCTION_ARGS
414 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
415 #undef TARGET_PROMOTE_FUNCTION_RETURN
416 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
418 #undef TARGET_STRUCT_VALUE_RTX
419 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
420 #undef TARGET_RETURN_IN_MEMORY
421 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
423 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
424 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
425 #undef TARGET_SETUP_INCOMING_VARARGS
426 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
427 #undef TARGET_STRICT_ARGUMENT_NAMING
428 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
429 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
430 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
431 #undef TARGET_MUST_PASS_IN_STACK
432 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
433 #undef TARGET_PASS_BY_REFERENCE
434 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
435 #undef TARGET_CALLEE_COPIES
436 #define TARGET_CALLEE_COPIES sh_callee_copies
437 #undef TARGET_ARG_PARTIAL_BYTES
438 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
440 #undef TARGET_BUILD_BUILTIN_VA_LIST
441 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
442 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
443 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
445 #undef TARGET_VECTOR_MODE_SUPPORTED_P
446 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
448 #undef TARGET_CHECK_PCH_TARGET_FLAGS
449 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
451 #undef TARGET_DWARF_CALLING_CONVENTION
452 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
454 /* Return regmode weight for insn. */
455 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
457 /* Return current register pressure for regmode. */
458 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
460 #ifdef SYMBIAN
462 #undef TARGET_ENCODE_SECTION_INFO
463 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
464 #undef TARGET_STRIP_NAME_ENCODING
465 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
466 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
467 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
469 #endif /* SYMBIAN */
471 #ifdef TARGET_ADJUST_UNROLL_MAX
472 #undef TARGET_ADJUST_UNROLL_MAX
473 #define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
474 #endif
476 struct gcc_target targetm = TARGET_INITIALIZER;
478 /* Implement TARGET_HANDLE_OPTION. */
480 static bool
481 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
482 int value ATTRIBUTE_UNUSED)
484 switch (code)
486 case OPT_m1:
487 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
488 return true;
490 case OPT_m2:
491 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
492 return true;
494 case OPT_m2a:
495 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
496 return true;
498 case OPT_m2a_nofpu:
499 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
500 return true;
502 case OPT_m2a_single:
503 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
504 return true;
506 case OPT_m2a_single_only:
507 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
508 return true;
510 case OPT_m2e:
511 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
512 return true;
514 case OPT_m3:
515 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
516 return true;
518 case OPT_m3e:
519 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
520 return true;
522 case OPT_m4:
523 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
524 return true;
526 case OPT_m4_nofpu:
527 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
528 return true;
530 case OPT_m4_single:
531 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
532 return true;
534 case OPT_m4_single_only:
535 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
536 return true;
538 case OPT_m4a:
539 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
540 return true;
542 case OPT_m4a_nofpu:
543 case OPT_m4al:
544 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
545 return true;
547 case OPT_m4a_single:
548 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
549 return true;
551 case OPT_m4a_single_only:
552 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
553 return true;
555 case OPT_m5_32media:
556 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
557 return true;
559 case OPT_m5_32media_nofpu:
560 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
561 return true;
563 case OPT_m5_64media:
564 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
565 return true;
567 case OPT_m5_64media_nofpu:
568 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
569 return true;
571 case OPT_m5_compact:
572 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
573 return true;
575 case OPT_m5_compact_nofpu:
576 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
577 return true;
579 default:
580 return true;
584 /* Print the operand address in x to the stream. */
586 void
587 print_operand_address (FILE *stream, rtx x)
589 switch (GET_CODE (x))
591 case REG:
592 case SUBREG:
593 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
594 break;
596 case PLUS:
598 rtx base = XEXP (x, 0);
599 rtx index = XEXP (x, 1);
601 switch (GET_CODE (index))
603 case CONST_INT:
604 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
605 reg_names[true_regnum (base)]);
606 break;
608 case REG:
609 case SUBREG:
611 int base_num = true_regnum (base);
612 int index_num = true_regnum (index);
614 fprintf (stream, "@(r0,%s)",
615 reg_names[MAX (base_num, index_num)]);
616 break;
619 default:
620 gcc_unreachable ();
623 break;
625 case PRE_DEC:
626 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
627 break;
629 case POST_INC:
630 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
631 break;
633 default:
634 x = mark_constant_pool_use (x);
635 output_addr_const (stream, x);
636 break;
640 /* Print operand x (an rtx) in assembler syntax to file stream
641 according to modifier code.
643 '.' print a .s if insn needs delay slot
644 ',' print LOCAL_LABEL_PREFIX
645 '@' print trapa, rte or rts, depending on the function's trap_exit/interrupt attributes
646 '#' output a nop if there is nothing to put in the delay slot
647 ''' print likelihood suffix (/u for unlikely).
648 '>' print branch target if -fverbose-asm
649 'O' print a constant without the #
650 'R' print the LSW of a dp value - changes if in little endian
651 'S' print the MSW of a dp value - changes if in little endian
652 'T' print the next word of a dp value - same as 'R' in big endian mode.
653 'M' print an `x' if `m' will print `base,index'.
654 'N' print 'r63' if the operand is (const_int 0).
655 'd' print a V2SF reg as dN instead of fpN.
656 'm' print a pair `base,offset' or `base,index', for LD and ST.
657 'U' Likewise for {LD,ST}{HI,LO}.
658 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
659 'o' output an operator. */
661 void
662 print_operand (FILE *stream, rtx x, int code)
664 int regno;
665 enum machine_mode mode;
667 switch (code)
669 tree trapa_attr;
671 case '.':
672 if (final_sequence
673 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
674 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
675 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
676 break;
677 case ',':
678 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
679 break;
680 case '@':
681 trapa_attr = lookup_attribute ("trap_exit",
682 DECL_ATTRIBUTES (current_function_decl));
683 if (trapa_attr)
684 fprintf (stream, "trapa #%ld",
685 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
686 else if (sh_cfun_interrupt_handler_p ())
687 fprintf (stream, "rte");
688 else
689 fprintf (stream, "rts");
690 break;
691 case '#':
692 /* Output a nop if there's nothing in the delay slot. */
693 if (dbr_sequence_length () == 0)
694 fprintf (stream, "\n\tnop");
695 break;
696 case '\'':
698 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
700 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
701 fputs ("/u", stream);
702 break;
704 case '>':
705 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
707 fputs ("\t! target: ", stream);
708 output_addr_const (stream, JUMP_LABEL (current_output_insn));
710 break;
711 case 'O':
712 x = mark_constant_pool_use (x);
713 output_addr_const (stream, x);
714 break;
715 /* N.B.: %R / %S / %T adjust memory addresses by four.
716 For SHMEDIA, that means they can be used to access the first and
717 second 32 bit part of a 64 bit (or larger) value that
718 might be held in floating point registers or memory.
719 While they can be used to access 64 bit parts of a larger value
720 held in general purpose registers, that won't work with memory -
721 neither for fp registers, since the frxx names are used. */
722 case 'R':
723 if (REG_P (x) || GET_CODE (x) == SUBREG)
725 regno = true_regnum (x);
726 regno += FP_REGISTER_P (regno) ? 1 : LSW;
727 fputs (reg_names[regno], (stream));
729 else if (MEM_P (x))
731 x = adjust_address (x, SImode, 4 * LSW);
732 print_operand_address (stream, XEXP (x, 0));
734 else
736 rtx sub = NULL_RTX;
738 mode = GET_MODE (x);
739 if (mode == VOIDmode)
740 mode = DImode;
741 if (GET_MODE_SIZE (mode) >= 8)
742 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
743 if (sub)
744 print_operand (stream, sub, 0);
745 else
746 output_operand_lossage ("invalid operand to %%R");
748 break;
749 case 'S':
750 if (REG_P (x) || GET_CODE (x) == SUBREG)
752 regno = true_regnum (x);
753 regno += FP_REGISTER_P (regno) ? 0 : MSW;
754 fputs (reg_names[regno], (stream));
756 else if (MEM_P (x))
758 x = adjust_address (x, SImode, 4 * MSW);
759 print_operand_address (stream, XEXP (x, 0));
761 else
763 rtx sub = NULL_RTX;
765 mode = GET_MODE (x);
766 if (mode == VOIDmode)
767 mode = DImode;
768 if (GET_MODE_SIZE (mode) >= 8)
769 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
770 if (sub)
771 print_operand (stream, sub, 0);
772 else
773 output_operand_lossage ("invalid operand to %%S");
775 break;
776 case 'T':
777 /* Next word of a double. */
778 switch (GET_CODE (x))
780 case REG:
781 fputs (reg_names[REGNO (x) + 1], (stream));
782 break;
783 case MEM:
784 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
785 && GET_CODE (XEXP (x, 0)) != POST_INC)
786 x = adjust_address (x, SImode, 4);
787 print_operand_address (stream, XEXP (x, 0));
788 break;
789 default:
790 break;
792 break;
793 case 'o':
794 switch (GET_CODE (x))
796 case PLUS: fputs ("add", stream); break;
797 case MINUS: fputs ("sub", stream); break;
798 case MULT: fputs ("mul", stream); break;
799 case DIV: fputs ("div", stream); break;
800 case EQ: fputs ("eq", stream); break;
801 case NE: fputs ("ne", stream); break;
802 case GT: case LT: fputs ("gt", stream); break;
803 case GE: case LE: fputs ("ge", stream); break;
804 case GTU: case LTU: fputs ("gtu", stream); break;
805 case GEU: case LEU: fputs ("geu", stream); break;
806 default:
807 break;
809 break;
810 case 'M':
811 if (GET_CODE (x) == MEM
812 && GET_CODE (XEXP (x, 0)) == PLUS
813 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
814 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
815 fputc ('x', stream);
816 break;
818 case 'm':
819 gcc_assert (GET_CODE (x) == MEM);
820 x = XEXP (x, 0);
821 /* Fall through. */
822 case 'U':
823 switch (GET_CODE (x))
825 case REG:
826 case SUBREG:
827 print_operand (stream, x, 0);
828 fputs (", 0", stream);
829 break;
831 case PLUS:
832 print_operand (stream, XEXP (x, 0), 0);
833 fputs (", ", stream);
834 print_operand (stream, XEXP (x, 1), 0);
835 break;
837 default:
838 gcc_unreachable ();
840 break;
842 case 'd':
843 gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);
845 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
846 break;
848 case 'N':
849 if (x == CONST0_RTX (GET_MODE (x)))
851 fprintf ((stream), "r63");
852 break;
854 goto default_output;
855 case 'u':
856 if (GET_CODE (x) == CONST_INT)
858 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
859 break;
861 /* Fall through. */
863 default_output:
864 default:
865 regno = 0;
866 mode = GET_MODE (x);
868 switch (GET_CODE (x))
870 case TRUNCATE:
872 rtx inner = XEXP (x, 0);
873 int offset = 0;
874 enum machine_mode inner_mode;
876 /* We might see SUBREGs with vector mode registers inside. */
877 if (GET_CODE (inner) == SUBREG
878 && (GET_MODE_SIZE (GET_MODE (inner))
879 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
880 && subreg_lowpart_p (inner))
881 inner = SUBREG_REG (inner);
882 if (GET_CODE (inner) == CONST_INT)
884 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
885 goto default_output;
887 inner_mode = GET_MODE (inner);
888 if (GET_CODE (inner) == SUBREG
889 && (GET_MODE_SIZE (GET_MODE (inner))
890 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
891 && GET_CODE (SUBREG_REG (inner)) == REG)
893 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
894 GET_MODE (SUBREG_REG (inner)),
895 SUBREG_BYTE (inner),
896 GET_MODE (inner));
897 inner = SUBREG_REG (inner);
899 if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
900 abort ();
901 /* Floating point register pairs are always big endian;
902 general purpose registers are 64 bit wide. */
903 regno = REGNO (inner);
904 regno = (HARD_REGNO_NREGS (regno, inner_mode)
905 - HARD_REGNO_NREGS (regno, mode))
906 + offset;
907 x = inner;
908 goto reg;
910 case SIGN_EXTEND:
911 x = XEXP (x, 0);
912 goto reg;
913 /* FIXME: We need this on SHmedia32 because reload generates
914 some sign-extended HI or QI loads into DImode registers
915 but, because Pmode is SImode, the address ends up with a
916 subreg:SI of the DImode register. Maybe reload should be
917 fixed so as to apply alter_subreg to such loads? */
918 case IF_THEN_ELSE:
919 gcc_assert (trapping_target_operand (x, VOIDmode));
920 x = XEXP (XEXP (x, 2), 0);
921 goto default_output;
922 case SUBREG:
923 gcc_assert (SUBREG_BYTE (x) == 0
924 && GET_CODE (SUBREG_REG (x)) == REG);
926 x = SUBREG_REG (x);
927 /* Fall through. */
929 reg:
930 case REG:
931 regno += REGNO (x);
932 if (FP_REGISTER_P (regno)
933 && mode == V16SFmode)
934 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
935 else if (FP_REGISTER_P (REGNO (x))
936 && mode == V4SFmode)
937 fprintf ((stream), "fv%s", reg_names[regno] + 2);
938 else if (GET_CODE (x) == REG
939 && mode == V2SFmode)
940 fprintf ((stream), "fp%s", reg_names[regno] + 2);
941 else if (FP_REGISTER_P (REGNO (x))
942 && GET_MODE_SIZE (mode) > 4)
943 fprintf ((stream), "d%s", reg_names[regno] + 1);
944 else
945 fputs (reg_names[regno], (stream));
946 break;
948 case MEM:
949 output_address (XEXP (x, 0));
950 break;
952 case CONST:
953 if (TARGET_SHMEDIA
954 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
955 && (GET_MODE (XEXP (x, 0)) == DImode
956 || GET_MODE (XEXP (x, 0)) == SImode)
957 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
958 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
960 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
962 fputc ('(', stream);
963 if (GET_CODE (val) == ASHIFTRT)
965 fputc ('(', stream);
966 if (GET_CODE (XEXP (val, 0)) == CONST)
967 fputc ('(', stream);
968 output_addr_const (stream, XEXP (val, 0));
969 if (GET_CODE (XEXP (val, 0)) == CONST)
970 fputc (')', stream);
971 fputs (" >> ", stream);
972 output_addr_const (stream, XEXP (val, 1));
973 fputc (')', stream);
975 else
977 if (GET_CODE (val) == CONST)
978 fputc ('(', stream);
979 output_addr_const (stream, val);
980 if (GET_CODE (val) == CONST)
981 fputc (')', stream);
983 fputs (" & 65535)", stream);
984 break;
987 /* Fall through. */
988 default:
989 if (TARGET_SH1)
990 fputc ('#', stream);
991 output_addr_const (stream, x);
992 break;
994 break;
998 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
999 static void
1000 force_into (rtx value, rtx target)
1002 value = force_operand (value, target);
1003 if (! rtx_equal_p (value, target))
1004 emit_insn (gen_move_insn (target, value));
1007 /* Emit code to perform a block move. Choose the best method.
1009 OPERANDS[0] is the destination.
1010 OPERANDS[1] is the source.
1011 OPERANDS[2] is the size.
1012 OPERANDS[3] is the alignment safe to use. */
1015 expand_block_move (rtx *operands)
1017 int align = INTVAL (operands[3]);
1018 int constp = (GET_CODE (operands[2]) == CONST_INT);
1019 int bytes = (constp ? INTVAL (operands[2]) : 0);
1021 if (! constp)
1022 return 0;
1024 /* If we could use mov.l to move words and dest is word-aligned, we
1025 can use movua.l for loads and still generate a relatively short
1026 and efficient sequence. */
1027 if (TARGET_SH4A_ARCH && align < 4
1028 && MEM_ALIGN (operands[0]) >= 32
1029 && can_move_by_pieces (bytes, 32))
1031 rtx dest = copy_rtx (operands[0]);
1032 rtx src = copy_rtx (operands[1]);
1033 /* We could use different pseudos for each copied word, but
1034 since movua can only load into r0, it's kind of
1035 pointless. */
1036 rtx temp = gen_reg_rtx (SImode);
1037 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1038 int copied = 0;
1040 while (copied + 4 <= bytes)
1042 rtx to = adjust_address (dest, SImode, copied);
1043 rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
1045 emit_insn (gen_movua (temp, from));
1046 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1047 emit_move_insn (to, temp);
1048 copied += 4;
1051 if (copied < bytes)
1052 move_by_pieces (adjust_address (dest, BLKmode, copied),
1053 adjust_automodify_address (src, BLKmode,
1054 src_addr, copied),
1055 bytes - copied, align, 0);
1057 return 1;
1060 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1061 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1062 if (align < 4 || (bytes % 4 != 0))
1063 return 0;
1065 if (TARGET_HARD_SH4)
1067 if (bytes < 12)
1068 return 0;
1069 else if (bytes == 12)
1071 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1072 rtx r4 = gen_rtx_REG (SImode, 4);
1073 rtx r5 = gen_rtx_REG (SImode, 5);
1075 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1076 force_into (XEXP (operands[0], 0), r4);
1077 force_into (XEXP (operands[1], 0), r5);
1078 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1079 return 1;
1081 else if (! TARGET_SMALLCODE)
1083 const char *entry_name;
1084 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1085 int dwords;
1086 rtx r4 = gen_rtx_REG (SImode, 4);
1087 rtx r5 = gen_rtx_REG (SImode, 5);
1088 rtx r6 = gen_rtx_REG (SImode, 6);
1090 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1091 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1092 force_into (XEXP (operands[0], 0), r4);
1093 force_into (XEXP (operands[1], 0), r5);
1095 dwords = bytes >> 3;
1096 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1097 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1098 return 1;
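/* Editor's note: a worked example of the setup above (hedged; the exact
   library calling convention is assumed from this code, not verified):
   for bytes == 28, bytes & 4 is nonzero, so the "__movmem_i4_odd" entry
   is chosen and r6 gets (28 >> 3) - 1 == 2, i.e. the number of 8-byte
   chunks minus one, with the odd trailing 4 bytes handled by the library
   routine itself.  */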
1100 else
1101 return 0;
1103 if (bytes < 64)
1105 char entry[30];
1106 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1107 rtx r4 = gen_rtx_REG (SImode, 4);
1108 rtx r5 = gen_rtx_REG (SImode, 5);
1110 sprintf (entry, "__movmemSI%d", bytes);
1111 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1112 force_into (XEXP (operands[0], 0), r4);
1113 force_into (XEXP (operands[1], 0), r5);
1114 emit_insn (gen_block_move_real (func_addr_rtx));
1115 return 1;
1118 /* This is the same number of bytes as a memcpy call, but to a different,
1119 less common function name, so this will occasionally use more space. */
1120 if (! TARGET_SMALLCODE)
1122 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1123 int final_switch, while_loop;
1124 rtx r4 = gen_rtx_REG (SImode, 4);
1125 rtx r5 = gen_rtx_REG (SImode, 5);
1126 rtx r6 = gen_rtx_REG (SImode, 6);
1128 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1129 force_into (XEXP (operands[0], 0), r4);
1130 force_into (XEXP (operands[1], 0), r5);
1132 /* r6 controls the size of the move. 16 is decremented from it
1133 for each 64 bytes moved. Then the negative value left over is used
1134 as an index into a list of move instructions. E.g., a 72-byte move
1135 would be set up with size(r6) = 14, for one iteration through the
1136 big while loop, and a switch of -2 for the last part. */
1138 final_switch = 16 - ((bytes / 4) % 16);
1139 while_loop = ((bytes / 4) / 16 - 1) * 16;
1140 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1141 emit_insn (gen_block_lump_real (func_addr_rtx));
1142 return 1;
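/* Editor's note: another worked example of the r6 encoding above, if the
   formulas are read as written: for bytes == 144, bytes / 4 == 36, so
   final_switch == 16 - (36 % 16) == 12 and while_loop == (36 / 16 - 1) * 16
   == 16, giving r6 == 28; two passes of the 64-byte loop bring it down to
   -4, which indexes the tail sequence that copies the remaining 16 bytes.  */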
1145 return 0;
1148 /* Prepare operands for a move define_expand; specifically, one of the
1149 operands must be in a register. */
1152 prepare_move_operands (rtx operands[], enum machine_mode mode)
1154 if ((mode == SImode || mode == DImode)
1155 && flag_pic
1156 && ! ((mode == Pmode || mode == ptr_mode)
1157 && tls_symbolic_operand (operands[1], Pmode) != 0))
1159 rtx temp;
1160 if (SYMBOLIC_CONST_P (operands[1]))
1162 if (GET_CODE (operands[0]) == MEM)
1163 operands[1] = force_reg (Pmode, operands[1]);
1164 else if (TARGET_SHMEDIA
1165 && GET_CODE (operands[1]) == LABEL_REF
1166 && target_reg_operand (operands[0], mode))
1167 /* It's ok. */;
1168 else
1170 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1171 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1174 else if (GET_CODE (operands[1]) == CONST
1175 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1176 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1178 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1179 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1180 mode, temp);
1181 operands[1] = expand_binop (mode, add_optab, temp,
1182 XEXP (XEXP (operands[1], 0), 1),
1183 no_new_pseudos ? temp
1184 : gen_reg_rtx (Pmode),
1185 0, OPTAB_LIB_WIDEN);
1189 if (! reload_in_progress && ! reload_completed)
1191 /* Copy the source to a register if both operands aren't registers. */
1192 if (! register_operand (operands[0], mode)
1193 && ! sh_register_operand (operands[1], mode))
1194 operands[1] = copy_to_mode_reg (mode, operands[1]);
1196 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
1198 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1199 except that we can't use that function because it is static. */
1200 rtx new = change_address (operands[0], mode, 0);
1201 MEM_COPY_ATTRIBUTES (new, operands[0]);
1202 operands[0] = new;
1205 /* This case can happen while generating code to move the result
1206 of a library call to the target. Reject `st r0,@(rX,rY)' because
1207 reload will fail to find a spill register for rX, since r0 is already
1208 being used for the source. */
1209 else if (TARGET_SH1
1210 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1211 && GET_CODE (operands[0]) == MEM
1212 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1213 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1214 operands[1] = copy_to_mode_reg (mode, operands[1]);
1217 if (mode == Pmode || mode == ptr_mode)
1219 rtx op0, op1, opc;
1220 enum tls_model tls_kind;
1222 op0 = operands[0];
1223 op1 = operands[1];
1224 if (GET_CODE (op1) == CONST
1225 && GET_CODE (XEXP (op1, 0)) == PLUS
1226 && tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode))
1228 opc = XEXP (XEXP (op1, 0), 1);
1229 op1 = XEXP (XEXP (op1, 0), 0);
1231 else
1232 opc = NULL_RTX;
1234 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1236 rtx tga_op1, tga_ret, tmp, tmp2;
1238 switch (tls_kind)
1240 case TLS_MODEL_GLOBAL_DYNAMIC:
1241 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1242 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1243 op1 = tga_ret;
1244 break;
1246 case TLS_MODEL_LOCAL_DYNAMIC:
1247 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1248 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1250 tmp = gen_reg_rtx (Pmode);
1251 emit_move_insn (tmp, tga_ret);
1253 if (register_operand (op0, Pmode))
1254 tmp2 = op0;
1255 else
1256 tmp2 = gen_reg_rtx (Pmode);
1258 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1259 op1 = tmp2;
1260 break;
1262 case TLS_MODEL_INITIAL_EXEC:
1263 if (! flag_pic)
1265 /* Don't schedule the insns that load the GOT address when
1266 the first scheduling pass is enabled, to avoid spill
1267 failures for R0. */
1268 if (flag_schedule_insns)
1269 emit_insn (gen_blockage ());
1270 emit_insn (gen_GOTaddr2picreg ());
1271 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
1272 PIC_REG)));
1273 if (flag_schedule_insns)
1274 emit_insn (gen_blockage ());
1276 tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
1277 tmp = gen_sym2GOTTPOFF (op1);
1278 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1279 op1 = tga_op1;
1280 break;
1282 case TLS_MODEL_LOCAL_EXEC:
1283 tmp2 = gen_reg_rtx (Pmode);
1284 emit_insn (gen_load_gbr (tmp2));
1285 tmp = gen_reg_rtx (Pmode);
1286 emit_insn (gen_symTPOFF2reg (tmp, op1));
1288 if (register_operand (op0, Pmode))
1289 op1 = op0;
1290 else
1291 op1 = gen_reg_rtx (Pmode);
1293 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1294 break;
1296 default:
1297 gcc_unreachable ();
1299 if (opc)
1300 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1301 operands[1] = op1;
1305 return 0;
1308 /* Prepare the operands for an scc instruction; make sure that the
1309 compare has been done. */
1311 prepare_scc_operands (enum rtx_code code)
1313 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1314 enum rtx_code oldcode = code;
1315 enum machine_mode mode;
1317 /* First need a compare insn. */
1318 switch (code)
1320 case NE:
1321 /* It isn't possible to handle this case. */
1322 gcc_unreachable ();
1323 case LT:
1324 code = GT;
1325 break;
1326 case LE:
1327 code = GE;
1328 break;
1329 case LTU:
1330 code = GTU;
1331 break;
1332 case LEU:
1333 code = GEU;
1334 break;
1335 default:
1336 break;
1338 if (code != oldcode)
1340 rtx tmp = sh_compare_op0;
1341 sh_compare_op0 = sh_compare_op1;
1342 sh_compare_op1 = tmp;
1345 mode = GET_MODE (sh_compare_op0);
1346 if (mode == VOIDmode)
1347 mode = GET_MODE (sh_compare_op1);
1349 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1350 if ((code != EQ && code != NE
1351 && (sh_compare_op1 != const0_rtx
1352 || code == GTU || code == GEU || code == LTU || code == LEU))
1353 || (mode == DImode && sh_compare_op1 != const0_rtx)
1354 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1355 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1357 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1358 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1359 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1360 gen_rtx_SET (VOIDmode, t_reg,
1361 gen_rtx_fmt_ee (code, SImode,
1362 sh_compare_op0, sh_compare_op1)),
1363 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1364 else
1365 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1366 gen_rtx_fmt_ee (code, SImode,
1367 sh_compare_op0, sh_compare_op1)));
1369 return t_reg;
1372 /* Called from the md file, set up the operands of a compare instruction. */
1374 void
1375 from_compare (rtx *operands, int code)
1377 enum machine_mode mode = GET_MODE (sh_compare_op0);
1378 rtx insn;
1379 if (mode == VOIDmode)
1380 mode = GET_MODE (sh_compare_op1);
1381 if (code != EQ
1382 || mode == DImode
1383 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1385 /* Force args into regs, since we can't use constants here. */
1386 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1387 if (sh_compare_op1 != const0_rtx
1388 || code == GTU || code == GEU
1389 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1390 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1392 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1394 from_compare (operands, GT);
1395 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1397 else
1398 insn = gen_rtx_SET (VOIDmode,
1399 gen_rtx_REG (SImode, T_REG),
1400 gen_rtx_fmt_ee (code, SImode,
1401 sh_compare_op0, sh_compare_op1));
1402 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1404 insn = gen_rtx_PARALLEL (VOIDmode,
1405 gen_rtvec (2, insn,
1406 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1407 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1409 else
1410 emit_insn (insn);
1413 /* Functions to output assembly code. */
1415 /* Return a sequence of instructions to perform DI or DF move.
1417 Since the SH cannot move a DI or DF in one instruction, we have
1418 to take care when we see overlapping source and dest registers. */
1420 const char *
1421 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1422 enum machine_mode mode)
1424 rtx dst = operands[0];
1425 rtx src = operands[1];
1427 if (GET_CODE (dst) == MEM
1428 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1429 return "mov.l %T1,%0\n\tmov.l %1,%0";
1431 if (register_operand (dst, mode)
1432 && register_operand (src, mode))
1434 if (REGNO (src) == MACH_REG)
1435 return "sts mach,%S0\n\tsts macl,%R0";
1437 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1438 when mov.d r1,r0 do r1->r0 then r2->r1. */
1440 if (REGNO (src) + 1 == REGNO (dst))
1441 return "mov %T1,%T0\n\tmov %1,%0";
1442 else
1443 return "mov %1,%0\n\tmov %T1,%T0";
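/* Editor's note: e.g. for a DImode move from r1/r2 into r2/r3 we have
   REGNO (src) + 1 == REGNO (dst), so the first template is used and the
   output is "mov r2,r3" followed by "mov r1,r2": the half that is about
   to be overwritten (r2) is copied away first.  */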
1445 else if (GET_CODE (src) == CONST_INT)
1447 if (INTVAL (src) < 0)
1448 output_asm_insn ("mov #-1,%S0", operands);
1449 else
1450 output_asm_insn ("mov #0,%S0", operands);
1452 return "mov %1,%R0";
1454 else if (GET_CODE (src) == MEM)
1456 int ptrreg = -1;
1457 int dreg = REGNO (dst);
1458 rtx inside = XEXP (src, 0);
1460 switch (GET_CODE (inside))
1462 case REG:
1463 ptrreg = REGNO (inside);
1464 break;
1466 case SUBREG:
1467 ptrreg = subreg_regno (inside);
1468 break;
1470 case PLUS:
1471 ptrreg = REGNO (XEXP (inside, 0));
1472 /* ??? An r0+REG address shouldn't be possible here, because it isn't
1473 an offsettable address. Unfortunately, offsettable addresses use
1474 QImode to check the offset, and a QImode offsettable address
1475 requires r0 for the other operand, which is not currently
1476 supported, so we can't use the 'o' constraint.
1477 Thus we must check for and handle r0+REG addresses here.
1478 We punt for now, since this is likely very rare. */
1479 gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
1480 break;
1482 case LABEL_REF:
1483 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1484 case POST_INC:
1485 return "mov.l %1,%0\n\tmov.l %1,%T0";
1486 default:
1487 gcc_unreachable ();
1490 /* Work out the safe way to copy. Copy into the second half first. */
1491 if (dreg == ptrreg)
1492 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1495 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1498 /* Print an instruction which would have gone into a delay slot after
1499 another instruction, but couldn't because the other instruction expanded
1500 into a sequence where putting the slot insn at the end wouldn't work. */
1502 static void
1503 print_slot (rtx insn)
1505 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
1507 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1510 const char *
1511 output_far_jump (rtx insn, rtx op)
1513 struct { rtx lab, reg, op; } this;
1514 rtx braf_base_lab = NULL_RTX;
1515 const char *jump;
1516 int far;
1517 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1518 rtx prev;
1520 this.lab = gen_label_rtx ();
1522 if (TARGET_SH2
1523 && offset >= -32764
1524 && offset - get_attr_length (insn) <= 32766)
1526 far = 0;
1527 jump = "mov.w %O0,%1; braf %1";
1529 else
1531 far = 1;
1532 if (flag_pic)
1534 if (TARGET_SH2)
1535 jump = "mov.l %O0,%1; braf %1";
1536 else
1537 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1539 else
1540 jump = "mov.l %O0,%1; jmp @%1";
1542 /* If we have a scratch register available, use it. */
1543 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1544 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1546 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1547 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1548 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1549 output_asm_insn (jump, &this.lab);
1550 if (dbr_sequence_length ())
1551 print_slot (final_sequence);
1552 else
1553 output_asm_insn ("nop", 0);
1555 else
1557 /* Output the delay slot insn first if any. */
1558 if (dbr_sequence_length ())
1559 print_slot (final_sequence);
1561 this.reg = gen_rtx_REG (SImode, 13);
1562 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1563 Fortunately, MACL is fixed and call-clobbered, and we never
1564 need its value across jumps, so save r13 in it instead of in
1565 the stack. */
1566 if (TARGET_SH5)
1567 output_asm_insn ("lds r13, macl", 0);
1568 else
1569 output_asm_insn ("mov.l r13,@-r15", 0);
1570 output_asm_insn (jump, &this.lab);
1571 if (TARGET_SH5)
1572 output_asm_insn ("sts macl, r13", 0);
1573 else
1574 output_asm_insn ("mov.l @r15+,r13", 0);
1576 if (far && flag_pic && TARGET_SH2)
1578 braf_base_lab = gen_label_rtx ();
1579 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1580 CODE_LABEL_NUMBER (braf_base_lab));
1582 if (far)
1583 output_asm_insn (".align 2", 0);
1584 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1585 this.op = op;
1586 if (far && flag_pic)
1588 if (TARGET_SH2)
1589 this.lab = braf_base_lab;
1590 output_asm_insn (".long %O2-%O0", &this.lab);
1592 else
1593 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1594 return "";
1597 /* Local label counter, used for constants in the pool and inside
1598 pattern branches. */
1600 static int lf = 100;
1602 /* Output code for ordinary branches. */
1604 const char *
1605 output_branch (int logic, rtx insn, rtx *operands)
1607 switch (get_attr_length (insn))
1609 case 6:
1610 /* This can happen if filling the delay slot has caused a forward
1611 branch to exceed its range (we could reverse it, but only
1612 when we know we won't overextend other branches; this is
1613 best handled by relaxation).
1614 It can also happen when other condbranches hoist delay slot insns
1615 from their destination, thus increasing code size.
1616 But the branch will still be in the range -4092..+4098 bytes. */
1618 if (! TARGET_RELAX)
1620 int label = lf++;
1621 /* The call to print_slot will clobber the operands. */
1622 rtx op0 = operands[0];
1624 /* If the instruction in the delay slot is annulled (true), then
1625 there is no delay slot where we can put it now. The only safe
1626 place for it is after the label. final will do that by default. */
1628 if (final_sequence
1629 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1630 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1632 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1633 ASSEMBLER_DIALECT ? "/" : ".", label);
1634 print_slot (final_sequence);
1636 else
1637 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1639 output_asm_insn ("bra\t%l0", &op0);
1640 fprintf (asm_out_file, "\tnop\n");
1641 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1643 return "";
1645 /* When relaxing, handle this like a short branch. The linker
1646 will fix it up if it still doesn't fit after relaxation. */
1647 case 2:
1648 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1650 /* These are for SH2e, in which we have to account for the
1651 extra nop because of the hardware bug in annulled branches. */
1652 case 8:
1653 if (! TARGET_RELAX)
1655 int label = lf++;
1657 gcc_assert (!final_sequence
1658 || !(INSN_ANNULLED_BRANCH_P
1659 (XVECEXP (final_sequence, 0, 0))));
1660 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1661 logic ? "f" : "t",
1662 ASSEMBLER_DIALECT ? "/" : ".", label);
1663 fprintf (asm_out_file, "\tnop\n");
1664 output_asm_insn ("bra\t%l0", operands);
1665 fprintf (asm_out_file, "\tnop\n");
1666 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1668 return "";
1670 /* When relaxing, fall through. */
1671 case 4:
1673 char buffer[10];
1675 sprintf (buffer, "b%s%ss\t%%l0",
1676 logic ? "t" : "f",
1677 ASSEMBLER_DIALECT ? "/" : ".");
1678 output_asm_insn (buffer, &operands[0]);
1679 return "nop";
1682 default:
1683 /* There should be no longer branches now - that would
1684 indicate that something has destroyed the branches set
1685 up in machine_dependent_reorg. */
1686 gcc_unreachable ();
1690 const char *
1691 output_branchy_insn (enum rtx_code code, const char *template,
1692 rtx insn, rtx *operands)
1694 rtx next_insn = NEXT_INSN (insn);
1696 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1698 rtx src = SET_SRC (PATTERN (next_insn));
1699 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1701 /* Following branch not taken */
1702 operands[9] = gen_label_rtx ();
1703 emit_label_after (operands[9], next_insn);
1704 INSN_ADDRESSES_NEW (operands[9],
1705 INSN_ADDRESSES (INSN_UID (next_insn))
1706 + get_attr_length (next_insn));
1707 return template;
1709 else
1711 int offset = (branch_dest (next_insn)
1712 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1713 if (offset >= -252 && offset <= 258)
1715 if (GET_CODE (src) == IF_THEN_ELSE)
1716 /* branch_true */
1717 src = XEXP (src, 1);
1718 operands[9] = src;
1719 return template;
1723 operands[9] = gen_label_rtx ();
1724 emit_label_after (operands[9], insn);
1725 INSN_ADDRESSES_NEW (operands[9],
1726 INSN_ADDRESSES (INSN_UID (insn))
1727 + get_attr_length (insn));
1728 return template;
1731 const char *
1732 output_ieee_ccmpeq (rtx insn, rtx *operands)
1734 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
1735 insn, operands);
1738 /* Output the start of the assembler file. */
1740 static void
1741 sh_file_start (void)
1743 default_file_start ();
1745 #ifdef SYMBIAN
1746 /* Declare the .directive section before it is used. */
1747 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1748 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1749 #endif
1751 if (TARGET_ELF)
1752 /* We need to show the text section with the proper
1753 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1754 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1755 will complain. We can teach GAS specifically about the
1756 default attributes for our choice of text section, but
1757 then we would have to change GAS again if/when we change
1758 the text section name. */
1759 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1760 else
1761 /* Switch to the data section so that the coffsem symbol
1762 isn't in the text section. */
1763 data_section ();
1765 if (TARGET_LITTLE_ENDIAN)
1766 fputs ("\t.little\n", asm_out_file);
1768 if (!TARGET_ELF)
1770 if (TARGET_SHCOMPACT)
1771 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1772 else if (TARGET_SHMEDIA)
1773 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1774 TARGET_SHMEDIA64 ? 64 : 32);
1778 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1780 static bool
1781 unspec_caller_rtx_p (rtx pat)
1783 switch (GET_CODE (pat))
1785 case CONST:
1786 return unspec_caller_rtx_p (XEXP (pat, 0));
1787 case PLUS:
1788 case MINUS:
1789 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1790 return true;
1791 return unspec_caller_rtx_p (XEXP (pat, 1));
1792 case UNSPEC:
1793 if (XINT (pat, 1) == UNSPEC_CALLER)
1794 return true;
1795 default:
1796 break;
1799 return false;
1802 /* Indicate that INSN cannot be duplicated. This is true for insn
1803 that generates a unique label. */
1805 static bool
1806 sh_cannot_copy_insn_p (rtx insn)
1808 rtx pat;
1810 if (!reload_completed || !flag_pic)
1811 return false;
1813 if (GET_CODE (insn) != INSN)
1814 return false;
1815 if (asm_noperands (insn) >= 0)
1816 return false;
1818 pat = PATTERN (insn);
1819 if (GET_CODE (pat) != SET)
1820 return false;
1821 pat = SET_SRC (pat);
1823 if (unspec_caller_rtx_p (pat))
1824 return true;
1826 return false;
1829 /* Actual number of instructions used to make a shift by N. */
1830 static const char ashiftrt_insns[] =
1831 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1833 /* Left shift and logical right shift are the same. */
1834 static const char shift_insns[] =
1835 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1837 /* Individual shift amounts needed to get the above length sequences.
1838 One bit right shifts clobber the T bit, so when possible, put one bit
1839 shifts in the middle of the sequence, so the ends are eligible for
1840 branch delay slots. */
1841 static const short shift_amounts[32][5] = {
1842 {0}, {1}, {2}, {2, 1},
1843 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1844 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1845 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1846 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1847 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1848 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1849 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
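/* A worked example, reading the tables above: a constant left shift by 13
   costs shift_insns[13] == 4 instructions, and shift_amounts[13] == {8, 2, 1, 2}
   expands it to shll8; shll2; shll; shll2.  A negative entry reverses the
   direction of that step, so a left shift by 14 uses
   shift_amounts[14] == {8, -2, 8}, i.e. shll8; shlr2; shll8.  */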
1851 /* Likewise, but for shift amounts < 16, where up to the three highmost bits
1852 might be clobbered. This is typically used when combined with some
1853 kind of sign or zero extension. */
1855 static const char ext_shift_insns[] =
1856 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1858 static const short ext_shift_amounts[32][4] = {
1859 {0}, {1}, {2}, {2, 1},
1860 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1861 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1862 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1863 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1864 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1865 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1866 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1868 /* Assuming we have a value that has been sign-extended by at least one bit,
1869 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1870 to shift it by N without data loss, and quicker than by other means? */
1871 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
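/* For example, EXT_SHIFT_SIGNED holds only for n == 7 and n == 15; those are
   the entries above whose sequences end in a one bit right shift ({8, -1} and
   {16, -1}), which can presumably be turned into shar without losing the
   sign bit.  */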
1873 /* This is used in length attributes in sh.md to help compute the length
1874 of arbitrary constant shift instructions. */
1877 shift_insns_rtx (rtx insn)
1879 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1880 int shift_count = INTVAL (XEXP (set_src, 1));
1881 enum rtx_code shift_code = GET_CODE (set_src);
1883 switch (shift_code)
1885 case ASHIFTRT:
1886 return ashiftrt_insns[shift_count];
1887 case LSHIFTRT:
1888 case ASHIFT:
1889 return shift_insns[shift_count];
1890 default:
1891 gcc_unreachable ();
1895 /* Return the cost of a shift. */
1897 static inline int
1898 shiftcosts (rtx x)
1900 int value;
1902 if (TARGET_SHMEDIA)
1903 return 1;
1905 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1907 if (GET_MODE (x) == DImode
1908 && GET_CODE (XEXP (x, 1)) == CONST_INT
1909 && INTVAL (XEXP (x, 1)) == 1)
1910 return 2;
1912 /* Everything else is invalid, because there is no pattern for it. */
1913 return MAX_COST;
1915 /* If shifting by a non-constant amount, this will be expensive. */
1916 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1917 return SH_DYNAMIC_SHIFT_COST;
1919 value = INTVAL (XEXP (x, 1));
1921 /* Otherwise, return the true cost in instructions. */
1922 if (GET_CODE (x) == ASHIFTRT)
1924 int cost = ashiftrt_insns[value];
1925 /* If SH3, then we put the constant in a reg and use shad. */
1926 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1927 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1928 return cost;
1930 else
1931 return shift_insns[value];
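/* Illustration of the cap above: an arithmetic right shift by 6 would cost
   ashiftrt_insns[6] == 8 plain shifts, so where dynamic shifts are cheap
   (SH3 and later have shad) the cap reduces the cost to
   1 + SH_DYNAMIC_SHIFT_COST.  */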
1934 /* Return the cost of an AND operation. */
1936 static inline int
1937 andcosts (rtx x)
1939 int i;
1941 /* ANDing with a register is a single cycle AND instruction. */
1942 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1943 return 1;
1945 i = INTVAL (XEXP (x, 1));
1947 if (TARGET_SHMEDIA)
1949 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1950 && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1951 || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
1952 return 1;
1953 else
1954 return 2;
1957 /* These constants are single cycle extu.[bw] instructions. */
1958 if (i == 0xff || i == 0xffff)
1959 return 1;
1960 /* Constants that can be used in an and immediate instruction in a single
1961 cycle; this requires r0, so make it a little more expensive. */
1962 if (CONST_OK_FOR_K08 (i))
1963 return 2;
1964 /* Constants that can be loaded with a mov immediate and an and.
1965 This case is probably unnecessary. */
1966 if (CONST_OK_FOR_I08 (i))
1967 return 2;
1968 /* Any other constant requires a 2 cycle pc-relative load plus an and.
1969 This case is probably unnecessary. */
1970 return 3;
1973 /* Return the cost of an addition or a subtraction. */
1975 static inline int
1976 addsubcosts (rtx x)
1978 /* Adding a register is a single cycle insn. */
1979 if (GET_CODE (XEXP (x, 1)) == REG
1980 || GET_CODE (XEXP (x, 1)) == SUBREG)
1981 return 1;
1983 /* Likewise for small constants. */
1984 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1985 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1986 return 1;
1988 if (TARGET_SHMEDIA)
1989 switch (GET_CODE (XEXP (x, 1)))
1991 case CONST:
1992 case LABEL_REF:
1993 case SYMBOL_REF:
1994 return TARGET_SHMEDIA64 ? 5 : 3;
1996 case CONST_INT:
1997 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1998 return 2;
1999 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2000 return 3;
2001 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2002 return 4;
2004 /* Fall through. */
2005 default:
2006 return 5;
2009 /* Any other constant requires a 2 cycle pc-relative load plus an
2010 addition. */
2011 return 3;
2014 /* Return the cost of a multiply. */
2015 static inline int
2016 multcosts (rtx x ATTRIBUTE_UNUSED)
2018 if (sh_multcost >= 0)
2019 return sh_multcost;
2020 if (TARGET_SHMEDIA)
2021 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2022 accept constants. Ideally, we would use a cost of one or two and
2023 add the cost of the operand, but disregard the latter when inside loops
2024 and loop invariant code motion is still to follow.
2025 Using a multiply first and splitting it later if it's a loss
2026 doesn't work because of different sign / zero extension semantics
2027 of multiplies vs. shifts. */
2028 return TARGET_SMALLCODE ? 2 : 3;
2030 if (TARGET_SH2)
2032 /* We have a mul insn, so we can never take more than the mul and the
2033 read of the mac reg, but count more because of the latency and extra
2034 reg usage. */
2035 if (TARGET_SMALLCODE)
2036 return 2;
2037 return 3;
2040 /* If we're aiming at small code, then just count the number of
2041 insns in a multiply call sequence. */
2042 if (TARGET_SMALLCODE)
2043 return 5;
2045 /* Otherwise count all the insns in the routine we'd be calling too. */
2046 return 20;
2049 /* Compute a (partial) cost for rtx X. Return true if the complete
2050 cost has been computed, and false if subexpressions should be
2051 scanned. In either case, *TOTAL contains the cost result. */
2053 static bool
2054 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
2056 switch (code)
2058 case CONST_INT:
2059 if (TARGET_SHMEDIA)
2061 if (INTVAL (x) == 0)
2062 *total = 0;
2063 else if (outer_code == AND && and_operand ((x), DImode))
2064 *total = 0;
2065 else if ((outer_code == IOR || outer_code == XOR
2066 || outer_code == PLUS)
2067 && CONST_OK_FOR_I10 (INTVAL (x)))
2068 *total = 0;
2069 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2070 *total = COSTS_N_INSNS (outer_code != SET);
2071 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2072 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2073 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2074 *total = COSTS_N_INSNS (3);
2075 else
2076 *total = COSTS_N_INSNS (4);
2077 return true;
2079 if (CONST_OK_FOR_I08 (INTVAL (x)))
2080 *total = 0;
2081 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2082 && CONST_OK_FOR_K08 (INTVAL (x)))
2083 *total = 1;
2084 else
2085 *total = 8;
2086 return true;
2088 case CONST:
2089 case LABEL_REF:
2090 case SYMBOL_REF:
2091 if (TARGET_SHMEDIA64)
2092 *total = COSTS_N_INSNS (4);
2093 else if (TARGET_SHMEDIA32)
2094 *total = COSTS_N_INSNS (2);
2095 else
2096 *total = 5;
2097 return true;
2099 case CONST_DOUBLE:
2100 if (TARGET_SHMEDIA)
2101 *total = COSTS_N_INSNS (4);
2102 else
2103 *total = 10;
2104 return true;
2105 case CONST_VECTOR:
2106 if (x == CONST0_RTX (GET_MODE (x)))
2107 *total = 0;
2108 else if (sh_1el_vec (x, VOIDmode))
2109 *total = outer_code != SET;
2110 if (sh_rep_vec (x, VOIDmode))
2111 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2112 + (outer_code != SET));
2113 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2114 return true;
2116 case PLUS:
2117 case MINUS:
2118 *total = COSTS_N_INSNS (addsubcosts (x));
2119 return true;
2121 case AND:
2122 *total = COSTS_N_INSNS (andcosts (x));
2123 return true;
2125 case MULT:
2126 *total = COSTS_N_INSNS (multcosts (x));
2127 return true;
2129 case ASHIFT:
2130 case ASHIFTRT:
2131 case LSHIFTRT:
2132 *total = COSTS_N_INSNS (shiftcosts (x));
2133 return true;
2135 case DIV:
2136 case UDIV:
2137 case MOD:
2138 case UMOD:
2139 *total = COSTS_N_INSNS (20);
2140 return true;
2142 case PARALLEL:
2143 if (sh_1el_vec (x, VOIDmode))
2144 *total = outer_code != SET;
2145 if (sh_rep_vec (x, VOIDmode))
2146 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2147 + (outer_code != SET));
2148 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2149 return true;
2151 case FLOAT:
2152 case FIX:
2153 *total = 100;
2154 return true;
2156 default:
2157 return false;
2161 /* Compute the cost of an address. For the SH, all valid addresses are
2162 the same cost. Use a slightly higher cost for reg + reg addressing,
2163 since it increases pressure on r0. */
2165 static int
2166 sh_address_cost (rtx X)
2168 return (GET_CODE (X) == PLUS
2169 && ! CONSTANT_P (XEXP (X, 1))
2170 && ! TARGET_SHMEDIA ? 1 : 0);
2173 /* Code to expand a shift. */
2175 void
2176 gen_ashift (int type, int n, rtx reg)
2178 /* Negative values here come from the shift_amounts array. */
2179 if (n < 0)
2181 if (type == ASHIFT)
2182 type = LSHIFTRT;
2183 else
2184 type = ASHIFT;
2185 n = -n;
2188 switch (type)
2190 case ASHIFTRT:
2191 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2192 break;
2193 case LSHIFTRT:
2194 if (n == 1)
2195 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2196 else
2197 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2198 break;
2199 case ASHIFT:
2200 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2201 break;
2205 /* Same for HImode */
2207 void
2208 gen_ashift_hi (int type, int n, rtx reg)
2210 /* Negative values here come from the shift_amounts array. */
2211 if (n < 0)
2213 if (type == ASHIFT)
2214 type = LSHIFTRT;
2215 else
2216 type = ASHIFT;
2217 n = -n;
2220 switch (type)
2222 case ASHIFTRT:
2223 case LSHIFTRT:
2224 /* We don't have HImode right shift operations because using the
2225 ordinary 32 bit shift instructions for that doesn't generate proper
2226 zero/sign extension.
2227 gen_ashift_hi is only called in contexts where we know that the
2228 sign extension works out correctly. */
2230 int offset = 0;
2231 if (GET_CODE (reg) == SUBREG)
2233 offset = SUBREG_BYTE (reg);
2234 reg = SUBREG_REG (reg);
2236 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2237 break;
2239 case ASHIFT:
2240 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2241 break;
2245 /* Output RTL to split a constant shift into its component SH constant
2246 shift instructions. */
2248 void
2249 gen_shifty_op (int code, rtx *operands)
2251 int value = INTVAL (operands[2]);
2252 int max, i;
2254 /* Truncate the shift count in case it is out of bounds. */
2255 value = value & 0x1f;
2257 if (value == 31)
2259 if (code == LSHIFTRT)
2261 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2262 emit_insn (gen_movt (operands[0]));
2263 return;
2265 else if (code == ASHIFT)
2267 /* There is a two instruction sequence for 31 bit left shifts,
2268 but it requires r0. */
2269 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2271 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2272 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2273 return;
2277 else if (value == 0)
2279 /* This can happen even when optimizing, if there were subregs before
2280 reload. Don't output a nop here, as this is never optimized away;
2281 use a no-op move instead. */
2282 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2283 return;
2286 max = shift_insns[value];
2287 for (i = 0; i < max; i++)
2288 gen_ashift (code, shift_amounts[value][i], operands[0]);
2291 /* Same as above, but optimized for values where the topmost bits don't
2292 matter. */
2294 void
2295 gen_shifty_hi_op (int code, rtx *operands)
2297 int value = INTVAL (operands[2]);
2298 int max, i;
2299 void (*gen_fun) (int, int, rtx);
2301 /* This operation is used by and_shl for SImode values with a few
2302 high bits known to be cleared. */
2303 value &= 31;
2304 if (value == 0)
2306 emit_insn (gen_nop ());
2307 return;
2310 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2311 if (code == ASHIFT)
2313 max = ext_shift_insns[value];
2314 for (i = 0; i < max; i++)
2315 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2317 else
2318 /* When shifting right, emit the shifts in reverse order, so that
2319 solitary negative values come first. */
2320 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2321 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2324 /* Output RTL for an arithmetic right shift. */
2326 /* ??? Rewrite to use super-optimizer sequences. */
2329 expand_ashiftrt (rtx *operands)
2331 rtx wrk;
2332 char func[18];
2333 int value;
2335 if (TARGET_SH3)
2337 if (GET_CODE (operands[2]) != CONST_INT)
2339 rtx count = copy_to_mode_reg (SImode, operands[2]);
2340 emit_insn (gen_negsi2 (count, count));
2341 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2342 return 1;
2344 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2345 > 1 + SH_DYNAMIC_SHIFT_COST)
2347 rtx count
2348 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2349 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2350 return 1;
2353 if (GET_CODE (operands[2]) != CONST_INT)
2354 return 0;
2356 value = INTVAL (operands[2]) & 31;
2358 if (value == 31)
2360 /* If we are called from abs expansion, arrange things so that we
2361 can use a single MT instruction that doesn't clobber the source,
2362 if LICM can hoist out the load of the constant zero. */
2363 if (currently_expanding_to_rtl)
2365 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2366 operands[1]));
2367 emit_insn (gen_mov_neg_si_t (operands[0]));
2368 return 1;
2370 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2371 return 1;
2373 else if (value >= 16 && value <= 19)
2375 wrk = gen_reg_rtx (SImode);
2376 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2377 value -= 16;
2378 while (value--)
2379 gen_ashift (ASHIFTRT, 1, wrk);
2380 emit_move_insn (operands[0], wrk);
2381 return 1;
2383 /* Expand a short sequence inline; for longer sequences, call a magic routine. */
2384 else if (value <= 5)
2386 wrk = gen_reg_rtx (SImode);
2387 emit_move_insn (wrk, operands[1]);
2388 while (value--)
2389 gen_ashift (ASHIFTRT, 1, wrk);
2390 emit_move_insn (operands[0], wrk);
2391 return 1;
2394 wrk = gen_reg_rtx (Pmode);
2396 /* Load the value into an arg reg and call a helper. */
2397 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2398 sprintf (func, "__ashiftrt_r4_%d", value);
2399 function_symbol (wrk, func, SFUNC_STATIC);
2400 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2401 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2402 return 1;
2406 sh_dynamicalize_shift_p (rtx count)
2408 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2411 /* Try to find a good way to implement the combiner pattern
2412 [(set (match_operand:SI 0 "register_operand" "r")
2413 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2414 (match_operand:SI 2 "const_int_operand" "n"))
2415 (match_operand:SI 3 "const_int_operand" "n"))) .
2416 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2417 return 0 for simple right / left or left/right shift combination.
2418 return 1 for a combination of shifts with zero_extend.
2419 return 2 for a combination of shifts with an AND that needs r0.
2420 return 3 for a combination of shifts with an AND that needs an extra
2421 scratch register, when the three highmost bits of the AND mask are clear.
2422 return 4 for a combination of shifts with an AND that needs an extra
2423 scratch register, when any of the three highmost bits of the AND mask
2424 is set.
2425 If ATTRP is set, store an initial right shift width in ATTRP[0],
2426 and the instruction length in ATTRP[1] . These values are not valid
2427 when returning 0.
2428 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2429 shift_amounts for the last shift value that is to be used before the
2430 sign extend. */
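/* For instance, with LEFT_RTX == 2 and MASK_RTX == 0x3fc (i.e. the
   combination (x << 2) & 0x3fc), mask >> left is 0xff, so the zero extend
   alternative should win and the function should return 1: a zero extend
   (extu.b) followed by a two bit left shift.  */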
2432 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2434 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2435 int left = INTVAL (left_rtx), right;
2436 int best = 0;
2437 int cost, best_cost = 10000;
2438 int best_right = 0, best_len = 0;
2439 int i;
2440 int can_ext;
2442 if (left < 0 || left > 31)
2443 return 0;
2444 if (GET_CODE (mask_rtx) == CONST_INT)
2445 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2446 else
2447 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2448 /* Can this be expressed as a right shift / left shift pair? */
2449 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2450 right = exact_log2 (lsb);
2451 mask2 = ~(mask + lsb - 1);
2452 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2453 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2454 if (! mask2)
2455 best_cost = shift_insns[right] + shift_insns[right + left];
2456 /* mask has no trailing zeroes <==> ! right */
2457 else if (! right && mask2 == ~(lsb2 - 1))
2459 int late_right = exact_log2 (lsb2);
2460 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2462 /* Try to use zero extend. */
2463 if (mask2 == ~(lsb2 - 1))
2465 int width, first;
2467 for (width = 8; width <= 16; width += 8)
2469 /* Can we zero-extend right away? */
2470 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2472 cost
2473 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2474 if (cost < best_cost)
2476 best = 1;
2477 best_cost = cost;
2478 best_right = right;
2479 best_len = cost;
2480 if (attrp)
2481 attrp[2] = -1;
2483 continue;
2485 /* ??? Could try to put zero extend into initial right shift,
2486 or even shift a bit left before the right shift. */
2487 /* Determine value of first part of left shift, to get to the
2488 zero extend cut-off point. */
2489 first = width - exact_log2 (lsb2) + right;
2490 if (first >= 0 && right + left - first >= 0)
2492 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2493 + ext_shift_insns[right + left - first];
2494 if (cost < best_cost)
2496 best = 1;
2497 best_cost = cost;
2498 best_right = right;
2499 best_len = cost;
2500 if (attrp)
2501 attrp[2] = first;
2506 /* Try to use the r0 AND pattern. */
2507 for (i = 0; i <= 2; i++)
2509 if (i > right)
2510 break;
2511 if (! CONST_OK_FOR_K08 (mask >> i))
2512 continue;
2513 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2514 if (cost < best_cost)
2516 best = 2;
2517 best_cost = cost;
2518 best_right = i;
2519 best_len = cost - 1;
2522 /* Try to use a scratch register to hold the AND operand. */
2523 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2524 for (i = 0; i <= 2; i++)
2526 if (i > right)
2527 break;
2528 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2529 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2530 if (cost < best_cost)
2532 best = 4 - can_ext;
2533 best_cost = cost;
2534 best_right = i;
2535 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2539 if (attrp)
2541 attrp[0] = best_right;
2542 attrp[1] = best_len;
2544 return best;
2547 /* This is used in length attributes of the unnamed instructions
2548 corresponding to shl_and_kind return values of 1 and 2. */
2550 shl_and_length (rtx insn)
2552 rtx set_src, left_rtx, mask_rtx;
2553 int attributes[3];
2555 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2556 left_rtx = XEXP (XEXP (set_src, 0), 1);
2557 mask_rtx = XEXP (set_src, 1);
2558 shl_and_kind (left_rtx, mask_rtx, attributes);
2559 return attributes[1];
2562 /* This is used in length attribute of the and_shl_scratch instruction. */
2565 shl_and_scr_length (rtx insn)
2567 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2568 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2569 rtx op = XEXP (set_src, 0);
2570 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2571 op = XEXP (XEXP (op, 0), 0);
2572 return len + shift_insns[INTVAL (XEXP (op, 1))];
2575 /* Generate rtl for instructions for which shl_and_kind advised a particular
2576 method of generating them, i.e. returned nonzero. */
2579 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2581 int attributes[3];
2582 unsigned HOST_WIDE_INT mask;
2583 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2584 int right, total_shift;
2585 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2587 right = attributes[0];
2588 total_shift = INTVAL (left_rtx) + right;
2589 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2590 switch (kind)
2592 default:
2593 return -1;
2594 case 1:
2596 int first = attributes[2];
2597 rtx operands[3];
2599 if (first < 0)
2601 emit_insn ((mask << right) <= 0xff
2602 ? gen_zero_extendqisi2 (dest,
2603 gen_lowpart (QImode, source))
2604 : gen_zero_extendhisi2 (dest,
2605 gen_lowpart (HImode, source)));
2606 source = dest;
2608 if (source != dest)
2609 emit_insn (gen_movsi (dest, source));
2610 operands[0] = dest;
2611 if (right)
2613 operands[2] = GEN_INT (right);
2614 gen_shifty_hi_op (LSHIFTRT, operands);
2616 if (first > 0)
2618 operands[2] = GEN_INT (first);
2619 gen_shifty_hi_op (ASHIFT, operands);
2620 total_shift -= first;
2621 mask <<= first;
2623 if (first >= 0)
2624 emit_insn (mask <= 0xff
2625 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2626 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2627 if (total_shift > 0)
2629 operands[2] = GEN_INT (total_shift);
2630 gen_shifty_hi_op (ASHIFT, operands);
2632 break;
2634 case 4:
2635 shift_gen_fun = gen_shifty_op;
2636 case 3:
2637 /* If the topmost bit that matters is set, set the topmost bits
2638 that don't matter. This way, we might be able to get a shorter
2639 signed constant. */
2640 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2641 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2642 case 2:
2643 /* Don't expand fine-grained when combining, because that will
2644 make the pattern fail. */
2645 if (currently_expanding_to_rtl
2646 || reload_in_progress || reload_completed)
2648 rtx operands[3];
2650 /* Cases 3 and 4 should be handled by this split
2651 only while combining */
2652 gcc_assert (kind <= 2);
2653 if (right)
2655 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2656 source = dest;
2658 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2659 if (total_shift)
2661 operands[0] = dest;
2662 operands[1] = dest;
2663 operands[2] = GEN_INT (total_shift);
2664 shift_gen_fun (ASHIFT, operands);
2666 break;
2668 else
2670 int neg = 0;
2671 if (kind != 4 && total_shift < 16)
2673 neg = -ext_shift_amounts[total_shift][1];
2674 if (neg > 0)
2675 neg -= ext_shift_amounts[total_shift][2];
2676 else
2677 neg = 0;
2679 emit_insn (gen_and_shl_scratch (dest, source,
2680 GEN_INT (right),
2681 GEN_INT (mask),
2682 GEN_INT (total_shift + neg),
2683 GEN_INT (neg)));
2684 emit_insn (gen_movsi (dest, dest));
2685 break;
2688 return 0;
2691 /* Try to find a good way to implement the combiner pattern
2692 [(set (match_operand:SI 0 "register_operand" "=r")
2693 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2694 (match_operand:SI 2 "const_int_operand" "n")
2695 (match_operand:SI 3 "const_int_operand" "n")
2696 (const_int 0)))
2697 (clobber (reg:SI T_REG))]
2698 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2699 return 0 for simple left / right shift combination.
2700 return 1 for left shift / 8 bit sign extend / left shift.
2701 return 2 for left shift / 16 bit sign extend / left shift.
2702 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2703 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2704 return 5 for left shift / 16 bit sign extend / right shift
2705 return 6 for < 8 bit sign extend / left shift.
2706 return 7 for < 8 bit sign extend / left shift / single right shift.
2707 If COSTP is nonzero, assign the calculated cost to *COSTP. */
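/* For instance, with LEFT_RTX == 20 and SIZE_RTX == 24 (so insize == 4) and
   ignoring the SH3 dynamic shift alternative, the cheapest choice should be
   kind 1: shift left by 4 (top bits do not matter), exts.b, then shift left
   by 16, for a cost of 4 instead of the 12 a plain left/right shift pair
   would need.  */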
2710 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2712 int left, size, insize, ext;
2713 int cost = 0, best_cost;
2714 int kind;
2716 left = INTVAL (left_rtx);
2717 size = INTVAL (size_rtx);
2718 insize = size - left;
2719 gcc_assert (insize > 0);
2720 /* Default to left / right shift. */
2721 kind = 0;
2722 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2723 if (size <= 16)
2725 /* 16 bit shift / sign extend / 16 bit shift */
2726 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2727 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2728 below, by alternative 3 or something even better. */
2729 if (cost < best_cost)
2731 kind = 5;
2732 best_cost = cost;
2735 /* Try a plain sign extend between two shifts. */
2736 for (ext = 16; ext >= insize; ext -= 8)
2738 if (ext <= size)
2740 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2741 if (cost < best_cost)
2743 kind = ext / (unsigned) 8;
2744 best_cost = cost;
2747 /* Check if we can do a sloppy shift with a final signed shift
2748 restoring the sign. */
2749 if (EXT_SHIFT_SIGNED (size - ext))
2750 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2751 /* If not, maybe it's still cheaper to do the second shift sloppy,
2752 and do a final sign extend? */
2753 else if (size <= 16)
2754 cost = ext_shift_insns[ext - insize] + 1
2755 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2756 else
2757 continue;
2758 if (cost < best_cost)
2760 kind = ext / (unsigned) 8 + 2;
2761 best_cost = cost;
2764 /* Check if we can sign extend in r0 */
2765 if (insize < 8)
2767 cost = 3 + shift_insns[left];
2768 if (cost < best_cost)
2770 kind = 6;
2771 best_cost = cost;
2773 /* Try the same with a final signed shift. */
2774 if (left < 31)
2776 cost = 3 + ext_shift_insns[left + 1] + 1;
2777 if (cost < best_cost)
2779 kind = 7;
2780 best_cost = cost;
2784 if (TARGET_SH3)
2786 /* Try to use a dynamic shift. */
2787 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2788 if (cost < best_cost)
2790 kind = 0;
2791 best_cost = cost;
2794 if (costp)
2795 *costp = cost;
2796 return kind;
2799 /* Function to be used in the length attribute of the instructions
2800 implementing this pattern. */
2803 shl_sext_length (rtx insn)
2805 rtx set_src, left_rtx, size_rtx;
2806 int cost;
2808 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2809 left_rtx = XEXP (XEXP (set_src, 0), 1);
2810 size_rtx = XEXP (set_src, 1);
2811 shl_sext_kind (left_rtx, size_rtx, &cost);
2812 return cost;
2815 /* Generate rtl for this pattern */
2818 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2820 int kind;
2821 int left, size, insize, cost;
2822 rtx operands[3];
2824 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2825 left = INTVAL (left_rtx);
2826 size = INTVAL (size_rtx);
2827 insize = size - left;
2828 switch (kind)
2830 case 1:
2831 case 2:
2832 case 3:
2833 case 4:
2835 int ext = kind & 1 ? 8 : 16;
2836 int shift2 = size - ext;
2838 /* Don't expand fine-grained when combining, because that will
2839 make the pattern fail. */
2840 if (! currently_expanding_to_rtl
2841 && ! reload_in_progress && ! reload_completed)
2843 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2844 emit_insn (gen_movsi (dest, source));
2845 break;
2847 if (dest != source)
2848 emit_insn (gen_movsi (dest, source));
2849 operands[0] = dest;
2850 if (ext - insize)
2852 operands[2] = GEN_INT (ext - insize);
2853 gen_shifty_hi_op (ASHIFT, operands);
2855 emit_insn (kind & 1
2856 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2857 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2858 if (kind <= 2)
2860 if (shift2)
2862 operands[2] = GEN_INT (shift2);
2863 gen_shifty_op (ASHIFT, operands);
2866 else
2868 if (shift2 > 0)
2870 if (EXT_SHIFT_SIGNED (shift2))
2872 operands[2] = GEN_INT (shift2 + 1);
2873 gen_shifty_op (ASHIFT, operands);
2874 operands[2] = const1_rtx;
2875 gen_shifty_op (ASHIFTRT, operands);
2876 break;
2878 operands[2] = GEN_INT (shift2);
2879 gen_shifty_hi_op (ASHIFT, operands);
2881 else if (shift2)
2883 operands[2] = GEN_INT (-shift2);
2884 gen_shifty_hi_op (LSHIFTRT, operands);
2886 emit_insn (size <= 8
2887 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2888 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2890 break;
2892 case 5:
2894 int i = 16 - size;
2895 if (! currently_expanding_to_rtl
2896 && ! reload_in_progress && ! reload_completed)
2897 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2898 else
2900 operands[0] = dest;
2901 operands[2] = GEN_INT (16 - insize);
2902 gen_shifty_hi_op (ASHIFT, operands);
2903 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2905 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2906 while (--i >= 0)
2907 gen_ashift (ASHIFTRT, 1, dest);
2908 break;
2910 case 6:
2911 case 7:
2912 /* Don't expand fine-grained when combining, because that will
2913 make the pattern fail. */
2914 if (! currently_expanding_to_rtl
2915 && ! reload_in_progress && ! reload_completed)
2917 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2918 emit_insn (gen_movsi (dest, source));
2919 break;
2921 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2922 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2923 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2924 operands[0] = dest;
2925 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2926 gen_shifty_op (ASHIFT, operands);
2927 if (kind == 7)
2928 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2929 break;
2930 default:
2931 return -1;
2933 return 0;
2936 /* Prefix a symbol_ref name with "datalabel". */
2939 gen_datalabel_ref (rtx sym)
2941 const char *str;
2943 if (GET_CODE (sym) == LABEL_REF)
2944 return gen_rtx_CONST (GET_MODE (sym),
2945 gen_rtx_UNSPEC (GET_MODE (sym),
2946 gen_rtvec (1, sym),
2947 UNSPEC_DATALABEL));
2949 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
2951 str = XSTR (sym, 0);
2952 /* Share all SYMBOL_REF strings with the same value - that is important
2953 for cse. */
2954 str = IDENTIFIER_POINTER (get_identifier (str));
2955 XSTR (sym, 0) = str;
2957 return sym;
2961 /* The SH cannot load a large constant into a register, constants have to
2962 come from a pc relative load. The reference of a pc relative load
2963 instruction must be less than 1k in front of the instruction. This
2964 means that we often have to dump a constant inside a function, and
2965 generate code to branch around it.
2967 It is important to minimize this, since the branches will slow things
2968 down and make things bigger.
2970 Worst case code looks like:
2972 mov.l L1,rn
2973 bra L2
2974 nop
2975 align
2976 L1: .long value
2977 L2:
2980 mov.l L3,rn
2981 bra L4
2982 nop
2983 align
2984 L3: .long value
2985 L4:
2988 We fix this by performing a scan before scheduling, which notices which
2989 instructions need to have their operands fetched from the constant table
2990 and builds the table.
2992 The algorithm is:
2994 scan, find an instruction which needs a pcrel move. Look forward, find the
2995 last barrier which is within MAX_COUNT bytes of the requirement.
2996 If there isn't one, make one. Process all the instructions between
2997 the find and the barrier.
2999 In the above example, we can tell that L3 is within 1k of L1, so
3000 the first move can be shrunk from the 3 insn+constant sequence into
3001 just 1 insn, and the constant moved to L3 to make:
3003 mov.l L1,rn
3005 mov.l L3,rn
3006 bra L4
3007 nop
3008 align
3009 L3:.long value
3010 L4:.long value
3012 Then the second move becomes the target for the shortening process. */
3014 typedef struct
3016 rtx value; /* Value in table. */
3017 rtx label; /* Label of value. */
3018 rtx wend; /* End of window. */
3019 enum machine_mode mode; /* Mode of value. */
3021 /* True if this constant is accessed as part of a post-increment
3022 sequence. Note that HImode constants are never accessed in this way. */
3023 bool part_of_sequence_p;
3024 } pool_node;
3026 /* The maximum number of constants that can fit into one pool, since
3027 constants in the range 0..510 are at least 2 bytes long, and in the
3028 range from there to 1018 at least 4 bytes. */
3030 #define MAX_POOL_SIZE 372
3031 static pool_node pool_vector[MAX_POOL_SIZE];
3032 static int pool_size;
3033 static rtx pool_window_label;
3034 static int pool_window_last;
3036 /* ??? If we need a constant in HImode which is the truncated value of a
3037 constant we need in SImode, we could combine the two entries thus saving
3038 two bytes. Is this common enough to be worth the effort of implementing
3039 it? */
3041 /* ??? This stuff should be done at the same time that we shorten branches.
3042 As it is now, we must assume that all branches are the maximum size, and
3043 this causes us to almost always output constant pools sooner than
3044 necessary. */
3046 /* Add a constant to the pool and return its label. */
3048 static rtx
3049 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3051 int i;
3052 rtx lab, new, ref, newref;
3054 /* First see if we've already got it. */
3055 for (i = 0; i < pool_size; i++)
3057 if (x->code == pool_vector[i].value->code
3058 && mode == pool_vector[i].mode)
3060 if (x->code == CODE_LABEL)
3062 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3063 continue;
3065 if (rtx_equal_p (x, pool_vector[i].value))
3067 lab = new = 0;
3068 if (! last_value
3069 || ! i
3070 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3072 new = gen_label_rtx ();
3073 LABEL_REFS (new) = pool_vector[i].label;
3074 pool_vector[i].label = lab = new;
3076 if (lab && pool_window_label)
3078 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
3079 ref = pool_vector[pool_window_last].wend;
3080 LABEL_NEXTREF (newref) = ref;
3081 pool_vector[pool_window_last].wend = newref;
3083 if (new)
3084 pool_window_label = new;
3085 pool_window_last = i;
3086 return lab;
3091 /* Need a new one. */
3092 pool_vector[pool_size].value = x;
3093 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3095 lab = 0;
3096 pool_vector[pool_size - 1].part_of_sequence_p = true;
3098 else
3099 lab = gen_label_rtx ();
3100 pool_vector[pool_size].mode = mode;
3101 pool_vector[pool_size].label = lab;
3102 pool_vector[pool_size].wend = NULL_RTX;
3103 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3104 if (lab && pool_window_label)
3106 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
3107 ref = pool_vector[pool_window_last].wend;
3108 LABEL_NEXTREF (newref) = ref;
3109 pool_vector[pool_window_last].wend = newref;
3111 if (lab)
3112 pool_window_label = lab;
3113 pool_window_last = pool_size;
3114 pool_size++;
3115 return lab;
3118 /* Output the literal table. START, if nonzero, is the first instruction
3119 this table is needed for, and also indicates that there is at least one
3120 casesi_worker_2 instruction; we have to emit the operand3 labels from
3121 these insns at a 4-byte aligned position. BARRIER is the barrier
3122 after which we are to place the table. */
3124 static void
3125 dump_table (rtx start, rtx barrier)
3127 rtx scan = barrier;
3128 int i;
3129 int need_align = 1;
3130 rtx lab, ref;
3131 int have_df = 0;
3133 /* Do two passes, first time dump out the HI sized constants. */
3135 for (i = 0; i < pool_size; i++)
3137 pool_node *p = &pool_vector[i];
3139 if (p->mode == HImode)
3141 if (need_align)
3143 scan = emit_insn_after (gen_align_2 (), scan);
3144 need_align = 0;
3146 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3147 scan = emit_label_after (lab, scan);
3148 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3149 scan);
3150 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3152 lab = XEXP (ref, 0);
3153 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3156 else if (p->mode == DFmode)
3157 have_df = 1;
3160 need_align = 1;
3162 if (start)
3164 scan = emit_insn_after (gen_align_4 (), scan);
3165 need_align = 0;
3166 for (; start != barrier; start = NEXT_INSN (start))
3167 if (GET_CODE (start) == INSN
3168 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3170 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3171 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3173 scan = emit_label_after (lab, scan);
3176 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3178 rtx align_insn = NULL_RTX;
3180 scan = emit_label_after (gen_label_rtx (), scan);
3181 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3182 need_align = 0;
3184 for (i = 0; i < pool_size; i++)
3186 pool_node *p = &pool_vector[i];
3188 switch (p->mode)
3190 case HImode:
3191 break;
3192 case SImode:
3193 case SFmode:
3194 if (align_insn && !p->part_of_sequence_p)
3196 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3197 emit_label_before (lab, align_insn);
3198 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3199 align_insn);
3200 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3202 lab = XEXP (ref, 0);
3203 emit_insn_before (gen_consttable_window_end (lab),
3204 align_insn);
3206 delete_insn (align_insn);
3207 align_insn = NULL_RTX;
3208 continue;
3210 else
3212 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3213 scan = emit_label_after (lab, scan);
3214 scan = emit_insn_after (gen_consttable_4 (p->value,
3215 const0_rtx), scan);
3216 need_align = ! need_align;
3218 break;
3219 case DFmode:
3220 if (need_align)
3222 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3223 align_insn = scan;
3224 need_align = 0;
3226 case DImode:
3227 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3228 scan = emit_label_after (lab, scan);
3229 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3230 scan);
3231 break;
3232 default:
3233 gcc_unreachable ();
3236 if (p->mode != HImode)
3238 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3240 lab = XEXP (ref, 0);
3241 scan = emit_insn_after (gen_consttable_window_end (lab),
3242 scan);
3247 pool_size = 0;
3250 for (i = 0; i < pool_size; i++)
3252 pool_node *p = &pool_vector[i];
3254 switch (p->mode)
3256 case HImode:
3257 break;
3258 case SImode:
3259 case SFmode:
3260 if (need_align)
3262 need_align = 0;
3263 scan = emit_label_after (gen_label_rtx (), scan);
3264 scan = emit_insn_after (gen_align_4 (), scan);
3266 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3267 scan = emit_label_after (lab, scan);
3268 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3269 scan);
3270 break;
3271 case DFmode:
3272 case DImode:
3273 if (need_align)
3275 need_align = 0;
3276 scan = emit_label_after (gen_label_rtx (), scan);
3277 scan = emit_insn_after (gen_align_4 (), scan);
3279 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3280 scan = emit_label_after (lab, scan);
3281 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3282 scan);
3283 break;
3284 default:
3285 gcc_unreachable ();
3288 if (p->mode != HImode)
3290 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3292 lab = XEXP (ref, 0);
3293 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3298 scan = emit_insn_after (gen_consttable_end (), scan);
3299 scan = emit_barrier_after (scan);
3300 pool_size = 0;
3301 pool_window_label = NULL_RTX;
3302 pool_window_last = 0;
3305 /* Return nonzero if constant would be an ok source for a
3306 mov.w instead of a mov.l. */
3308 static int
3309 hi_const (rtx src)
3311 return (GET_CODE (src) == CONST_INT
3312 && INTVAL (src) >= -32768
3313 && INTVAL (src) <= 32767);
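/* For example, (const_int 1000) satisfies hi_const and can be loaded with
   mov.w, while (const_int 0x12345) does not and would need a mov.l
   constant pool entry.  */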
3316 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3318 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3319 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
3320 need to fix it if the input value is CONST_OK_FOR_I08. */
3322 static int
3323 broken_move (rtx insn)
3325 if (GET_CODE (insn) == INSN)
3327 rtx pat = PATTERN (insn);
3328 if (GET_CODE (pat) == PARALLEL)
3329 pat = XVECEXP (pat, 0, 0);
3330 if (GET_CODE (pat) == SET
3331 /* We can load any 8 bit value if we don't care what the high
3332 order bits end up as. */
3333 && GET_MODE (SET_DEST (pat)) != QImode
3334 && (CONSTANT_P (SET_SRC (pat))
3335 /* Match mova_const. */
3336 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3337 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3338 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3339 && ! (TARGET_SH2E
3340 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3341 && (fp_zero_operand (SET_SRC (pat))
3342 || fp_one_operand (SET_SRC (pat)))
3343 /* ??? If this is a -m4 or -m4-single compilation, in general
3344 we don't know the current setting of fpscr, so disable fldi.
3345 There is an exception if this was a register-register move
3346 before reload - and hence it was ascertained that we have
3347 single precision setting - and in a post-reload optimization
3348 we changed this to do a constant load. In that case
3349 we don't have an r0 clobber, hence we must use fldi. */
3350 && (! TARGET_SH4 || TARGET_FMOVD
3351 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3352 == SCRATCH))
3353 && GET_CODE (SET_DEST (pat)) == REG
3354 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3355 && ! (TARGET_SH2A
3356 && GET_MODE (SET_DEST (pat)) == SImode
3357 && GET_CODE (SET_SRC (pat)) == CONST_INT
3358 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3359 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3360 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3361 return 1;
3364 return 0;
3367 static int
3368 mova_p (rtx insn)
3370 return (GET_CODE (insn) == INSN
3371 && GET_CODE (PATTERN (insn)) == SET
3372 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3373 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3374 /* Don't match mova_const. */
3375 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3378 /* Fix up a mova from a switch that went out of range. */
3379 static void
3380 fixup_mova (rtx mova)
3382 if (! flag_pic)
3384 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3385 INSN_CODE (mova) = -1;
3387 else
3389 rtx worker = mova;
3390 rtx lab = gen_label_rtx ();
3391 rtx wpat, wpat0, wpat1, wsrc, diff;
3395 worker = NEXT_INSN (worker);
3396 gcc_assert (worker
3397 && GET_CODE (worker) != CODE_LABEL
3398 && GET_CODE (worker) != JUMP_INSN);
3399 } while (GET_CODE (worker) == NOTE
3400 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3401 wpat = PATTERN (worker);
3402 wpat0 = XVECEXP (wpat, 0, 0);
3403 wpat1 = XVECEXP (wpat, 0, 1);
3404 wsrc = SET_SRC (wpat0);
3405 PATTERN (worker) = (gen_casesi_worker_2
3406 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3407 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3408 XEXP (wpat1, 0)));
3409 INSN_CODE (worker) = -1;
3410 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3411 gen_rtx_LABEL_REF (Pmode, lab));
3412 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3413 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3414 INSN_CODE (mova) = -1;
3418 /* Find the last barrier from insn FROM which is close enough to hold the
3419 constant pool. If we can't find one, then create one near the end of
3420 the range. */
3422 static rtx
3423 find_barrier (int num_mova, rtx mova, rtx from)
3425 int count_si = 0;
3426 int count_hi = 0;
3427 int found_hi = 0;
3428 int found_si = 0;
3429 int found_di = 0;
3430 int hi_align = 2;
3431 int si_align = 2;
3432 int leading_mova = num_mova;
3433 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3434 int si_limit;
3435 int hi_limit;
3437 /* For HImode: range is 510, add 4 because pc counts from address of
3438 second instruction after this one, subtract 2 for the jump instruction
3439 that we may need to emit before the table, subtract 2 for the instruction
3440 that fills the jump delay slot (in very rare cases, reorg will take an
3441 instruction from after the constant pool or will leave the delay slot
3442 empty). This gives 510.
3443 For SImode: range is 1020, add 4 because pc counts from address of
3444 second instruction after this one, subtract 2 in case pc is 2 byte
3445 aligned, subtract 2 for the jump instruction that we may need to emit
3446 before the table, subtract 2 for the instruction that fills the jump
3447 delay slot. This gives 1018. */
3449 /* The branch will always be shortened now that the reference address for
3450 forward branches is the successor address, thus we need no longer make
3451 adjustments to the [sh]i_limit for -O0. */
3453 si_limit = 1018;
3454 hi_limit = 510;
3456 while (from && count_si < si_limit && count_hi < hi_limit)
3458 int inc = get_attr_length (from);
3459 int new_align = 1;
3461 if (GET_CODE (from) == CODE_LABEL)
3463 if (optimize)
3464 new_align = 1 << label_to_alignment (from);
3465 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3466 new_align = 1 << barrier_align (from);
3467 else
3468 new_align = 1;
3469 inc = 0;
3472 if (GET_CODE (from) == BARRIER)
3475 found_barrier = from;
3477 /* If we are at the end of the function, or in front of an alignment
3478 instruction, we need not insert an extra alignment. We prefer
3479 this kind of barrier. */
3480 if (barrier_align (from) > 2)
3481 good_barrier = from;
3484 if (broken_move (from))
3486 rtx pat, src, dst;
3487 enum machine_mode mode;
3489 pat = PATTERN (from);
3490 if (GET_CODE (pat) == PARALLEL)
3491 pat = XVECEXP (pat, 0, 0);
3492 src = SET_SRC (pat);
3493 dst = SET_DEST (pat);
3494 mode = GET_MODE (dst);
3496 /* We must explicitly check the mode, because sometimes the
3497 front end will generate code to load unsigned constants into
3498 HImode targets without properly sign extending them. */
3499 if (mode == HImode
3500 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3502 found_hi += 2;
3503 /* We put the short constants before the long constants, so
3504 we must count the length of short constants in the range
3505 for the long constants. */
3506 /* ??? This isn't optimal, but is easy to do. */
3507 si_limit -= 2;
3509 else
3511 /* We dump DF/DI constants before SF/SI ones, because
3512 the limit is the same, but the alignment requirements
3513 are higher. We may waste up to 4 additional bytes
3514 for alignment, and the DF/DI constant may have
3515 another SF/SI constant placed before it. */
3516 if (TARGET_SHCOMPACT
3517 && ! found_di
3518 && (mode == DFmode || mode == DImode))
3520 found_di = 1;
3521 si_limit -= 8;
3523 while (si_align > 2 && found_si + si_align - 2 > count_si)
3524 si_align >>= 1;
3525 if (found_si > count_si)
3526 count_si = found_si;
3527 found_si += GET_MODE_SIZE (mode);
3528 if (num_mova)
3529 si_limit -= GET_MODE_SIZE (mode);
3533 if (mova_p (from))
3535 if (! num_mova++)
3537 leading_mova = 0;
3538 mova = from;
3539 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3541 if (found_si > count_si)
3542 count_si = found_si;
3544 else if (GET_CODE (from) == JUMP_INSN
3545 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3546 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3548 if (num_mova)
3549 num_mova--;
3550 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3552 /* We have just passed the barrier in front of the
3553 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3554 the ADDR_DIFF_VEC is accessed as data, just like our pool
3555 constants, this is a good opportunity to accommodate what
3556 we have gathered so far.
3557 If we waited any longer, we could end up at a barrier in
3558 front of code, which gives worse cache usage for separated
3559 instruction / data caches. */
3560 good_barrier = found_barrier;
3561 break;
3563 else
3565 rtx body = PATTERN (from);
3566 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3569 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3570 else if (GET_CODE (from) == JUMP_INSN
3571 && ! TARGET_SH2
3572 && ! TARGET_SMALLCODE)
3573 new_align = 4;
3575 if (found_si)
3577 count_si += inc;
3578 if (new_align > si_align)
3580 si_limit -= (count_si - 1) & (new_align - si_align);
3581 si_align = new_align;
3583 count_si = (count_si + new_align - 1) & -new_align;
3585 if (found_hi)
3587 count_hi += inc;
3588 if (new_align > hi_align)
3590 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3591 hi_align = new_align;
3593 count_hi = (count_hi + new_align - 1) & -new_align;
3595 from = NEXT_INSN (from);
3598 if (num_mova)
3600 if (leading_mova)
3602 /* Try as we might, the leading mova is out of range. Change
3603 it into a load (which will become a pcload) and retry. */
3604 fixup_mova (mova);
3605 return find_barrier (0, 0, mova);
3607 else
3609 /* Insert the constant pool table before the mova instruction,
3610 to prevent the mova label reference from going out of range. */
3611 from = mova;
3612 good_barrier = found_barrier = barrier_before_mova;
3616 if (found_barrier)
3618 if (good_barrier && next_real_insn (found_barrier))
3619 found_barrier = good_barrier;
3621 else
3623 /* We didn't find a barrier in time to dump our stuff,
3624 so we'll make one. */
3625 rtx label = gen_label_rtx ();
3627 /* If we exceeded the range, then we must back up over the last
3628 instruction we looked at. Otherwise, we just need to undo the
3629 NEXT_INSN at the end of the loop. */
3630 if (count_hi > hi_limit || count_si > si_limit)
3631 from = PREV_INSN (PREV_INSN (from));
3632 else
3633 from = PREV_INSN (from);
3635 /* Walk back to be just before any jump or label.
3636 Putting it before a label reduces the number of times the branch
3637 around the constant pool table will be hit. Putting it before
3638 a jump makes it more likely that the bra delay slot will be
3639 filled. */
3640 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3641 || GET_CODE (from) == CODE_LABEL)
3642 from = PREV_INSN (from);
3644 from = emit_jump_insn_after (gen_jump (label), from);
3645 JUMP_LABEL (from) = label;
3646 LABEL_NUSES (label) = 1;
3647 found_barrier = emit_barrier_after (from);
3648 emit_label_after (label, found_barrier);
3651 return found_barrier;
3654 /* If the instruction INSN is implemented by a special function, and we can
3655 positively find the register that is used to call the sfunc, and this
3656 register is not used anywhere else in this instruction - except as the
3657 destination of a set, return this register; else, return 0. */
3659 sfunc_uses_reg (rtx insn)
3661 int i;
3662 rtx pattern, part, reg_part, reg;
3664 if (GET_CODE (insn) != INSN)
3665 return 0;
3666 pattern = PATTERN (insn);
3667 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3668 return 0;
3670 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3672 part = XVECEXP (pattern, 0, i);
3673 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3674 reg_part = part;
3676 if (! reg_part)
3677 return 0;
3678 reg = XEXP (reg_part, 0);
3679 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3681 part = XVECEXP (pattern, 0, i);
3682 if (part == reg_part || GET_CODE (part) == CLOBBER)
3683 continue;
3684 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3685 && GET_CODE (SET_DEST (part)) == REG)
3686 ? SET_SRC (part) : part)))
3687 return 0;
3689 return reg;
3692 /* See if the only way in which INSN uses REG is by calling it, or by
3693 setting it while calling it. Set *SET to a SET rtx if the register
3694 is set by INSN. */
3696 static int
3697 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3699 rtx pattern, reg2;
3701 *set = NULL_RTX;
3703 reg2 = sfunc_uses_reg (insn);
3704 if (reg2 && REGNO (reg2) == REGNO (reg))
3706 pattern = single_set (insn);
3707 if (pattern
3708 && GET_CODE (SET_DEST (pattern)) == REG
3709 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3710 *set = pattern;
3711 return 0;
3713 if (GET_CODE (insn) != CALL_INSN)
3715 /* We don't use rtx_equal_p because we don't care if the mode is
3716 different. */
3717 pattern = single_set (insn);
3718 if (pattern
3719 && GET_CODE (SET_DEST (pattern)) == REG
3720 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3722 rtx par, part;
3723 int i;
3725 *set = pattern;
3726 par = PATTERN (insn);
3727 if (GET_CODE (par) == PARALLEL)
3728 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3730 part = XVECEXP (par, 0, i);
3731 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3732 return 1;
3734 return reg_mentioned_p (reg, SET_SRC (pattern));
3737 return 1;
3740 pattern = PATTERN (insn);
3742 if (GET_CODE (pattern) == PARALLEL)
3744 int i;
3746 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3747 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3748 return 1;
3749 pattern = XVECEXP (pattern, 0, 0);
3752 if (GET_CODE (pattern) == SET)
3754 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3756 /* We don't use rtx_equal_p, because we don't care if the
3757 mode is different. */
3758 if (GET_CODE (SET_DEST (pattern)) != REG
3759 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3760 return 1;
3762 *set = pattern;
3765 pattern = SET_SRC (pattern);
3768 if (GET_CODE (pattern) != CALL
3769 || GET_CODE (XEXP (pattern, 0)) != MEM
3770 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3771 return 1;
3773 return 0;
3776 /* Given a X, a pattern of an insn or a part of it, return a mask of used
3777 general registers. Bits 0..15 mean that the respective registers
3778 are used as inputs in the instruction. Bits 16..31 mean that the
3779 registers 0..15, respectively, are used as outputs, or are clobbered.
3780 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
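/* For instance (assuming 32-bit general registers, so HARD_REGNO_NREGS is 1):
   for (set (reg:SI 2) (plus:SI (reg:SI 3) (reg:SI 4))) this returns
   0x00040018 - bits 3 and 4 for the input registers and bit 2 + 16 == 18 for
   the output.  */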
3782 regs_used (rtx x, int is_dest)
3784 enum rtx_code code;
3785 const char *fmt;
3786 int i, used = 0;
3788 if (! x)
3789 return used;
3790 code = GET_CODE (x);
3791 switch (code)
3793 case REG:
3794 if (REGNO (x) < 16)
3795 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3796 << (REGNO (x) + is_dest));
3797 return 0;
3798 case SUBREG:
3800 rtx y = SUBREG_REG (x);
3802 if (GET_CODE (y) != REG)
3803 break;
3804 if (REGNO (y) < 16)
3805 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3806 << (REGNO (y) +
3807 subreg_regno_offset (REGNO (y),
3808 GET_MODE (y),
3809 SUBREG_BYTE (x),
3810 GET_MODE (x)) + is_dest));
3811 return 0;
3813 case SET:
3814 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3815 case RETURN:
3816 /* If there was a return value, it must have been indicated with USE. */
3817 return 0x00ffff00;
3818 case CLOBBER:
3819 is_dest = 1;
3820 break;
3821 case MEM:
3822 is_dest = 0;
3823 break;
3824 case CALL:
3825 used |= 0x00ff00f0;
3826 break;
3827 default:
3828 break;
3831 fmt = GET_RTX_FORMAT (code);
3833 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3835 if (fmt[i] == 'E')
3837 register int j;
3838 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3839 used |= regs_used (XVECEXP (x, i, j), is_dest);
3841 else if (fmt[i] == 'e')
3842 used |= regs_used (XEXP (x, i), is_dest);
3844 return used;
3847 /* Create an instruction that prevents redirection of a conditional branch
3848 to the destination of the JUMP with address ADDR.
3849 If the branch needs to be implemented as an indirect jump, try to find
3850 a scratch register for it.
3851 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3852 If any preceding insn that doesn't fit into a delay slot is good enough,
3853 pass 1. Pass 2 if a definite blocking insn is needed.
3854 -1 is used internally to avoid deep recursion.
3855 If a blocking instruction is made or recognized, return it. */
3857 static rtx
3858 gen_block_redirect (rtx jump, int addr, int need_block)
3860 int dead = 0;
3861 rtx prev = prev_nonnote_insn (jump);
3862 rtx dest;
3864 /* First, check if we already have an instruction that satisfies our need. */
3865 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3867 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3868 return prev;
3869 if (GET_CODE (PATTERN (prev)) == USE
3870 || GET_CODE (PATTERN (prev)) == CLOBBER
3871 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3872 prev = jump;
3873 else if ((need_block &= ~1) < 0)
3874 return prev;
3875 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3876 need_block = 0;
3878 if (GET_CODE (PATTERN (jump)) == RETURN)
3880 if (! need_block)
3881 return prev;
3882 /* Reorg even does nasty things with return insns that cause branches
3883 to go out of range - see find_end_label and callers. */
3884 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3886 /* We can't use JUMP_LABEL here because it might be undefined
3887 when not optimizing. */
3888 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3889 /* If the branch is out of range, try to find a scratch register for it. */
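/* The check below is a biased unsigned comparison: it is true exactly
   when dest - addr lies outside [-4092, +4098], presumably the reach of
   an SH pc-relative branch (about +/-4 KB) minus some slack.  */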
3890 if (optimize
3891 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3892 > 4092 + 4098))
3894 rtx scan;
3895 /* Don't look for the stack pointer as a scratch register,
3896 it would cause trouble if an interrupt occurred. */
3897 unsigned try = 0x7fff, used;
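/* 0x7fff covers r0..r14 only; bit 15, the stack pointer, is left clear
   for the reason given above.  */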
3898 int jump_left = flag_expensive_optimizations + 1;
3900 /* It is likely that the most recent eligible instruction is wanted for
3901 the delay slot. Therefore, find out which registers it uses, and
3902 try to avoid using them. */
3904 for (scan = jump; (scan = PREV_INSN (scan)); )
3906 enum rtx_code code;
3908 if (INSN_DELETED_P (scan))
3909 continue;
3910 code = GET_CODE (scan);
3911 if (code == CODE_LABEL || code == JUMP_INSN)
3912 break;
3913 if (code == INSN
3914 && GET_CODE (PATTERN (scan)) != USE
3915 && GET_CODE (PATTERN (scan)) != CLOBBER
3916 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3918 try &= ~regs_used (PATTERN (scan), 0);
3919 break;
3922 for (used = dead = 0, scan = JUMP_LABEL (jump);
3923 (scan = NEXT_INSN (scan)); )
3925 enum rtx_code code;
3927 if (INSN_DELETED_P (scan))
3928 continue;
3929 code = GET_CODE (scan);
3930 if (INSN_P (scan))
3932 used |= regs_used (PATTERN (scan), 0);
3933 if (code == CALL_INSN)
3934 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3935 dead |= (used >> 16) & ~used;
3936 if (dead & try)
3938 dead &= try;
3939 break;
3941 if (code == JUMP_INSN)
3943 if (jump_left-- && simplejump_p (scan))
3944 scan = JUMP_LABEL (scan);
3945 else
3946 break;
3950 /* Mask out the stack pointer again, in case it was
3951 the only 'free' register we have found. */
3952 dead &= 0x7fff;
3954 /* If the immediate destination is still in range, check for possible
3955 threading with a jump beyond the delay slot insn.
3956 Don't check if we are called recursively; the jump has been or will be
3957 checked in a different invocation in that case. */
3959 else if (optimize && need_block >= 0)
3961 rtx next = next_active_insn (next_active_insn (dest));
3962 if (next && GET_CODE (next) == JUMP_INSN
3963 && GET_CODE (PATTERN (next)) == SET
3964 && recog_memoized (next) == CODE_FOR_jump_compact)
3966 dest = JUMP_LABEL (next);
3967 if (dest
3968 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3969 > 4092 + 4098))
3970 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3974 if (dead)
3976 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
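/* dead & -dead isolates the lowest set bit, so this picks the
   lowest-numbered register that was found to be dead above;
   e.g. dead == 0x00c0 would select r6.  */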
3978 /* It would be nice if we could convert the jump into an indirect
3979 jump / far branch right now, thus exposing all constituent
3980 instructions to further optimization. However, reorg uses
3981 simplejump_p to determine if there is an unconditional jump where
3982 it should try to schedule instructions from the target of the
3983 branch; simplejump_p fails for indirect jumps even if they have
3984 a JUMP_LABEL. */
3985 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3986 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3987 , jump);
3988 /* ??? We would like this to have the scope of the jump, but that
3989 scope will change when a delay slot insn of an inner scope is added.
3990 Hence, after delay slot scheduling, we'll have to expect
3991 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3992 the jump. */
3994 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
3995 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3996 return insn;
3998 else if (need_block)
3999 /* We can't use JUMP_LABEL here because it might be undefined
4000 when not optimizing. */
4001 return emit_insn_before (gen_block_branch_redirect
4002 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4003 , jump);
4004 return prev;
4007 #define CONDJUMP_MIN -252
4008 #define CONDJUMP_MAX 262
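/* Bounds, in bytes, on how far a conditional branch may sit from the
   near label that split_branches redirects it to; presumably derived
   from the reach of an SH conditional branch.  */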
4009 struct far_branch
4011 /* A label (to be placed) in front of the jump
4012 that jumps to our ultimate destination. */
4013 rtx near_label;
4014 /* Where we are going to insert it if we cannot move the jump any farther,
4015 or the jump itself if we have picked up an existing jump. */
4016 rtx insert_place;
4017 /* The ultimate destination. */
4018 rtx far_label;
4019 struct far_branch *prev;
4020 /* If the branch has already been created, its address;
4021 else the address of its first prospective user. */
4022 int address;
4025 static void gen_far_branch (struct far_branch *);
4026 enum mdep_reorg_phase_e mdep_reorg_phase;
4027 static void
4028 gen_far_branch (struct far_branch *bp)
4030 rtx insn = bp->insert_place;
4031 rtx jump;
4032 rtx label = gen_label_rtx ();
4033 int ok;
4035 emit_label_after (label, insn);
4036 if (bp->far_label)
4038 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4039 LABEL_NUSES (bp->far_label)++;
4041 else
4042 jump = emit_jump_insn_after (gen_return (), insn);
4043 /* Emit a barrier so that reorg knows that any following instructions
4044 are not reachable via a fall-through path.
4045 But don't do this when not optimizing, since we wouldn't suppress the
4046 alignment for the barrier then, and could end up with out-of-range
4047 pc-relative loads. */
4048 if (optimize)
4049 emit_barrier_after (jump);
4050 emit_label_after (bp->near_label, insn);
4051 JUMP_LABEL (jump) = bp->far_label;
4052 ok = invert_jump (insn, label, 1);
4053 gcc_assert (ok);
4055 /* If we are branching around a jump (rather than a return), prevent
4056 reorg from using an insn from the jump target as the delay slot insn -
4057 when reorg did this, it pessimized code (we'd rather hide the delay slot)
4058 and it could cause branches to go out of range. */
4059 if (bp->far_label)
4060 (emit_insn_after
4061 (gen_stuff_delay_slot
4062 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4063 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4064 insn));
4065 /* Prevent reorg from undoing our splits. */
4066 gen_block_redirect (jump, bp->address += 2, 2);
4069 /* Fix up ADDR_DIFF_VECs. */
4070 void
4071 fixup_addr_diff_vecs (rtx first)
4073 rtx insn;
4075 for (insn = first; insn; insn = NEXT_INSN (insn))
4077 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4079 if (GET_CODE (insn) != JUMP_INSN
4080 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4081 continue;
4082 pat = PATTERN (insn);
4083 vec_lab = XEXP (XEXP (pat, 0), 0);
4085 /* Search for the matching casesi_jump_2. */
4086 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4088 if (GET_CODE (prev) != JUMP_INSN)
4089 continue;
4090 prevpat = PATTERN (prev);
4091 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4092 continue;
4093 x = XVECEXP (prevpat, 0, 1);
4094 if (GET_CODE (x) != USE)
4095 continue;
4096 x = XEXP (x, 0);
4097 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
4098 break;
4100 /* FIXME: This is a bug in the optimizer, but it seems harmless
4101 to just avoid panicking. */
4102 if (!prev)
4103 continue;
4105 /* Emit the reference label of the braf where it belongs, right after
4106 the casesi_jump_2 (i.e. braf). */
4107 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4108 emit_label_after (braf_label, prev);
4110 /* Fix up the ADDR_DIFF_VEC to be relative
4111 to the reference address of the braf. */
4112 XEXP (XEXP (pat, 0), 0) = braf_label;
4116 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4117 a barrier. Return the base 2 logarithm of the desired alignment. */
4119 barrier_align (rtx barrier_or_label)
4121 rtx next = next_real_insn (barrier_or_label), pat, prev;
4122 int slot, credit, jump_to_next = 0;
4124 if (! next)
4125 return 0;
4127 pat = PATTERN (next);
4129 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4130 return 2;
4132 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4133 /* This is a barrier in front of a constant table. */
4134 return 0;
4136 prev = prev_real_insn (barrier_or_label);
4137 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4139 pat = PATTERN (prev);
4140 /* If this is a very small table, we want to keep the alignment after
4141 the table to the minimum for proper code alignment. */
4142 return ((TARGET_SMALLCODE
4143 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4144 <= (unsigned) 1 << (CACHE_LOG - 2)))
4145 ? 1 << TARGET_SHMEDIA : align_jumps_log);
4148 if (TARGET_SMALLCODE)
4149 return 0;
4151 if (! TARGET_SH2 || ! optimize)
4152 return align_jumps_log;
4154 /* When fixing up pcloads, a constant table might be inserted just before
4155 the basic block that ends with the barrier. Thus, we can't trust the
4156 instruction lengths before that. */
4157 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
4159 /* Check if there is an immediately preceding branch to the insn beyond
4160 the barrier. We must weigh the cost of discarding useful information
4161 from the current cache line when executing this branch and there is
4162 an alignment, against that of fetching unneeded insns in front of the
4163 branch target when there is no alignment. */
4165 /* There are two delay_slot cases to consider. One is the simple case
4166 where the preceding branch is to the insn beyond the barrier (simple
4167 delay slot filling), and the other is where the preceding branch has
4168 a delay slot that is a duplicate of the insn after the barrier
4169 (fill_eager_delay_slots) and the branch is to the insn after the insn
4170 after the barrier. */
4172 /* PREV is presumed to be the JUMP_INSN for the barrier under
4173 investigation. Skip to the insn before it. */
4174 prev = prev_real_insn (prev);
4176 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4177 credit >= 0 && prev && GET_CODE (prev) == INSN;
4178 prev = prev_real_insn (prev))
4180 jump_to_next = 0;
4181 if (GET_CODE (PATTERN (prev)) == USE
4182 || GET_CODE (PATTERN (prev)) == CLOBBER)
4183 continue;
4184 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4186 prev = XVECEXP (PATTERN (prev), 0, 1);
4187 if (INSN_UID (prev) == INSN_UID (next))
4189 /* Delay slot was filled with insn at jump target. */
4190 jump_to_next = 1;
4191 continue;
4195 if (slot &&
4196 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4197 slot = 0;
4198 credit -= get_attr_length (prev);
4200 if (prev
4201 && GET_CODE (prev) == JUMP_INSN
4202 && JUMP_LABEL (prev))
4204 rtx x;
4205 if (jump_to_next
4206 || next_real_insn (JUMP_LABEL (prev)) == next
4207 /* If relax_delay_slots() decides NEXT was redundant
4208 with some previous instruction, it will have
4209 redirected PREV's jump to the following insn. */
4210 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4211 /* There is no upper bound on redundant instructions
4212 that might have been skipped, but we must not put an
4213 alignment where none had been before. */
4214 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4215 (INSN_P (x)
4216 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4217 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4218 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4220 rtx pat = PATTERN (prev);
4221 if (GET_CODE (pat) == PARALLEL)
4222 pat = XVECEXP (pat, 0, 0);
4223 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4224 return 0;
4229 return align_jumps_log;
4232 /* If we are inside a phony loop, almost any kind of label can turn up as the
4233 first one in the loop. Aligning a braf label causes incorrect switch
4234 destination addresses; we can detect braf labels because they are
4235 followed by a BARRIER.
4236 Applying loop alignment to small constant or switch tables is a waste
4237 of space, so we suppress this too. */
4239 sh_loop_align (rtx label)
4241 rtx next = label;
4244 next = next_nonnote_insn (next);
4245 while (next && GET_CODE (next) == CODE_LABEL);
4247 if (! next
4248 || ! INSN_P (next)
4249 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4250 || recog_memoized (next) == CODE_FOR_consttable_2)
4251 return 0;
4253 return align_loops_log;
4256 /* Do a final pass over the function, just before delayed branch
4257 scheduling. */
4259 static void
4260 sh_reorg (void)
4262 rtx first, insn, mova = NULL_RTX;
4263 int num_mova;
4264 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4265 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4267 first = get_insns ();
4269 /* We must split call insns before introducing `mova's. If we're
4270 optimizing, they'll have already been split. Otherwise, make
4271 sure we don't split them too late. */
4272 if (! optimize)
4273 split_all_insns_noflow ();
4275 if (TARGET_SHMEDIA)
4276 return;
4278 /* If relaxing, generate pseudo-ops to associate function calls with
4279 the symbols they call. It does no harm to not generate these
4281 pseudo-ops. However, when we can generate them, it enables the
4281 linker to potentially relax the jsr to a bsr, and eliminate the
4282 register load and, possibly, the constant pool entry. */
4284 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4285 if (TARGET_RELAX)
4287 /* Remove all REG_LABEL notes. We want to use them for our own
4288 purposes. This works because none of the remaining passes
4289 need to look at them.
4291 ??? But it may break in the future. We should use a machine
4292 dependent REG_NOTE, or some other approach entirely. */
4293 for (insn = first; insn; insn = NEXT_INSN (insn))
4295 if (INSN_P (insn))
4297 rtx note;
4299 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4300 remove_note (insn, note);
4304 for (insn = first; insn; insn = NEXT_INSN (insn))
4306 rtx pattern, reg, link, set, scan, dies, label;
4307 int rescan = 0, foundinsn = 0;
4309 if (GET_CODE (insn) == CALL_INSN)
4311 pattern = PATTERN (insn);
4313 if (GET_CODE (pattern) == PARALLEL)
4314 pattern = XVECEXP (pattern, 0, 0);
4315 if (GET_CODE (pattern) == SET)
4316 pattern = SET_SRC (pattern);
4318 if (GET_CODE (pattern) != CALL
4319 || GET_CODE (XEXP (pattern, 0)) != MEM)
4320 continue;
4322 reg = XEXP (XEXP (pattern, 0), 0);
4324 else
4326 reg = sfunc_uses_reg (insn);
4327 if (! reg)
4328 continue;
4331 if (GET_CODE (reg) != REG)
4332 continue;
4334 /* This is a function call via REG. If the only uses of REG
4335 between the time that it is set and the time that it dies
4336 are in function calls, then we can associate all the
4337 function calls with the setting of REG. */
4339 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4341 if (REG_NOTE_KIND (link) != 0)
4342 continue;
4343 set = single_set (XEXP (link, 0));
4344 if (set && rtx_equal_p (reg, SET_DEST (set)))
4346 link = XEXP (link, 0);
4347 break;
4351 if (! link)
4353 /* ??? Sometimes global register allocation will have
4354 deleted the insn pointed to by LOG_LINKS. Try
4355 scanning backward to find where the register is set. */
4356 for (scan = PREV_INSN (insn);
4357 scan && GET_CODE (scan) != CODE_LABEL;
4358 scan = PREV_INSN (scan))
4360 if (! INSN_P (scan))
4361 continue;
4363 if (! reg_mentioned_p (reg, scan))
4364 continue;
4366 if (noncall_uses_reg (reg, scan, &set))
4367 break;
4369 if (set)
4371 link = scan;
4372 break;
4377 if (! link)
4378 continue;
4380 /* The register is set at LINK. */
4382 /* We can only optimize the function call if the register is
4383 being set to a symbol. In theory, we could sometimes
4384 optimize calls to a constant location, but the assembler
4385 and linker do not support that at present. */
4386 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4387 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4388 continue;
4390 /* Scan forward from LINK to the place where REG dies, and
4391 make sure that the only insns which use REG are
4392 themselves function calls. */
4394 /* ??? This doesn't work for call targets that were allocated
4395 by reload, since there may not be a REG_DEAD note for the
4396 register. */
4398 dies = NULL_RTX;
4399 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4401 rtx scanset;
4403 /* Don't try to trace forward past a CODE_LABEL if we haven't
4404 seen INSN yet. Ordinarily, we will only find the setting insn
4405 in LOG_LINKS if it is in the same basic block. However,
4406 cross-jumping can insert code labels in between the load and
4407 the call, and can result in situations where a single call
4408 insn may have two targets depending on where we came from. */
4410 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4411 break;
4413 if (! INSN_P (scan))
4414 continue;
4416 /* Don't try to trace forward past a JUMP. To optimize
4417 safely, we would have to check that all the
4418 instructions at the jump destination did not use REG. */
4420 if (GET_CODE (scan) == JUMP_INSN)
4421 break;
4423 if (! reg_mentioned_p (reg, scan))
4424 continue;
4426 if (noncall_uses_reg (reg, scan, &scanset))
4427 break;
4429 if (scan == insn)
4430 foundinsn = 1;
4432 if (scan != insn
4433 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4435 /* There is a function call to this register other
4436 than the one we are checking. If we optimize
4437 this call, we need to rescan again below. */
4438 rescan = 1;
4441 /* ??? We shouldn't have to worry about SCANSET here.
4442 We should just be able to check for a REG_DEAD note
4443 on a function call. However, the REG_DEAD notes are
4444 apparently not dependable around libcalls; c-torture
4445 execute/920501-2 is a test case. If SCANSET is set,
4446 then this insn sets the register, so it must have
4447 died earlier. Unfortunately, this will only handle
4448 the cases in which the register is, in fact, set in a
4449 later insn. */
4451 /* ??? We shouldn't have to use FOUNDINSN here.
4452 However, the LOG_LINKS fields are apparently not
4453 entirely reliable around libcalls;
4454 newlib/libm/math/e_pow.c is a test case. Sometimes
4455 an insn will appear in LOG_LINKS even though it is
4456 not the most recent insn which sets the register. */
4458 if (foundinsn
4459 && (scanset
4460 || find_reg_note (scan, REG_DEAD, reg)))
4462 dies = scan;
4463 break;
4467 if (! dies)
4469 /* Either there was a branch, or some insn used REG
4470 other than as a function call address. */
4471 continue;
4474 /* Create a code label, and put it in a REG_LABEL note on
4475 the insn which sets the register, and on each call insn
4476 which uses the register. In final_prescan_insn we look
4477 for the REG_LABEL notes, and output the appropriate label
4478 or pseudo-op. */
4480 label = gen_label_rtx ();
4481 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4482 REG_NOTES (link));
4483 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4484 REG_NOTES (insn));
4485 if (rescan)
4487 scan = link;
4490 rtx reg2;
4492 scan = NEXT_INSN (scan);
4493 if (scan != insn
4494 && ((GET_CODE (scan) == CALL_INSN
4495 && reg_mentioned_p (reg, scan))
4496 || ((reg2 = sfunc_uses_reg (scan))
4497 && REGNO (reg2) == REGNO (reg))))
4498 REG_NOTES (scan)
4499 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4501 while (scan != dies);
4506 if (TARGET_SH2)
4507 fixup_addr_diff_vecs (first);
4509 if (optimize)
4511 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4512 shorten_branches (first);
4514 /* Scan the function looking for move instructions which have to be
4515 changed to pc-relative loads and insert the literal tables. */
4517 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4518 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4520 if (mova_p (insn))
4522 /* ??? basic block reordering can move a switch table dispatch
4523 below the switch table. Check if that has happened.
4524 We only have the addresses available when optimizing; but then,
4525 this check shouldn't be needed when not optimizing. */
4526 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4527 if (optimize
4528 && (INSN_ADDRESSES (INSN_UID (insn))
4529 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4531 /* Change the mova into a load.
4532 broken_move will then return true for it. */
4533 fixup_mova (insn);
4535 else if (! num_mova++)
4536 mova = insn;
4538 else if (GET_CODE (insn) == JUMP_INSN
4539 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4540 && num_mova)
4542 rtx scan;
4543 int total;
4545 num_mova--;
4547 /* Some code might have been inserted between the mova and
4548 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4549 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4550 total += get_attr_length (scan);
4552 /* The range of mova is 1020; add 4 because the pc counts from the address
4553 of the second instruction after this one, and subtract 2 in case the pc
4554 is 2-byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4555 cancels out with alignment effects of the mova itself. */
4556 if (total > 1022)
4558 /* Change the mova into a load, and restart scanning
4559 there. broken_move will then return true for mova. */
4560 fixup_mova (mova);
4561 insn = mova;
4564 if (broken_move (insn)
4565 || (GET_CODE (insn) == INSN
4566 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4568 rtx scan;
4569 /* Scan ahead looking for a barrier to stick the constant table
4570 behind. */
4571 rtx barrier = find_barrier (num_mova, mova, insn);
4572 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4573 int need_aligned_label = 0;
4575 if (num_mova && ! mova_p (mova))
4577 /* find_barrier had to change the first mova into a
4578 pcload; thus, we have to start with this new pcload. */
4579 insn = mova;
4580 num_mova = 0;
4582 /* Now find all the moves between the points and modify them. */
4583 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4585 if (GET_CODE (scan) == CODE_LABEL)
4586 last_float = 0;
4587 if (GET_CODE (scan) == INSN
4588 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4589 need_aligned_label = 1;
4590 if (broken_move (scan))
4592 rtx *patp = &PATTERN (scan), pat = *patp;
4593 rtx src, dst;
4594 rtx lab;
4595 rtx newsrc;
4596 enum machine_mode mode;
4598 if (GET_CODE (pat) == PARALLEL)
4599 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4600 src = SET_SRC (pat);
4601 dst = SET_DEST (pat);
4602 mode = GET_MODE (dst);
4604 if (mode == SImode && hi_const (src)
4605 && REGNO (dst) != FPUL_REG)
4607 int offset = 0;
4609 mode = HImode;
4610 while (GET_CODE (dst) == SUBREG)
4612 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4613 GET_MODE (SUBREG_REG (dst)),
4614 SUBREG_BYTE (dst),
4615 GET_MODE (dst));
4616 dst = SUBREG_REG (dst);
4618 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4620 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4622 /* This must be an insn that clobbers r0. */
4623 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4624 XVECLEN (PATTERN (scan), 0)
4625 - 1);
4626 rtx clobber = *clobberp;
4628 gcc_assert (GET_CODE (clobber) == CLOBBER
4629 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
4631 if (last_float
4632 && reg_set_between_p (r0_rtx, last_float_move, scan))
4633 last_float = 0;
4634 if (last_float
4635 && TARGET_SHCOMPACT
4636 && GET_MODE_SIZE (mode) != 4
4637 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4638 last_float = 0;
4639 lab = add_constant (src, mode, last_float);
4640 if (lab)
4641 emit_insn_before (gen_mova (lab), scan);
4642 else
4644 /* There will be a REG_UNUSED note for r0 on
4645 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4646 otherwise reorg's mark_target_live_regs would not
4647 consider r0 to be used, and we could end up with a delay
4648 slot insn in front of SCAN that clobbers r0. */
4649 rtx note
4650 = find_regno_note (last_float_move, REG_UNUSED, 0);
4652 /* If we are not optimizing, then there may not be
4653 a note. */
4654 if (note)
4655 PUT_MODE (note, REG_INC);
4657 *last_float_addr = r0_inc_rtx;
4659 last_float_move = scan;
4660 last_float = src;
4661 newsrc = gen_const_mem (mode,
4662 (((TARGET_SH4 && ! TARGET_FMOVD)
4663 || REGNO (dst) == FPUL_REG)
4664 ? r0_inc_rtx
4665 : r0_rtx));
4666 last_float_addr = &XEXP (newsrc, 0);
4668 /* Remove the clobber of r0. */
4669 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4670 gen_rtx_SCRATCH (Pmode));
4672 /* This is a mova needing a label. Create it. */
4673 else if (GET_CODE (src) == UNSPEC
4674 && XINT (src, 1) == UNSPEC_MOVA
4675 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4677 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4678 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4679 newsrc = gen_rtx_UNSPEC (SImode,
4680 gen_rtvec (1, newsrc),
4681 UNSPEC_MOVA);
4683 else
4685 lab = add_constant (src, mode, 0);
4686 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4687 newsrc = gen_const_mem (mode, newsrc);
4689 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4690 INSN_CODE (scan) = -1;
4693 dump_table (need_aligned_label ? insn : 0, barrier);
4694 insn = barrier;
4698 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4699 INSN_ADDRESSES_FREE ();
4700 split_branches (first);
4702 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4703 also has an effect on the register that holds the address of the sfunc.
4704 Insert an extra dummy insn in front of each sfunc that pretends to
4705 use this register. */
4706 if (flag_delayed_branch)
4708 for (insn = first; insn; insn = NEXT_INSN (insn))
4710 rtx reg = sfunc_uses_reg (insn);
4712 if (! reg)
4713 continue;
4714 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4717 #if 0
4718 /* fpscr is not actually a user variable, but we pretend it is for the
4719 sake of the previous optimization passes, since we want it handled like
4720 one. However, we don't have any debugging information for it, so turn
4721 it into a non-user variable now. */
4722 if (TARGET_SH4)
4723 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4724 #endif
4725 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4729 get_dest_uid (rtx label, int max_uid)
4731 rtx dest = next_real_insn (label);
4732 int dest_uid;
4733 if (! dest)
4734 /* This can happen for an undefined label. */
4735 return 0;
4736 dest_uid = INSN_UID (dest);
4737 /* If this is a newly created branch redirection blocking instruction,
4738 we cannot index the branch_uid or insn_addresses arrays with its
4739 uid. But then, we won't need to, because the actual destination is
4740 the following branch. */
4741 while (dest_uid >= max_uid)
4743 dest = NEXT_INSN (dest);
4744 dest_uid = INSN_UID (dest);
4746 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4747 return 0;
4748 return dest_uid;
4751 /* Split condbranches that are out of range. Also add clobbers for
4752 scratch registers that are needed in far jumps.
4753 We do this before delay slot scheduling, so that it can take our
4754 newly created instructions into account. It also allows us to
4755 find branches with common targets more easily. */
4757 static void
4758 split_branches (rtx first)
4760 rtx insn;
4761 struct far_branch **uid_branch, *far_branch_list = 0;
4762 int max_uid = get_max_uid ();
4763 int ok;
4765 /* Find out which branches are out of range. */
4766 shorten_branches (first);
4768 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4769 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4771 for (insn = first; insn; insn = NEXT_INSN (insn))
4772 if (! INSN_P (insn))
4773 continue;
4774 else if (INSN_DELETED_P (insn))
4776 /* Shorten_branches would split this instruction again,
4777 so transform it into a note. */
4778 PUT_CODE (insn, NOTE);
4779 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4780 NOTE_SOURCE_FILE (insn) = 0;
4782 else if (GET_CODE (insn) == JUMP_INSN
4783 /* Don't mess with ADDR_DIFF_VEC */
4784 && (GET_CODE (PATTERN (insn)) == SET
4785 || GET_CODE (PATTERN (insn)) == RETURN))
4787 enum attr_type type = get_attr_type (insn);
4788 if (type == TYPE_CBRANCH)
4790 rtx next, beyond;
4792 if (get_attr_length (insn) > 4)
4794 rtx src = SET_SRC (PATTERN (insn));
4795 rtx olabel = XEXP (XEXP (src, 1), 0);
4796 int addr = INSN_ADDRESSES (INSN_UID (insn));
4797 rtx label = 0;
4798 int dest_uid = get_dest_uid (olabel, max_uid);
4799 struct far_branch *bp = uid_branch[dest_uid];
4801 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4802 the label if the LABEL_NUSES count drops to zero. There is
4803 always a jump_optimize pass that sets these values, but it
4804 proceeds to delete unreferenced code, and then if not
4805 optimizing, to un-delete the deleted instructions, thus
4806 leaving labels with use counts that are too low. */
4807 if (! optimize)
4809 JUMP_LABEL (insn) = olabel;
4810 LABEL_NUSES (olabel)++;
4812 if (! bp)
4814 bp = (struct far_branch *) alloca (sizeof *bp);
4815 uid_branch[dest_uid] = bp;
4816 bp->prev = far_branch_list;
4817 far_branch_list = bp;
4818 bp->far_label
4819 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4820 LABEL_NUSES (bp->far_label)++;
4822 else
4824 label = bp->near_label;
4825 if (! label && bp->address - addr >= CONDJUMP_MIN)
4827 rtx block = bp->insert_place;
4829 if (GET_CODE (PATTERN (block)) == RETURN)
4830 block = PREV_INSN (block);
4831 else
4832 block = gen_block_redirect (block,
4833 bp->address, 2);
4834 label = emit_label_after (gen_label_rtx (),
4835 PREV_INSN (block));
4836 bp->near_label = label;
4838 else if (label && ! NEXT_INSN (label))
4840 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4841 bp->insert_place = insn;
4842 else
4843 gen_far_branch (bp);
4846 if (! label
4847 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4849 bp->near_label = label = gen_label_rtx ();
4850 bp->insert_place = insn;
4851 bp->address = addr;
4853 ok = redirect_jump (insn, label, 1);
4854 gcc_assert (ok);
4856 else
4858 /* get_attr_length (insn) == 2 */
4859 /* Check if we have a pattern where reorg wants to redirect
4860 the branch to a label from an unconditional branch that
4861 is too far away. */
4862 /* We can't use JUMP_LABEL here because it might be undefined
4863 when not optimizing. */
4864 /* A syntax error might cause beyond to be NULL_RTX. */
4865 beyond
4866 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4867 0));
4869 if (beyond
4870 && (GET_CODE (beyond) == JUMP_INSN
4871 || ((beyond = next_active_insn (beyond))
4872 && GET_CODE (beyond) == JUMP_INSN))
4873 && GET_CODE (PATTERN (beyond)) == SET
4874 && recog_memoized (beyond) == CODE_FOR_jump_compact
4875 && ((INSN_ADDRESSES
4876 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4877 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4878 > 252 + 258 + 2))
4879 gen_block_redirect (beyond,
4880 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4883 next = next_active_insn (insn);
4885 if ((GET_CODE (next) == JUMP_INSN
4886 || ((next = next_active_insn (next))
4887 && GET_CODE (next) == JUMP_INSN))
4888 && GET_CODE (PATTERN (next)) == SET
4889 && recog_memoized (next) == CODE_FOR_jump_compact
4890 && ((INSN_ADDRESSES
4891 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4892 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4893 > 252 + 258 + 2))
4894 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4896 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4898 int addr = INSN_ADDRESSES (INSN_UID (insn));
4899 rtx far_label = 0;
4900 int dest_uid = 0;
4901 struct far_branch *bp;
4903 if (type == TYPE_JUMP)
4905 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4906 dest_uid = get_dest_uid (far_label, max_uid);
4907 if (! dest_uid)
4909 /* Parse errors can lead to labels outside
4910 the insn stream. */
4911 if (! NEXT_INSN (far_label))
4912 continue;
4914 if (! optimize)
4916 JUMP_LABEL (insn) = far_label;
4917 LABEL_NUSES (far_label)++;
4919 redirect_jump (insn, NULL_RTX, 1);
4920 far_label = 0;
4923 bp = uid_branch[dest_uid];
4924 if (! bp)
4926 bp = (struct far_branch *) alloca (sizeof *bp);
4927 uid_branch[dest_uid] = bp;
4928 bp->prev = far_branch_list;
4929 far_branch_list = bp;
4930 bp->near_label = 0;
4931 bp->far_label = far_label;
4932 if (far_label)
4933 LABEL_NUSES (far_label)++;
4935 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4936 if (addr - bp->address <= CONDJUMP_MAX)
4937 emit_label_after (bp->near_label, PREV_INSN (insn));
4938 else
4940 gen_far_branch (bp);
4941 bp->near_label = 0;
4943 else
4944 bp->near_label = 0;
4945 bp->address = addr;
4946 bp->insert_place = insn;
4947 if (! far_label)
4948 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4949 else
4950 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4953 /* Generate all pending far branches,
4954 and free our references to the far labels. */
4955 while (far_branch_list)
4957 if (far_branch_list->near_label
4958 && ! NEXT_INSN (far_branch_list->near_label))
4959 gen_far_branch (far_branch_list);
4960 if (optimize
4961 && far_branch_list->far_label
4962 && ! --LABEL_NUSES (far_branch_list->far_label))
4963 delete_insn (far_branch_list->far_label);
4964 far_branch_list = far_branch_list->prev;
4967 /* Instruction length information is no longer valid due to the new
4968 instructions that have been generated. */
4969 init_insn_lengths ();
4972 /* Dump out instruction addresses, which is useful for debugging the
4973 constant pool table stuff.
4975 If relaxing, output the label and pseudo-ops used to link together
4976 calls and the instruction which set the registers. */
4978 /* ??? The addresses printed by this routine for insns are nonsense for
4979 insns which are inside of a sequence where none of the inner insns have
4980 variable length. This is because the second pass of shorten_branches
4981 does not bother to update them. */
4983 void
4984 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4985 int noperands ATTRIBUTE_UNUSED)
4987 if (TARGET_DUMPISIZE)
4988 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4990 if (TARGET_RELAX)
4992 rtx note;
4994 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4995 if (note)
4997 rtx pattern;
4999 pattern = PATTERN (insn);
5000 if (GET_CODE (pattern) == PARALLEL)
5001 pattern = XVECEXP (pattern, 0, 0);
5002 switch (GET_CODE (pattern))
5004 case SET:
5005 if (GET_CODE (SET_SRC (pattern)) != CALL
5006 && get_attr_type (insn) != TYPE_SFUNC)
5008 targetm.asm_out.internal_label
5009 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5010 break;
5012 /* else FALLTHROUGH */
5013 case CALL:
5014 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5015 CODE_LABEL_NUMBER (XEXP (note, 0)));
5016 break;
5018 default:
5019 gcc_unreachable ();
5025 /* Dump out any constants accumulated in the final pass. These will
5026 only be labels. */
5028 const char *
5029 output_jump_label_table (void)
5031 int i;
5033 if (pool_size)
5035 fprintf (asm_out_file, "\t.align 2\n");
5036 for (i = 0; i < pool_size; i++)
5038 pool_node *p = &pool_vector[i];
5040 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5041 CODE_LABEL_NUMBER (p->label));
5042 output_asm_insn (".long %O0", &p->value);
5044 pool_size = 0;
5047 return "";
5050 /* A full frame looks like:
5052 arg-5
5053 arg-4
5054 [ if current_function_anonymous_args
5055 arg-3
5056 arg-2
5057 arg-1
5058 arg-0 ]
5059 saved-fp
5060 saved-r10
5061 saved-r11
5062 saved-r12
5063 saved-pr
5064 local-n
5066 local-1
5067 local-0 <- fp points here. */
5069 /* Number of bytes pushed for anonymous args, used to pass information
5070 between expand_prologue and expand_epilogue. */
5072 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5073 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5074 for an epilogue and a negative value means that it's for a sibcall
5075 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5076 all the registers that are about to be restored, and hence dead. */
5078 static void
5079 output_stack_adjust (int size, rtx reg, int epilogue_p,
5080 HARD_REG_SET *live_regs_mask)
5082 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5083 if (size)
5085 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5087 /* This test is bogus, as output_stack_adjust is used to re-align the
5088 stack. */
5089 #if 0
5090 gcc_assert (!(size % align));
5091 #endif
5093 if (CONST_OK_FOR_ADD (size))
5094 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5095 /* Try to do it with two partial adjustments; however, we must make
5096 sure that the stack is properly aligned at all times, in case
5097 an interrupt occurs between the two partial adjustments. */
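/* For example, with SIZE == 200 and an 8-byte alignment the two adds
   would be 200/2 & -8 == 96 and 200 - 96 == 104; the intermediate
   adjustment of 96 is itself a multiple of the alignment, so the stack
   stays aligned between the two adds.  */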
5098 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5099 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5101 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5102 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
5104 else
5106 rtx const_reg;
5107 rtx insn;
5108 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5109 int i;
5111 /* If TEMP is invalid, we could temporarily save a general
5112 register to MACL. However, there is currently no need
5113 to handle this case, so just die when we see it. */
5114 if (epilogue_p < 0
5115 || current_function_interrupt
5116 || ! call_really_used_regs[temp] || fixed_regs[temp])
5117 temp = -1;
5118 if (temp < 0 && ! current_function_interrupt
5119 && (TARGET_SHMEDIA || epilogue_p >= 0))
5121 HARD_REG_SET temps;
5122 COPY_HARD_REG_SET (temps, call_used_reg_set);
5123 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5124 if (epilogue_p > 0)
5126 int nreg = 0;
5127 if (current_function_return_rtx)
5129 enum machine_mode mode;
5130 mode = GET_MODE (current_function_return_rtx);
5131 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5132 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5134 for (i = 0; i < nreg; i++)
5135 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5136 if (current_function_calls_eh_return)
5138 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5139 for (i = 0; i <= 3; i++)
5140 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5143 if (TARGET_SHMEDIA && epilogue_p < 0)
5144 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5145 CLEAR_HARD_REG_BIT (temps, i);
5146 if (epilogue_p <= 0)
5148 for (i = FIRST_PARM_REG;
5149 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5150 CLEAR_HARD_REG_BIT (temps, i);
5151 if (cfun->static_chain_decl != NULL)
5152 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5154 temp = scavenge_reg (&temps);
5156 if (temp < 0 && live_regs_mask)
5158 HARD_REG_SET temps;
5160 COPY_HARD_REG_SET (temps, *live_regs_mask);
5161 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
5162 temp = scavenge_reg (&temps);
5164 if (temp < 0)
5166 rtx adj_reg, tmp_reg, mem;
5168 /* If we reached here, the most likely case is the (sibcall)
5169 epilogue for non-SHmedia. Put a special push/pop sequence
5170 for such a case as the last resort. This looks lengthy but
5171 would not be a problem because it seems to be very
5172 rare. */
5174 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5177 /* ??? There is still the slight possibility that r4 or
5178 r5 have been reserved as fixed registers or assigned
5179 as global registers, and they change during an
5180 interrupt. There are possible ways to handle this:
5182 - If we are adjusting the frame pointer (r14), we can do
5183 with a single temp register and an ordinary push / pop
5184 on the stack.
5185 - Grab any call-used or call-saved registers (i.e. not
5186 fixed or globals) for the temps we need. We might
5187 also grab r14 if we are adjusting the stack pointer.
5188 If we can't find enough available registers, issue
5189 a diagnostic and die - the user must have reserved
5190 way too many registers.
5191 But since all this is rather unlikely to happen and
5192 would require extra testing, we just die if r4 / r5
5193 are not available. */
5194 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5195 && !global_regs[4] && !global_regs[5]);
5197 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5198 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5199 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
5200 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5201 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5202 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5203 emit_move_insn (mem, tmp_reg);
5204 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
5205 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5206 emit_move_insn (mem, tmp_reg);
5207 emit_move_insn (reg, adj_reg);
5208 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5209 emit_move_insn (adj_reg, mem);
5210 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5211 emit_move_insn (tmp_reg, mem);
5212 /* Tell flow the insns that pop r4/r5 aren't dead. */
5213 emit_insn (gen_rtx_USE (VOIDmode, tmp_reg));
5214 emit_insn (gen_rtx_USE (VOIDmode, adj_reg));
5215 return;
5217 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5219 /* If SIZE is negative, subtract the positive value.
5220 This sometimes allows a constant pool entry to be shared
5221 between prologue and epilogue code. */
5222 if (size < 0)
5224 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5225 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5227 else
5229 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5230 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5232 if (! epilogue_p)
5233 REG_NOTES (insn)
5234 = (gen_rtx_EXPR_LIST
5235 (REG_FRAME_RELATED_EXPR,
5236 gen_rtx_SET (VOIDmode, reg,
5237 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5238 REG_NOTES (insn)));
5243 static rtx
5244 frame_insn (rtx x)
5246 x = emit_insn (x);
5247 RTX_FRAME_RELATED_P (x) = 1;
5248 return x;
5251 /* Output RTL to push register RN onto the stack. */
5253 static rtx
5254 push (int rn)
5256 rtx x;
5257 if (rn == FPUL_REG)
5258 x = gen_push_fpul ();
5259 else if (rn == FPSCR_REG)
5260 x = gen_push_fpscr ();
5261 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5262 && FP_OR_XD_REGISTER_P (rn))
5264 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5265 return NULL_RTX;
5266 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5268 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5269 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5270 else
5271 x = gen_push (gen_rtx_REG (SImode, rn));
5273 x = frame_insn (x);
5274 REG_NOTES (x)
5275 = gen_rtx_EXPR_LIST (REG_INC,
5276 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5277 return x;
5280 /* Output RTL to pop register RN from the stack. */
5282 static void
5283 pop (int rn)
5285 rtx x;
5286 if (rn == FPUL_REG)
5287 x = gen_pop_fpul ();
5288 else if (rn == FPSCR_REG)
5289 x = gen_pop_fpscr ();
5290 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5291 && FP_OR_XD_REGISTER_P (rn))
5293 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5294 return;
5295 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5297 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5298 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5299 else
5300 x = gen_pop (gen_rtx_REG (SImode, rn));
5302 x = emit_insn (x);
5303 REG_NOTES (x)
5304 = gen_rtx_EXPR_LIST (REG_INC,
5305 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5308 /* Generate code to push the regs specified in the mask. */
5310 static void
5311 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5313 int i;
5314 int skip_fpscr = 0;
5316 /* Push PR last; this gives better latencies after the prologue, and
5317 candidates for the return delay slot when there are no general
5318 registers pushed. */
5319 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5321 /* If this is an interrupt handler, and the SZ bit varies,
5322 and we have to push any floating point register, we need
5323 to switch to the correct precision first. */
5324 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5325 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
5327 HARD_REG_SET unsaved;
5329 push (FPSCR_REG);
5330 COMPL_HARD_REG_SET (unsaved, *mask);
5331 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5332 skip_fpscr = 1;
5334 if (i != PR_REG
5335 && (i != FPSCR_REG || ! skip_fpscr)
5336 && TEST_HARD_REG_BIT (*mask, i))
5337 push (i);
5339 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5340 push (PR_REG);
5343 /* Calculate how much extra space is needed to save all callee-saved
5344 target registers.
5345 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5347 static int
5348 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5350 int reg;
5351 int stack_space = 0;
5352 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5354 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5355 if ((! call_really_used_regs[reg] || interrupt_handler)
5356 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5357 /* Leave space to save this target register on the stack,
5358 in case target register allocation wants to use it. */
5359 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5360 return stack_space;
5363 /* Decide whether we should reserve space for callee-save target registers,
5364 in case target register allocation wants to use them. REGS_SAVED is
5365 the space, in bytes, that is already required for register saves.
5366 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5368 static int
5369 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5370 HARD_REG_SET *live_regs_mask)
5372 if (optimize_size)
5373 return 0;
5374 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5377 /* Decide how much space to reserve for callee-save target registers
5378 in case target register allocation wants to use them.
5379 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5381 static int
5382 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5384 if (shmedia_space_reserved_for_target_registers)
5385 return shmedia_target_regs_stack_space (live_regs_mask);
5386 else
5387 return 0;
5390 /* Work out the registers which need to be saved, both as a mask and a
5391 count of saved words. Return the count.
5393 If doing a pragma interrupt function, then push all regs used by the
5394 function, and if we call another function (we can tell by looking at PR),
5395 make sure that all the regs it clobbers are safe too. */
5397 static int
5398 calc_live_regs (HARD_REG_SET *live_regs_mask)
5400 unsigned int reg;
5401 int count;
5402 tree attrs;
5403 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
5404 bool nosave_low_regs;
5405 int pr_live, has_call;
5407 attrs = DECL_ATTRIBUTES (current_function_decl);
5408 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
5409 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
5410 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
5411 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
5413 CLEAR_HARD_REG_SET (*live_regs_mask);
5414 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5415 && regs_ever_live[FPSCR_REG])
5416 target_flags &= ~MASK_FPU_SINGLE;
5417 /* If switching to double mode avoids a lot of saves, do that. */
5418 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5419 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5420 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5421 && (! call_really_used_regs[reg]
5422 || interrupt_handler)
5423 && ++count > 2)
5425 target_flags &= ~MASK_FPU_SINGLE;
5426 break;
5428 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5429 knows how to use it. That means the pseudo originally allocated for
5430 the initial value can become the PR_MEDIA_REG hard register, as seen for
5431 execute/20010122-1.c:test9. */
5432 if (TARGET_SHMEDIA)
5433 /* ??? this function is called from initial_elimination_offset, hence we
5434 can't use the result of sh_media_register_for_return here. */
5435 pr_live = sh_pr_n_sets ();
5436 else
5438 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5439 pr_live = (pr_initial
5440 ? (GET_CODE (pr_initial) != REG
5441 || REGNO (pr_initial) != (PR_REG))
5442 : regs_ever_live[PR_REG]);
5443 /* For SHcompact, if not optimizing, we end up with a memory reference
5444 using the return address pointer for __builtin_return_address even
5445 though there is no actual need to put the PR register on the stack. */
5446 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5448 /* Force PR to be live if the prologue has to call the SHmedia
5449 argument decoder or register saver. */
5450 if (TARGET_SHCOMPACT
5451 && ((current_function_args_info.call_cookie
5452 & ~ CALL_COOKIE_RET_TRAMP (1))
5453 || current_function_has_nonlocal_label))
5454 pr_live = 1;
5455 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5456 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5458 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5459 ? pr_live
5460 : interrupt_handler
5461 ? (/* Need to save all the regs ever live. */
5462 (regs_ever_live[reg]
5463 || (call_really_used_regs[reg]
5464 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5465 || reg == PIC_OFFSET_TABLE_REGNUM)
5466 && has_call)
5467 || (TARGET_SHMEDIA && has_call
5468 && REGISTER_NATURAL_MODE (reg) == SImode
5469 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5470 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5471 && reg != RETURN_ADDRESS_POINTER_REGNUM
5472 && reg != T_REG && reg != GBR_REG
5473 /* Push fpscr only on targets which have an FPU. */
5474 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5475 : (/* Only push those regs which are used and need to be saved. */
5476 (TARGET_SHCOMPACT
5477 && flag_pic
5478 && current_function_args_info.call_cookie
5479 && reg == PIC_OFFSET_TABLE_REGNUM)
5480 || (regs_ever_live[reg]
5481 && (!call_really_used_regs[reg]
5482 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
5483 || (current_function_calls_eh_return
5484 && (reg == EH_RETURN_DATA_REGNO (0)
5485 || reg == EH_RETURN_DATA_REGNO (1)
5486 || reg == EH_RETURN_DATA_REGNO (2)
5487 || reg == EH_RETURN_DATA_REGNO (3)))
5488 || ((reg == MACL_REG || reg == MACH_REG)
5489 && regs_ever_live[reg]
5490 && sh_cfun_attr_renesas_p ())
5493 SET_HARD_REG_BIT (*live_regs_mask, reg);
5494 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5496 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5497 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5499 if (FP_REGISTER_P (reg))
5501 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5503 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5504 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5507 else if (XD_REGISTER_P (reg))
5509 /* Must switch to double mode to access these registers. */
5510 target_flags &= ~MASK_FPU_SINGLE;
5514 if (nosave_low_regs && reg == R8_REG)
5515 break;
5517 /* If we have a target register optimization pass after prologue / epilogue
5518 threading, we need to assume all target registers will be live even if
5519 they aren't now. */
5520 if (flag_branch_target_load_optimize2
5521 && TARGET_SAVE_ALL_TARGET_REGS
5522 && shmedia_space_reserved_for_target_registers)
5523 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5524 if ((! call_really_used_regs[reg] || interrupt_handler)
5525 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5527 SET_HARD_REG_BIT (*live_regs_mask, reg);
5528 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5530 /* If this is an interrupt handler, we don't have any call-clobbered
5531 registers we can conveniently use for target register save/restore.
5532 Make sure we save at least one general purpose register when we need
5533 to save target registers. */
5534 if (interrupt_handler
5535 && hard_regs_intersect_p (live_regs_mask,
5536 &reg_class_contents[TARGET_REGS])
5537 && ! hard_regs_intersect_p (live_regs_mask,
5538 &reg_class_contents[GENERAL_REGS]))
5540 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5541 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5544 return count;
5547 /* Code to generate prologue and epilogue sequences */
5549 /* PUSHED is the number of bytes that are being pushed on the
5550 stack for register saves. Return the frame size, padded
5551 appropriately so that the stack stays properly aligned. */
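/* For example, with a frame size of 18, PUSHED == 8 and an 8-byte
   STACK_BOUNDARY, ((18 + 8 + 7) & -8) - 8 == 24: the frame is padded
   from 18 to 24 bytes so that pushed + frame remains a multiple of 8.  */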
5552 static HOST_WIDE_INT
5553 rounded_frame_size (int pushed)
5555 HOST_WIDE_INT size = get_frame_size ();
5556 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5558 return ((size + pushed + align - 1) & -align) - pushed;
5561 /* Choose a call-clobbered target-branch register that remains
5562 unchanged along the whole function. We set it up as the return
5563 value in the prologue. */
5565 sh_media_register_for_return (void)
5567 int regno;
5568 int tr0_used;
5570 if (! current_function_is_leaf)
5571 return -1;
5572 if (lookup_attribute ("interrupt_handler",
5573 DECL_ATTRIBUTES (current_function_decl)))
5574 return -1;
5575 if (sh_cfun_interrupt_handler_p ())
5576 return -1;
5578 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5580 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5581 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
5582 return regno;
5584 return -1;
5587 /* The maximum registers we need to save are:
5588 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5589 - 32 floating point registers (for each pair, we save none,
5590 one single precision value, or a double precision value).
5591 - 8 target registers
5592 - add 1 entry for a delimiter. */
5593 #define MAX_SAVED_REGS (62+32+8)
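/* 62 + 32 + 8 == 102 register entries; the save_schedule array below
   reserves two additional slots for the start and end delimiters.  */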
5595 typedef struct save_entry_s
5597 unsigned char reg;
5598 unsigned char mode;
5599 short offset;
5600 } save_entry;
5602 #define MAX_TEMPS 4
5604 /* There will be a delimiter entry with VOIDmode both at the start and the
5605 end of a filled in schedule. The end delimiter has the offset of the
5606 save with the smallest (i.e. most negative) offset. */
5607 typedef struct save_schedule_s
5609 save_entry entries[MAX_SAVED_REGS + 2];
5610 int temps[MAX_TEMPS+1];
5611 } save_schedule;
5613 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5614 use reverse order. Returns the last entry written to (not counting
5615 the delimiter). OFFSET_BASE is a number to be added to all offset
5616 entries. */
5618 static save_entry *
5619 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5620 int offset_base)
5622 int align, i;
5623 save_entry *entry = schedule->entries;
5624 int tmpx = 0;
5625 int offset;
5627 if (! current_function_interrupt)
5628 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5629 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5630 && ! FUNCTION_ARG_REGNO_P (i)
5631 && i != FIRST_RET_REG
5632 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5633 && ! (current_function_calls_eh_return
5634 && (i == EH_RETURN_STACKADJ_REGNO
5635 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
5636 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
5637 schedule->temps[tmpx++] = i;
5638 entry->reg = -1;
5639 entry->mode = VOIDmode;
5640 entry->offset = offset_base;
5641 entry++;
5642 /* We loop twice: first, we save 8-byte aligned registers at the
5643 higher addresses, which are known to be aligned. Then, we
5644 proceed to saving 32-bit registers that don't need 8-byte
5645 alignment.
5646 If this is an interrupt function, all registers that need saving
5647 need to be saved in full. Moreover, we need to postpone saving
5648 target registers till we have saved some general purpose registers
5649 we can then use as scratch registers. */
5650 offset = offset_base;
5651 for (align = 1; align >= 0; align--)
5653 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5654 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5656 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5657 int reg = i;
5659 if (current_function_interrupt)
5661 if (TARGET_REGISTER_P (i))
5662 continue;
5663 if (GENERAL_REGISTER_P (i))
5664 mode = DImode;
5666 if (mode == SFmode && (i % 2) == 1
5667 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5668 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5670 mode = DFmode;
5671 i--;
5672 reg--;
5675 /* If we're doing the aligned pass and this is not aligned,
5676 or we're doing the unaligned pass and this is aligned,
5677 skip it. */
5678 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5679 != align)
5680 continue;
5682 if (current_function_interrupt
5683 && GENERAL_REGISTER_P (i)
5684 && tmpx < MAX_TEMPS)
5685 schedule->temps[tmpx++] = i;
5687 offset -= GET_MODE_SIZE (mode);
5688 entry->reg = i;
5689 entry->mode = mode;
5690 entry->offset = offset;
5691 entry++;
5693 if (align && current_function_interrupt)
5694 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5695 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5697 offset -= GET_MODE_SIZE (DImode);
5698 entry->reg = i;
5699 entry->mode = DImode;
5700 entry->offset = offset;
5701 entry++;
5704 entry->reg = -1;
5705 entry->mode = VOIDmode;
5706 entry->offset = offset;
5707 schedule->temps[tmpx] = -1;
5708 return entry - 1;
5711 void
5712 sh_expand_prologue (void)
5714 HARD_REG_SET live_regs_mask;
5715 int d, i;
5716 int d_rounding = 0;
5717 int save_flags = target_flags;
5718 int pretend_args;
5719 tree sp_switch_attr
5720 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
5722 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5724 /* We have pretend args if we had an object sent partially in registers
5725 and partially on the stack, e.g. a large structure. */
5726 pretend_args = current_function_pretend_args_size;
5727 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5728 && (NPARM_REGS(SImode)
5729 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5730 pretend_args = 0;
5731 output_stack_adjust (-pretend_args
5732 - current_function_args_info.stack_regs * 8,
5733 stack_pointer_rtx, 0, NULL);
5735 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5736 /* We're going to use the PIC register to load the address of the
5737 incoming-argument decoder and/or of the return trampoline from
5738 the GOT, so make sure the PIC register is preserved and
5739 initialized. */
5740 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5742 if (TARGET_SHCOMPACT
5743 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5745 int reg;
5747 /* First, make all registers with incoming arguments that will
5748 be pushed onto the stack live, so that register renaming
5749 doesn't overwrite them. */
5750 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5751 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5752 >= NPARM_REGS (SImode) - reg)
5753 for (; reg < NPARM_REGS (SImode); reg++)
5754 emit_insn (gen_shcompact_preserve_incoming_args
5755 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5756 else if (CALL_COOKIE_INT_REG_GET
5757 (current_function_args_info.call_cookie, reg) == 1)
5758 emit_insn (gen_shcompact_preserve_incoming_args
5759 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5761 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5762 stack_pointer_rtx);
5763 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5764 GEN_INT (current_function_args_info.call_cookie));
5765 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5766 gen_rtx_REG (SImode, R0_REG));
5768 else if (TARGET_SHMEDIA)
5770 int tr = sh_media_register_for_return ();
5772 if (tr >= 0)
5774 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5775 gen_rtx_REG (DImode, PR_MEDIA_REG));
5777 /* ??? We should suppress saving pr when we don't need it, but this
5778 is tricky because of builtin_return_address. */
5780 /* If this function only exits with sibcalls, this copy
5781 will be flagged as dead. */
5782 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5783 const0_rtx,
5784 REG_NOTES (insn));
5788 /* Emit the code for SETUP_VARARGS. */
5789 if (current_function_stdarg)
5791 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5794 /* Push arg regs as if they'd been provided by the caller on the stack. */
5794 for (i = 0; i < NPARM_REGS(SImode); i++)
5796 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5797 rtx insn;
5799 if (i >= (NPARM_REGS(SImode)
5800 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5802 break;
5803 insn = push (rn);
5804 RTX_FRAME_RELATED_P (insn) = 0;
5809 /* If we're supposed to switch stacks at function entry, do so now. */
5810 if (sp_switch_attr)
5812 /* The argument specifies a variable holding the address of the
5813 stack the interrupt function should switch to/from at entry/exit. */
5814 const char *s
5815 = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (sp_switch_attr)));
5816 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
5818 emit_insn (gen_sp_switch_1 (sp_switch));
5821 d = calc_live_regs (&live_regs_mask);
5822 /* ??? Maybe we could save some switching if we can move a mode switch
5823 that already happens to be at the function start into the prologue. */
5824 if (target_flags != save_flags && ! current_function_interrupt)
5825 emit_insn (gen_toggle_sz ());
5827 if (TARGET_SH5)
5829 int offset_base, offset;
5830 rtx r0 = NULL_RTX;
5831 int offset_in_r0 = -1;
5832 int sp_in_r0 = 0;
5833 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5834 int total_size, save_size;
5835 save_schedule schedule;
5836 save_entry *entry;
5837 int *tmp_pnt;
5839 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5840 && ! current_function_interrupt)
5841 r0 = gen_rtx_REG (Pmode, R0_REG);
5843 /* D is the actual number of bytes that we need for saving registers;
5844 however, in initial_elimination_offset we have committed to using
5845 an additional TREGS_SPACE bytes. In order to keep the addresses of
5846 both the arguments supplied by the caller and the local variables
5847 valid, we must keep this gap. Place it between the incoming
5848 arguments and the actually saved registers in a bid to optimize
5849 locality of reference. */
5850 total_size = d + tregs_space;
5851 total_size += rounded_frame_size (total_size);
5852 save_size = total_size - rounded_frame_size (d);
5853 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5854 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5855 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
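/* E.g. with an 8-byte stack boundary, a save_size of 20 bytes yields a
d_rounding of 4, keeping the register save area 8-byte aligned.  */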
5857 /* If adjusting the stack in a single step costs nothing extra, do so.
5858 I.e. either if a single addi is enough, or we need a movi anyway,
5859 and we don't exceed the maximum offset range (the test for the
5860 latter is conservative for simplicity). */
5861 if (TARGET_SHMEDIA
5862 && (CONST_OK_FOR_I10 (-total_size)
5863 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5864 && total_size <= 2044)))
5865 d_rounding = total_size - save_size;
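/* When this fires, d_rounding becomes rounded_frame_size (d), so the
stack adjustment just below allocates the entire frame at once
(-(save_size + d_rounding) == -total_size) and the later frame
adjustment at the end of the prologue is zero.  */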
5867 offset_base = d + d_rounding;
5869 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5870 0, NULL);
5872 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5873 tmp_pnt = schedule.temps;
5874 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5876 enum machine_mode mode = entry->mode;
5877 unsigned int reg = entry->reg;
5878 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5879 rtx orig_reg_rtx;
5881 offset = entry->offset;
5883 reg_rtx = gen_rtx_REG (mode, reg);
5885 mem_rtx = gen_frame_mem (mode,
5886 gen_rtx_PLUS (Pmode,
5887 stack_pointer_rtx,
5888 GEN_INT (offset)));
5890 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5892 gcc_assert (r0);
5893 mem_rtx = NULL_RTX;
5895 try_pre_dec:
5897 if (HAVE_PRE_DECREMENT
5898 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5899 || mem_rtx == NULL_RTX
5900 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5902 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
5904 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5905 pre_dec_ok);
5907 pre_dec = NULL_RTX;
5909 break;
5911 pre_dec_ok:
5912 mem_rtx = NULL_RTX;
5913 offset += GET_MODE_SIZE (mode);
5915 while (0);
5917 if (mem_rtx != NULL_RTX)
5918 goto addr_ok;
5920 if (offset_in_r0 == -1)
5922 emit_move_insn (r0, GEN_INT (offset));
5923 offset_in_r0 = offset;
5925 else if (offset != offset_in_r0)
5927 emit_move_insn (r0,
5928 gen_rtx_PLUS
5929 (Pmode, r0,
5930 GEN_INT (offset - offset_in_r0)));
5931 offset_in_r0 += offset - offset_in_r0;
5934 if (pre_dec != NULL_RTX)
5936 if (! sp_in_r0)
5938 emit_move_insn (r0,
5939 gen_rtx_PLUS
5940 (Pmode, r0, stack_pointer_rtx));
5941 sp_in_r0 = 1;
5944 offset -= GET_MODE_SIZE (mode);
5945 offset_in_r0 -= GET_MODE_SIZE (mode);
5947 mem_rtx = pre_dec;
5949 else if (sp_in_r0)
5950 mem_rtx = gen_frame_mem (mode, r0);
5951 else
5952 mem_rtx = gen_frame_mem (mode,
5953 gen_rtx_PLUS (Pmode,
5954 stack_pointer_rtx,
5955 r0));
5957 /* We must not use an r0-based address for target-branch
5958 registers or for special registers without pre-dec
5959 memory addresses, since we store their values in r0
5960 first. */
5961 gcc_assert (!TARGET_REGISTER_P (reg)
5962 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
5963 || mem_rtx == pre_dec));
5965 addr_ok:
5966 orig_reg_rtx = reg_rtx;
5967 if (TARGET_REGISTER_P (reg)
5968 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5969 && mem_rtx != pre_dec))
5971 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5973 emit_move_insn (tmp_reg, reg_rtx);
5975 if (REGNO (tmp_reg) == R0_REG)
5977 offset_in_r0 = -1;
5978 sp_in_r0 = 0;
5979 gcc_assert (!refers_to_regno_p
5980 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
5983 if (*++tmp_pnt <= 0)
5984 tmp_pnt = schedule.temps;
5986 reg_rtx = tmp_reg;
5989 rtx insn;
5991 /* Mark as interesting for the dwarf cfi generator. */
5992 insn = emit_move_insn (mem_rtx, reg_rtx);
5993 RTX_FRAME_RELATED_P (insn) = 1;
5994 /* If we use an intermediate register for the save, we can't
5995 describe this exactly in cfi as a copy of the to-be-saved
5996 register into the temporary register and then the temporary
5997 register on the stack, because the temporary register can
5998 have a different natural size than the to-be-saved register.
5999 Thus, we gloss over the intermediate copy and pretend we do
6000 a direct save from the to-be-saved register. */
6001 if (REGNO (reg_rtx) != reg)
6003 rtx set, note_rtx;
6005 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6006 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6007 REG_NOTES (insn));
6008 REG_NOTES (insn) = note_rtx;
6011 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6013 rtx reg_rtx = gen_rtx_REG (mode, reg);
6014 rtx set, note_rtx;
6015 rtx mem_rtx = gen_frame_mem (mode,
6016 gen_rtx_PLUS (Pmode,
6017 stack_pointer_rtx,
6018 GEN_INT (offset)));
6020 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6021 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6022 REG_NOTES (insn));
6023 REG_NOTES (insn) = note_rtx;
6028 gcc_assert (entry->offset == d_rounding);
6030 else
6031 push_regs (&live_regs_mask, current_function_interrupt);
6033 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
6035 rtx insn = get_last_insn ();
6036 rtx last = emit_insn (gen_GOTaddr2picreg ());
6038 /* Mark these insns as possibly dead. Sometimes, flow2 may
6039 delete all uses of the PIC register. In this case, let it
6040 delete the initialization too. */
6043 insn = NEXT_INSN (insn);
6045 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6046 const0_rtx,
6047 REG_NOTES (insn));
6049 while (insn != last);
6052 if (SHMEDIA_REGS_STACK_ADJUST ())
6054 /* This must NOT go through the PLT, otherwise mach and macl
6055 may be clobbered. */
6056 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6057 (TARGET_FPU_ANY
6058 ? "__GCC_push_shmedia_regs"
6059 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6060 emit_insn (gen_shmedia_save_restore_regs_compact
6061 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6064 if (target_flags != save_flags && ! current_function_interrupt)
6066 rtx insn = emit_insn (gen_toggle_sz ());
6068 /* If we're lucky, a mode switch in the function body will
6069 overwrite fpscr, making this insn dead. Tell flow this
6070 insn is ok to delete. */
6071 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6072 const0_rtx,
6073 REG_NOTES (insn));
6076 target_flags = save_flags;
6078 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6079 stack_pointer_rtx, 0, NULL);
6081 if (frame_pointer_needed)
6082 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6084 if (TARGET_SHCOMPACT
6085 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6087 /* This must NOT go through the PLT, otherwise mach and macl
6088 may be clobbered. */
6089 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6090 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6091 emit_insn (gen_shcompact_incoming_args ());
6095 void
6096 sh_expand_epilogue (bool sibcall_p)
6098 HARD_REG_SET live_regs_mask;
6099 int d, i;
6100 int d_rounding = 0;
6102 int save_flags = target_flags;
6103 int frame_size, save_size;
6104 int fpscr_deferred = 0;
6105 int e = sibcall_p ? -1 : 1;
6107 d = calc_live_regs (&live_regs_mask);
6109 save_size = d;
6110 frame_size = rounded_frame_size (d);
6112 if (TARGET_SH5)
6114 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6115 int total_size;
6116 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6117 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6118 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6120 total_size = d + tregs_space;
6121 total_size += rounded_frame_size (total_size);
6122 save_size = total_size - frame_size;
6124 /* If adjusting the stack in a single step costs nothing extra, do so.
6125 I.e. either if a single addi is enough, or we need a movi anyway,
6126 and we don't exceed the maximum offset range (the test for the
6127 latter is conservative for simplicity). */
6128 if (TARGET_SHMEDIA
6129 && ! frame_pointer_needed
6130 && (CONST_OK_FOR_I10 (total_size)
6131 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6132 && total_size <= 2044)))
6133 d_rounding = frame_size;
6135 frame_size -= d_rounding;
6138 if (frame_pointer_needed)
6140 /* We must avoid scheduling the epilogue with previous basic blocks
6141 when exception handling is enabled. See PR/18032. */
6142 if (flag_exceptions)
6143 emit_insn (gen_blockage ());
6144 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
6145 &live_regs_mask);
6147 /* We must avoid moving the stack pointer adjustment past code
6148 which reads from the local frame, else an interrupt could
6149 occur after the SP adjustment and clobber data in the local
6150 frame. */
6151 emit_insn (gen_blockage ());
6152 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
6154 else if (frame_size)
6156 /* We must avoid moving the stack pointer adjustment past code
6157 which reads from the local frame, else an interrupt could
6158 occur after the SP adjustment and clobber data in the local
6159 frame. */
6160 emit_insn (gen_blockage ());
6161 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6164 if (SHMEDIA_REGS_STACK_ADJUST ())
6166 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6167 (TARGET_FPU_ANY
6168 ? "__GCC_pop_shmedia_regs"
6169 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6170 /* This must NOT go through the PLT, otherwise mach and macl
6171 may be clobbered. */
6172 emit_insn (gen_shmedia_save_restore_regs_compact
6173 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6176 /* Pop all the registers. */
6178 if (target_flags != save_flags && ! current_function_interrupt)
6179 emit_insn (gen_toggle_sz ());
6180 if (TARGET_SH5)
6182 int offset_base, offset;
6183 int offset_in_r0 = -1;
6184 int sp_in_r0 = 0;
6185 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6186 save_schedule schedule;
6187 save_entry *entry;
6188 int *tmp_pnt;
6190 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6191 offset_base = -entry[1].offset + d_rounding;
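/* ENTRY now points at the last real save; entry[1] is the end delimiter,
whose offset is the most negative save offset, so negating it rebases
all the entries below to non-negative offsets from the stack pointer.  */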
6192 tmp_pnt = schedule.temps;
6193 for (; entry->mode != VOIDmode; entry--)
6195 enum machine_mode mode = entry->mode;
6196 int reg = entry->reg;
6197 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6199 offset = offset_base + entry->offset;
6200 reg_rtx = gen_rtx_REG (mode, reg);
6202 mem_rtx = gen_frame_mem (mode,
6203 gen_rtx_PLUS (Pmode,
6204 stack_pointer_rtx,
6205 GEN_INT (offset)));
6207 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6209 mem_rtx = NULL_RTX;
6211 try_post_inc:
6213 if (HAVE_POST_INCREMENT
6214 && (offset == offset_in_r0
6215 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6216 && mem_rtx == NULL_RTX)
6217 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6219 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
6221 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6222 post_inc_ok);
6224 post_inc = NULL_RTX;
6226 break;
6228 post_inc_ok:
6229 mem_rtx = NULL_RTX;
6231 while (0);
6233 if (mem_rtx != NULL_RTX)
6234 goto addr_ok;
6236 if (offset_in_r0 == -1)
6238 emit_move_insn (r0, GEN_INT (offset));
6239 offset_in_r0 = offset;
6241 else if (offset != offset_in_r0)
6243 emit_move_insn (r0,
6244 gen_rtx_PLUS
6245 (Pmode, r0,
6246 GEN_INT (offset - offset_in_r0)));
6247 offset_in_r0 += offset - offset_in_r0;
6250 if (post_inc != NULL_RTX)
6252 if (! sp_in_r0)
6254 emit_move_insn (r0,
6255 gen_rtx_PLUS
6256 (Pmode, r0, stack_pointer_rtx));
6257 sp_in_r0 = 1;
6260 mem_rtx = post_inc;
6262 offset_in_r0 += GET_MODE_SIZE (mode);
6264 else if (sp_in_r0)
6265 mem_rtx = gen_frame_mem (mode, r0);
6266 else
6267 mem_rtx = gen_frame_mem (mode,
6268 gen_rtx_PLUS (Pmode,
6269 stack_pointer_rtx,
6270 r0));
6272 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6273 || mem_rtx == post_inc);
6275 addr_ok:
6276 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6277 && mem_rtx != post_inc)
6279 insn = emit_move_insn (r0, mem_rtx);
6280 mem_rtx = r0;
6282 else if (TARGET_REGISTER_P (reg))
6284 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6286 /* Give the scheduler a bit of freedom by using up to
6287 MAX_TEMPS registers in a round-robin fashion. */
6288 insn = emit_move_insn (tmp_reg, mem_rtx);
6289 mem_rtx = tmp_reg;
6290 if (*++tmp_pnt < 0)
6291 tmp_pnt = schedule.temps;
6294 insn = emit_move_insn (reg_rtx, mem_rtx);
6295 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
6296 /* This is dead, unless we return with a sibcall. */
6297 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6298 const0_rtx,
6299 REG_NOTES (insn));
6302 gcc_assert (entry->offset + offset_base == d + d_rounding);
6304 else /* ! TARGET_SH5 */
6306 save_size = 0;
6307 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6308 pop (PR_REG);
6309 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6311 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6313 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6314 && hard_regs_intersect_p (&live_regs_mask,
6315 &reg_class_contents[DF_REGS]))
6316 fpscr_deferred = 1;
6317 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6318 pop (j);
6319 if (j == FIRST_FP_REG && fpscr_deferred)
6320 pop (FPSCR_REG);
6324 if (target_flags != save_flags && ! current_function_interrupt)
6325 emit_insn (gen_toggle_sz ());
6326 target_flags = save_flags;
6328 output_stack_adjust (current_function_pretend_args_size
6329 + save_size + d_rounding
6330 + current_function_args_info.stack_regs * 8,
6331 stack_pointer_rtx, e, NULL);
6333 if (current_function_calls_eh_return)
6334 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6335 EH_RETURN_STACKADJ_RTX));
6337 /* Switch back to the normal stack if necessary. */
6338 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
6339 emit_insn (gen_sp_switch_2 ());
6341 /* Tell flow the insn that pops PR isn't dead. */
6342 /* PR_REG will never be live in SHmedia mode, and we don't need to
6343 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6344 by the return pattern. */
6345 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6346 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6349 static int sh_need_epilogue_known = 0;
6352 sh_need_epilogue (void)
6354 if (! sh_need_epilogue_known)
6356 rtx epilogue;
6358 start_sequence ();
6359 sh_expand_epilogue (0);
6360 epilogue = get_insns ();
6361 end_sequence ();
6362 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6364 return sh_need_epilogue_known > 0;
6367 /* Emit code to change the current function's return address to RA.
6368 TEMP is available as a scratch register, if needed. */
6370 void
6371 sh_set_return_address (rtx ra, rtx tmp)
6373 HARD_REG_SET live_regs_mask;
6374 int d;
6375 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6376 int pr_offset;
6378 d = calc_live_regs (&live_regs_mask);
6380 /* If pr_reg isn't live, we can set it (or the register given in
6381 sh_media_register_for_return) directly. */
6382 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6384 rtx rr;
6386 if (TARGET_SHMEDIA)
6388 int rr_regno = sh_media_register_for_return ();
6390 if (rr_regno < 0)
6391 rr_regno = pr_reg;
6393 rr = gen_rtx_REG (DImode, rr_regno);
6395 else
6396 rr = gen_rtx_REG (SImode, pr_reg);
6398 emit_insn (GEN_MOV (rr, ra));
6399 /* Tell flow the register for return isn't dead. */
6400 emit_insn (gen_rtx_USE (VOIDmode, rr));
6401 return;
6404 if (TARGET_SH5)
6406 int offset;
6407 save_schedule schedule;
6408 save_entry *entry;
6410 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6411 offset = entry[1].offset;
6412 for (; entry->mode != VOIDmode; entry--)
6413 if (entry->reg == pr_reg)
6414 goto found;
6416 /* We can't find the PR register. */
6417 gcc_unreachable ();
6419 found:
6420 offset = entry->offset - offset;
6421 pr_offset = (rounded_frame_size (d) + offset
6422 + SHMEDIA_REGS_STACK_ADJUST ());
6424 else
6425 pr_offset = rounded_frame_size (d);
6427 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6428 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
6430 tmp = gen_frame_mem (Pmode, tmp);
6431 emit_insn (GEN_MOV (tmp, ra));
6434 /* Clear variables at function end. */
6436 static void
6437 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6438 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6440 sh_need_epilogue_known = 0;
6443 static rtx
6444 sh_builtin_saveregs (void)
6446 /* First unnamed integer register. */
6447 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6448 /* Number of integer registers we need to save. */
6449 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6450 /* First unnamed SFmode float reg */
6451 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6452 /* Number of SFmode float regs to save. */
6453 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6454 rtx regbuf, fpregs;
6455 int bufsize, regno;
6456 HOST_WIDE_INT alias_set;
6458 if (TARGET_SH5)
6460 if (n_intregs)
6462 int pushregs = n_intregs;
6464 while (pushregs < NPARM_REGS (SImode) - 1
6465 && (CALL_COOKIE_INT_REG_GET
6466 (current_function_args_info.call_cookie,
6467 NPARM_REGS (SImode) - pushregs)
6468 == 1))
6470 current_function_args_info.call_cookie
6471 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6472 - pushregs, 1);
6473 pushregs++;
6476 if (pushregs == NPARM_REGS (SImode))
6477 current_function_args_info.call_cookie
6478 |= (CALL_COOKIE_INT_REG (0, 1)
6479 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6480 else
6481 current_function_args_info.call_cookie
6482 |= CALL_COOKIE_STACKSEQ (pushregs);
6484 current_function_pretend_args_size += 8 * n_intregs;
6486 if (TARGET_SHCOMPACT)
6487 return const0_rtx;
6490 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6492 error ("__builtin_saveregs not supported by this subtarget");
6493 return const0_rtx;
6496 if (TARGET_SHMEDIA)
6497 n_floatregs = 0;
6499 /* Allocate block of memory for the regs. */
6500 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6501 Or can assign_stack_local accept a 0 SIZE argument? */
6502 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
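/* For example, with two unnamed int args, four unnamed float args and a
4-byte UNITS_PER_WORD, bufsize is (2 + 4) * 4 == 24 bytes.  */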
6504 if (TARGET_SHMEDIA)
6505 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6506 else if (n_floatregs & 1)
6508 rtx addr;
6510 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6511 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6512 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6513 regbuf = change_address (regbuf, BLKmode, addr);
6515 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
6517 rtx addr, mask;
6519 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6520 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
6521 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
6522 emit_insn (gen_andsi3 (addr, addr, mask));
6523 regbuf = change_address (regbuf, BLKmode, addr);
6525 else
6526 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
6527 alias_set = get_varargs_alias_set ();
6528 set_mem_alias_set (regbuf, alias_set);
6530 /* Save int args.
6531 This is optimized to only save the regs that are necessary. Explicitly
6532 named args need not be saved. */
6533 if (n_intregs > 0)
6534 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6535 adjust_address (regbuf, BLKmode,
6536 n_floatregs * UNITS_PER_WORD),
6537 n_intregs);
6539 if (TARGET_SHMEDIA)
6540 /* Return the address of the regbuf. */
6541 return XEXP (regbuf, 0);
6543 /* Save float args.
6544 This is optimized to only save the regs that are necessary. Explicitly
6545 named args need not be saved.
6546 We explicitly build a pointer to the buffer because it halves the insn
6547 count when not optimizing (otherwise the pointer is built for each reg
6548 saved).
6549 We emit the moves in reverse order so that we can use predecrement. */
6551 fpregs = copy_to_mode_reg (Pmode,
6552 plus_constant (XEXP (regbuf, 0),
6553 n_floatregs * UNITS_PER_WORD));
6554 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6556 rtx mem;
6557 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6559 emit_insn (gen_addsi3 (fpregs, fpregs,
6560 GEN_INT (-2 * UNITS_PER_WORD)));
6561 mem = change_address (regbuf, DFmode, fpregs);
6562 emit_move_insn (mem,
6563 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6565 regno = first_floatreg;
6566 if (regno & 1)
6568 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6569 mem = change_address (regbuf, SFmode, fpregs);
6570 emit_move_insn (mem,
6571 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6572 - (TARGET_LITTLE_ENDIAN != 0)));
6575 else
6576 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6578 rtx mem;
6580 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6581 mem = change_address (regbuf, SFmode, fpregs);
6582 emit_move_insn (mem,
6583 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6586 /* Return the address of the regbuf. */
6587 return XEXP (regbuf, 0);
6590 /* Define the `__builtin_va_list' type for the ABI. */
6592 static tree
6593 sh_build_builtin_va_list (void)
6595 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6596 tree record;
6598 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6599 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6600 return ptr_type_node;
6602 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6604 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6605 ptr_type_node);
6606 f_next_o_limit = build_decl (FIELD_DECL,
6607 get_identifier ("__va_next_o_limit"),
6608 ptr_type_node);
6609 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6610 ptr_type_node);
6611 f_next_fp_limit = build_decl (FIELD_DECL,
6612 get_identifier ("__va_next_fp_limit"),
6613 ptr_type_node);
6614 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6615 ptr_type_node);
6617 DECL_FIELD_CONTEXT (f_next_o) = record;
6618 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6619 DECL_FIELD_CONTEXT (f_next_fp) = record;
6620 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6621 DECL_FIELD_CONTEXT (f_next_stack) = record;
6623 TYPE_FIELDS (record) = f_next_o;
6624 TREE_CHAIN (f_next_o) = f_next_o_limit;
6625 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6626 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6627 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6629 layout_type (record);
6631 return record;
6634 /* Implement `va_start' for varargs and stdarg. */
6636 void
6637 sh_va_start (tree valist, rtx nextarg)
6639 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6640 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6641 tree t, u;
6642 int nfp, nint;
6644 if (TARGET_SH5)
6646 expand_builtin_saveregs ();
6647 std_expand_builtin_va_start (valist, nextarg);
6648 return;
6651 if ((! TARGET_SH2E && ! TARGET_SH4)
6652 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6654 std_expand_builtin_va_start (valist, nextarg);
6655 return;
6658 f_next_o = TYPE_FIELDS (va_list_type_node);
6659 f_next_o_limit = TREE_CHAIN (f_next_o);
6660 f_next_fp = TREE_CHAIN (f_next_o_limit);
6661 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6662 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6664 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6665 NULL_TREE);
6666 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6667 valist, f_next_o_limit, NULL_TREE);
6668 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6669 NULL_TREE);
6670 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6671 valist, f_next_fp_limit, NULL_TREE);
6672 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6673 valist, f_next_stack, NULL_TREE);
6675 /* Call __builtin_saveregs. */
6676 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6677 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6678 TREE_SIDE_EFFECTS (t) = 1;
6679 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6681 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6682 if (nfp < 8)
6683 nfp = 8 - nfp;
6684 else
6685 nfp = 0;
6686 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6687 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp)));
6688 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6689 TREE_SIDE_EFFECTS (t) = 1;
6690 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
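/* E.g. with three named float args, nfp is 8 - 3 == 5, so next_fp_limit
is set 5 * UNITS_PER_WORD bytes past next_fp, i.e. just past the unnamed
float registers saved by __builtin_saveregs.  */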
6692 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6693 TREE_SIDE_EFFECTS (t) = 1;
6694 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6696 nint = current_function_args_info.arg_count[SH_ARG_INT];
6697 if (nint < 4)
6698 nint = 4 - nint;
6699 else
6700 nint = 0;
6701 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6702 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint)));
6703 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6704 TREE_SIDE_EFFECTS (t) = 1;
6705 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6707 u = make_tree (ptr_type_node, nextarg);
6708 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6709 TREE_SIDE_EFFECTS (t) = 1;
6710 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6713 /* TYPE is a RECORD_TYPE. If there is only a single non-zero-sized
6714 member, return it. */
6715 static tree
6716 find_sole_member (tree type)
6718 tree field, member = NULL_TREE;
6720 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6722 if (TREE_CODE (field) != FIELD_DECL)
6723 continue;
6724 if (!DECL_SIZE (field))
6725 return NULL_TREE;
6726 if (integer_zerop (DECL_SIZE (field)))
6727 continue;
6728 if (member)
6729 return NULL_TREE;
6730 member = field;
6732 return member;
6734 /* Implement `va_arg'. */
6736 static tree
6737 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6738 tree *post_p ATTRIBUTE_UNUSED)
6740 HOST_WIDE_INT size, rsize;
6741 tree tmp, pptr_type_node;
6742 tree addr, lab_over = NULL, result = NULL;
6743 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
6744 tree eff_type;
6746 if (pass_by_ref)
6747 type = build_pointer_type (type);
6749 size = int_size_in_bytes (type);
6750 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
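/* RSIZE is SIZE rounded up to a multiple of UNITS_PER_WORD; e.g. a
6-byte struct gets an rsize of 8 when UNITS_PER_WORD is 4.  */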
6751 pptr_type_node = build_pointer_type (ptr_type_node);
6753 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6754 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6756 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6757 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6758 int pass_as_float;
6759 tree lab_false;
6760 tree member;
6762 f_next_o = TYPE_FIELDS (va_list_type_node);
6763 f_next_o_limit = TREE_CHAIN (f_next_o);
6764 f_next_fp = TREE_CHAIN (f_next_o_limit);
6765 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6766 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6768 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6769 NULL_TREE);
6770 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6771 valist, f_next_o_limit, NULL_TREE);
6772 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6773 valist, f_next_fp, NULL_TREE);
6774 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6775 valist, f_next_fp_limit, NULL_TREE);
6776 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6777 valist, f_next_stack, NULL_TREE);
6779 /* Structures with a single member with a distinct mode are passed
6780 like their member. This is relevant if the latter has a REAL_TYPE
6781 or COMPLEX_TYPE type. */
6782 eff_type = type;
6783 while (TREE_CODE (eff_type) == RECORD_TYPE
6784 && (member = find_sole_member (eff_type))
6785 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
6786 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
6787 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
6789 tree field_type = TREE_TYPE (member);
6791 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
6792 eff_type = field_type;
6793 else
6795 gcc_assert ((TYPE_ALIGN (eff_type)
6796 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
6797 || (TYPE_ALIGN (eff_type)
6798 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
6799 break;
6803 if (TARGET_SH4)
6805 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
6806 || (TREE_CODE (eff_type) == COMPLEX_TYPE
6807 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
6808 && size <= 16));
6810 else
6812 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
6815 addr = create_tmp_var (pptr_type_node, NULL);
6816 lab_false = create_artificial_label ();
6817 lab_over = create_artificial_label ();
6819 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6821 if (pass_as_float)
6823 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
6824 tree cmp;
6825 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
6827 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6828 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
6829 gimplify_and_add (tmp, pre_p);
6831 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, valist);
6832 gimplify_and_add (tmp, pre_p);
6833 tmp = next_fp_limit;
6834 if (size > 4 && !is_double)
6835 tmp = build2 (PLUS_EXPR, TREE_TYPE (tmp), tmp,
6836 fold_convert (TREE_TYPE (tmp), size_int (4 - size)));
6837 tmp = build (GE_EXPR, boolean_type_node, next_fp_tmp, tmp);
6838 cmp = build (COND_EXPR, void_type_node, tmp,
6839 build (GOTO_EXPR, void_type_node, lab_false),
6840 NULL);
6841 if (!is_double)
6842 gimplify_and_add (cmp, pre_p);
6844 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
6845 || (is_double || size == 16))
6847 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6848 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp_tmp, tmp);
6849 tmp = build (PLUS_EXPR, ptr_type_node, next_fp_tmp, tmp);
6850 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp_tmp, tmp);
6851 gimplify_and_add (tmp, pre_p);
6853 if (is_double)
6854 gimplify_and_add (cmp, pre_p);
6856 #ifdef FUNCTION_ARG_SCmode_WART
6857 if (TYPE_MODE (eff_type) == SCmode
6858 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6860 tree subtype = TREE_TYPE (eff_type);
6861 tree real, imag;
6863 imag
6864 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
6865 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6867 real
6868 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
6869 real = get_initialized_tmp_var (real, pre_p, NULL);
6871 result = build (COMPLEX_EXPR, type, real, imag);
6872 result = get_initialized_tmp_var (result, pre_p, NULL);
6874 #endif /* FUNCTION_ARG_SCmode_WART */
6876 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6877 gimplify_and_add (tmp, pre_p);
6879 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6880 gimplify_and_add (tmp, pre_p);
6882 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6883 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6884 gimplify_and_add (tmp, pre_p);
6885 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, valist);
6886 gimplify_and_add (tmp, pre_p);
6888 tmp = build2 (MODIFY_EXPR, ptr_type_node, valist, next_fp_tmp);
6889 gimplify_and_add (tmp, post_p);
6890 valist = next_fp_tmp;
6892 else
6894 tmp = fold_convert (ptr_type_node, size_int (rsize));
6895 tmp = build (PLUS_EXPR, ptr_type_node, next_o, tmp);
6896 tmp = build (GT_EXPR, boolean_type_node, tmp, next_o_limit);
6897 tmp = build (COND_EXPR, void_type_node, tmp,
6898 build (GOTO_EXPR, void_type_node, lab_false),
6899 NULL);
6900 gimplify_and_add (tmp, pre_p);
6902 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6903 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6904 gimplify_and_add (tmp, pre_p);
6906 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6907 gimplify_and_add (tmp, pre_p);
6909 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6910 gimplify_and_add (tmp, pre_p);
6912 if (size > 4 && ! TARGET_SH4)
6914 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6915 gimplify_and_add (tmp, pre_p);
6918 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6919 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6920 gimplify_and_add (tmp, pre_p);
6923 if (!result)
6925 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6926 gimplify_and_add (tmp, pre_p);
6930 /* ??? In va-sh.h, there had been code to make values larger than
6931 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6933 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
6934 if (result)
6936 tmp = build (MODIFY_EXPR, void_type_node, result, tmp);
6937 gimplify_and_add (tmp, pre_p);
6939 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6940 gimplify_and_add (tmp, pre_p);
6942 else
6943 result = tmp;
6945 if (pass_by_ref)
6946 result = build_va_arg_indirect_ref (result);
6948 return result;
6951 bool
6952 sh_promote_prototypes (tree type)
6954 if (TARGET_HITACHI)
6955 return 0;
6956 if (! type)
6957 return 1;
6958 return ! sh_attr_renesas_p (type);
6961 /* Whether an argument must be passed by reference. On SHcompact, we
6962 pretend arguments wider than 32 bits that would have been passed in
6963 registers are passed by reference, so that an SHmedia trampoline
6964 loads them into the full 64-bit registers. */
6966 static int
6967 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6968 tree type, bool named)
6970 unsigned HOST_WIDE_INT size;
6972 if (type)
6973 size = int_size_in_bytes (type);
6974 else
6975 size = GET_MODE_SIZE (mode);
6977 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
6978 && (!named
6979 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
6980 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
6981 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
6982 && size > 4
6983 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
6984 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6985 return size;
6986 else
6987 return 0;
6990 static bool
6991 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6992 tree type, bool named)
6994 if (targetm.calls.must_pass_in_stack (mode, type))
6995 return true;
6997 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
6998 wants to know about pass-by-reference semantics for incoming
6999 arguments. */
7000 if (! cum)
7001 return false;
7003 if (TARGET_SHCOMPACT)
7005 cum->byref = shcompact_byref (cum, mode, type, named);
7006 return cum->byref != 0;
7009 return false;
7012 static bool
7013 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7014 tree type, bool named ATTRIBUTE_UNUSED)
7016 /* ??? How can it possibly be correct to return true only on the
7017 caller side of the equation? Is there someplace else in the
7018 sh backend that's magically producing the copies? */
7019 return (cum->outgoing
7020 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7021 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7024 static int
7025 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7026 tree type, bool named ATTRIBUTE_UNUSED)
7028 int words = 0;
7030 if (!TARGET_SH5
7031 && PASS_IN_REG_P (*cum, mode, type)
7032 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7033 && (ROUND_REG (*cum, mode)
7034 + (mode != BLKmode
7035 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7036 : ROUND_ADVANCE (int_size_in_bytes (type)))
7037 > NPARM_REGS (mode)))
7038 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7040 else if (!TARGET_SHCOMPACT
7041 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7042 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7044 return words * UNITS_PER_WORD;
7048 /* Define where to put the arguments to a function.
7049 Value is zero to push the argument on the stack,
7050 or a hard register in which to store the argument.
7052 MODE is the argument's machine mode.
7053 TYPE is the data type of the argument (as a tree).
7054 This is null for libcalls where that information may
7055 not be available.
7056 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7057 the preceding args and about the function being called.
7058 NAMED is nonzero if this argument is a named parameter
7059 (otherwise it is an extra parameter matching an ellipsis).
7061 On SH the first args are normally in registers
7062 and the rest are pushed. Any arg that starts within the first
7063 NPARM_REGS words is at least partially passed in a register unless
7064 its data type forbids it. */
7068 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7069 tree type, int named)
7071 if (! TARGET_SH5 && mode == VOIDmode)
7072 return GEN_INT (ca->renesas_abi ? 1 : 0);
7074 if (! TARGET_SH5
7075 && PASS_IN_REG_P (*ca, mode, type)
7076 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
7078 int regno;
7080 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
7081 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
7083 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
7084 gen_rtx_REG (SFmode,
7085 BASE_ARG_REG (mode)
7086 + (ROUND_REG (*ca, mode) ^ 1)),
7087 const0_rtx);
7088 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
7089 gen_rtx_REG (SFmode,
7090 BASE_ARG_REG (mode)
7091 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
7092 GEN_INT (4));
7093 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
7096 /* If the alignment of a DF value causes an SF register to be
7097 skipped, we will use that skipped register for the next SF
7098 value. */
7099 if ((TARGET_HITACHI || ca->renesas_abi)
7100 && ca->free_single_fp_reg
7101 && mode == SFmode)
7102 return gen_rtx_REG (mode, ca->free_single_fp_reg);
7104 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
7105 ^ (mode == SFmode && TARGET_SH4
7106 && TARGET_LITTLE_ENDIAN != 0
7107 && ! TARGET_HITACHI && ! ca->renesas_abi);
7108 return gen_rtx_REG (mode, regno);
7112 if (TARGET_SH5)
7114 if (mode == VOIDmode && TARGET_SHCOMPACT)
7115 return GEN_INT (ca->call_cookie);
7117 /* The following test assumes unnamed arguments are promoted to
7118 DFmode. */
7119 if (mode == SFmode && ca->free_single_fp_reg)
7120 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
7122 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
7123 && (named || ! ca->prototype_p)
7124 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
7126 if (! ca->prototype_p && TARGET_SHMEDIA)
7127 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
7129 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
7130 FIRST_FP_PARM_REG
7131 + ca->arg_count[(int) SH_ARG_FLOAT]);
7134 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7135 && (! TARGET_SHCOMPACT
7136 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7137 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7138 type, named))))
7140 return gen_rtx_REG (mode, (FIRST_PARM_REG
7141 + ca->arg_count[(int) SH_ARG_INT]));
7144 return 0;
7147 return 0;
7150 /* Update the data in CUM to advance over an argument
7151 of mode MODE and data type TYPE.
7152 (TYPE is null for libcalls where that information may not be
7153 available.) */
7155 void
7156 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7157 tree type, int named)
7159 if (ca->force_mem)
7160 ca->force_mem = 0;
7161 else if (TARGET_SH5)
7163 tree type2 = (ca->byref && type
7164 ? TREE_TYPE (type)
7165 : type);
7166 enum machine_mode mode2 = (ca->byref && type
7167 ? TYPE_MODE (type2)
7168 : mode);
7169 int dwords = ((ca->byref
7170 ? ca->byref
7171 : mode2 == BLKmode
7172 ? int_size_in_bytes (type2)
7173 : GET_MODE_SIZE (mode2)) + 7) / 8;
7174 int numregs = MIN (dwords, NPARM_REGS (SImode)
7175 - ca->arg_count[(int) SH_ARG_INT]);
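/* DWORDS is the argument size in 8-byte units, rounded up; e.g. a
12-byte struct occupies two 8-byte slots and thus consumes up to two of
the remaining integer argument registers.  */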
7177 if (numregs)
7179 ca->arg_count[(int) SH_ARG_INT] += numregs;
7180 if (TARGET_SHCOMPACT
7181 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7183 ca->call_cookie
7184 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7185 - numregs, 1);
7186 /* N.B. We want this also for outgoing. */
7187 ca->stack_regs += numregs;
7189 else if (ca->byref)
7191 if (! ca->outgoing)
7192 ca->stack_regs += numregs;
7193 ca->byref_regs += numregs;
7194 ca->byref = 0;
7196 ca->call_cookie
7197 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7198 - numregs, 2);
7199 while (--numregs);
7200 ca->call_cookie
7201 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7202 - 1, 1);
7204 else if (dwords > numregs)
7206 int pushregs = numregs;
7208 if (TARGET_SHCOMPACT)
7209 ca->stack_regs += numregs;
7210 while (pushregs < NPARM_REGS (SImode) - 1
7211 && (CALL_COOKIE_INT_REG_GET
7212 (ca->call_cookie,
7213 NPARM_REGS (SImode) - pushregs)
7214 == 1))
7216 ca->call_cookie
7217 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7218 - pushregs, 1);
7219 pushregs++;
7221 if (numregs == NPARM_REGS (SImode))
7222 ca->call_cookie
7223 |= CALL_COOKIE_INT_REG (0, 1)
7224 | CALL_COOKIE_STACKSEQ (numregs - 1);
7225 else
7226 ca->call_cookie
7227 |= CALL_COOKIE_STACKSEQ (numregs);
7230 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7231 && (named || ! ca->prototype_p))
7233 if (mode2 == SFmode && ca->free_single_fp_reg)
7234 ca->free_single_fp_reg = 0;
7235 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7236 < NPARM_REGS (SFmode))
7238 int numfpregs
7239 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7240 NPARM_REGS (SFmode)
7241 - ca->arg_count[(int) SH_ARG_FLOAT]);
7243 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7245 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7247 if (ca->outgoing && numregs > 0)
7250 ca->call_cookie
7251 |= (CALL_COOKIE_INT_REG
7252 (ca->arg_count[(int) SH_ARG_INT]
7253 - numregs + ((numfpregs - 2) / 2),
7254 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7255 - numfpregs) / 2));
7257 while (numfpregs -= 2);
7259 else if (mode2 == SFmode && (named)
7260 && (ca->arg_count[(int) SH_ARG_FLOAT]
7261 < NPARM_REGS (SFmode)))
7262 ca->free_single_fp_reg
7263 = FIRST_FP_PARM_REG - numfpregs
7264 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7267 return;
7270 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7272 /* Note that we've used the skipped register. */
7273 if (mode == SFmode && ca->free_single_fp_reg)
7275 ca->free_single_fp_reg = 0;
7276 return;
7278 /* When we have a DF after an SF, there's an SF register that gets
7279 skipped in order to align the DF value. We note this skipped
7280 register, because the next SF value will use it, and not the
7281 SF that follows the DF. */
7282 if (mode == DFmode
7283 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7285 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7286 + BASE_ARG_REG (mode));
7290 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7291 || PASS_IN_REG_P (*ca, mode, type))
7292 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7293 = (ROUND_REG (*ca, mode)
7294 + (mode == BLKmode
7295 ? ROUND_ADVANCE (int_size_in_bytes (type))
7296 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7299 /* The Renesas calling convention doesn't quite fit into this scheme since
7300 the struct value address is passed like an invisible argument, but one that is always
7301 passed in memory. */
7302 static rtx
7303 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7305 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7306 return 0;
7307 return gen_rtx_REG (Pmode, 2);
7310 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7312 static bool
7313 sh_return_in_memory (tree type, tree fndecl)
7315 if (TARGET_SH5)
7317 if (TYPE_MODE (type) == BLKmode)
7318 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7319 else
7320 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7322 else
7324 return (TYPE_MODE (type) == BLKmode
7325 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7326 && TREE_CODE (type) == RECORD_TYPE));
7330 /* We actually emit the code in sh_expand_prologue. We used to use
7331 a static variable to flag that we need to emit this code, but that
7332 doesn't work when inlining, when functions are deferred and then emitted
7333 later. Fortunately, we already have two flags that are part of struct
7334 function that tell if a function uses varargs or stdarg. */
7335 static void
7336 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7337 enum machine_mode mode,
7338 tree type,
7339 int *pretend_arg_size,
7340 int second_time ATTRIBUTE_UNUSED)
7342 gcc_assert (current_function_stdarg);
7343 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7345 int named_parm_regs, anon_parm_regs;
7347 named_parm_regs = (ROUND_REG (*ca, mode)
7348 + (mode == BLKmode
7349 ? ROUND_ADVANCE (int_size_in_bytes (type))
7350 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7351 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7352 if (anon_parm_regs > 0)
7353 *pretend_arg_size = anon_parm_regs * 4;
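/* For example, a stdarg function with two named SImode arguments uses two
of the parameter registers, leaving two anonymous ones, so pretend_arg_size
becomes 8 bytes (assuming the usual four integer argument registers).  */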
7357 static bool
7358 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7360 return TARGET_SH5;
7363 static bool
7364 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7366 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7370 /* Define the offset between two registers, one to be eliminated, and
7371 the other its replacement, at the start of a routine. */
7374 initial_elimination_offset (int from, int to)
7376 int regs_saved;
7377 int regs_saved_rounding = 0;
7378 int total_saved_regs_space;
7379 int total_auto_space;
7380 int save_flags = target_flags;
7381 int copy_flags;
7382 HARD_REG_SET live_regs_mask;
7384 shmedia_space_reserved_for_target_registers = false;
7385 regs_saved = calc_live_regs (&live_regs_mask);
7386 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7388 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7390 shmedia_space_reserved_for_target_registers = true;
7391 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7394 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7395 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7396 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7398 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7399 copy_flags = target_flags;
7400 target_flags = save_flags;
7402 total_saved_regs_space = regs_saved + regs_saved_rounding;
7404 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7405 return total_saved_regs_space + total_auto_space
7406 + current_function_args_info.byref_regs * 8;
7408 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7409 return total_saved_regs_space + total_auto_space
7410 + current_function_args_info.byref_regs * 8;
7412 /* Initial gap between fp and sp is 0. */
7413 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7414 return 0;
7416 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7417 return rounded_frame_size (0);
7419 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7420 return rounded_frame_size (0);
7422 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7423 && (to == HARD_FRAME_POINTER_REGNUM
7424 || to == STACK_POINTER_REGNUM));
7425 if (TARGET_SH5)
7427 int n = total_saved_regs_space;
7428 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7429 save_schedule schedule;
7430 save_entry *entry;
7432 n += total_auto_space;
7434 /* If it wasn't saved, there's not much we can do. */
7435 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7436 return n;
7438 target_flags = copy_flags;
7440 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7441 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7442 if (entry->reg == pr_reg)
7444 target_flags = save_flags;
7445 return entry->offset;
7447 gcc_unreachable ();
7449 else
7450 return total_auto_space;
7453 /* Insert any deferred function attributes from earlier pragmas. */
7454 static void
7455 sh_insert_attributes (tree node, tree *attributes)
7457 tree attrs;
7459 if (TREE_CODE (node) != FUNCTION_DECL)
7460 return;
7462 /* We are only interested in function declarations. */
7463 if (!DECL_P (node))
7464 return;
7466 /* Append the attributes to the deferred attributes. */
7467 *sh_deferred_function_attributes_tail = *attributes;
7468 attrs = sh_deferred_function_attributes;
7469 if (!attrs)
7470 return;
7472 /* Some attributes imply or require the interrupt attribute. */
7473 if (!lookup_attribute ("interrupt_handler", attrs)
7474 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
7476 /* If we have a trapa_handler, but no interrupt_handler attribute,
7477 insert an interrupt_handler attribute. */
7478 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
7479 /* We can't use sh_pr_interrupt here because that's not in the
7480 java frontend. */
7481 attrs
7482 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
7483 /* However, for sp_switch, trap_exit and nosave_low_regs, if the
7484 interrupt attribute is missing, we ignore the attribute and warn. */
7485 else if (lookup_attribute ("sp_switch", attrs)
7486 || lookup_attribute ("trap_exit", attrs)
7487 || lookup_attribute ("nosave_low_regs", attrs))
7489 tree *tail;
7491 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
7493 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
7494 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
7495 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs)))
7496 warning (OPT_Wattributes,
7497 "%qs attribute only applies to interrupt functions",
7498 IDENTIFIER_POINTER (TREE_PURPOSE (attrs)));
7499 else
7501 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
7502 NULL_TREE);
7503 tail = &TREE_CHAIN (*tail);
7506 attrs = *attributes;
7510 /* Install the processed list. */
7511 *attributes = attrs;
7513 /* Clear deferred attributes. */
7514 sh_deferred_function_attributes = NULL_TREE;
7515 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
7517 return;
7520 /* Supported attributes:
7522 interrupt_handler -- specifies this function is an interrupt handler.
7524 trapa_handler -- like the above, but don't save all registers.
7526 sp_switch -- specifies an alternate stack for an interrupt handler
7527 to run on.
7529 trap_exit -- use a trapa to exit an interrupt function instead of
7530 an rte instruction.
7532 nosave_low_regs -- don't save r0..r7 in an interrupt handler.
7533 This is useful on the SH3 and upwards,
7534 which have a separate set of low regs for User and Supervisor modes.
7535 This should only be used for the lowest level of interrupts. Higher levels
7536 of interrupts must save the registers in case they themselves are
7537 interrupted.
7539 renesas -- use Renesas calling/layout conventions (functions and
7540 structures).
7544 const struct attribute_spec sh_attribute_table[] =
7546 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7547 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7548 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7549 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7550 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7551 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7552 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7553 #ifdef SYMBIAN
7554 /* Symbian support adds two new attributes:
7555 dllexport - for exporting a function/variable that will live in a dll
7556 dllimport - for importing a function/variable from a dll
7558 Microsoft allows multiple declspecs in one __declspec, separating
7559 them with spaces. We do NOT support this. Instead, use __declspec
7560 multiple times. */
7561 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7562 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7563 #endif
7564 { NULL, 0, 0, false, false, false, NULL }
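/* A usage sketch for the attributes above (hypothetical declarations, not
taken from this file):

void handler (void)
__attribute__ ((interrupt_handler, sp_switch ("alt_stack"), trap_exit (11)));

where "alt_stack" names a variable holding the alternate stack address and
11 is the trapa number used on exit instead of rte.  */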
7567 /* Handle an "interrupt_handler" attribute; arguments as in
7568 struct attribute_spec.handler. */
7569 static tree
7570 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7571 tree args ATTRIBUTE_UNUSED,
7572 int flags ATTRIBUTE_UNUSED,
7573 bool *no_add_attrs)
7575 if (TREE_CODE (*node) != FUNCTION_DECL)
7577 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7578 IDENTIFIER_POINTER (name));
7579 *no_add_attrs = true;
7581 else if (TARGET_SHCOMPACT)
7583 error ("attribute interrupt_handler is not compatible with -m5-compact");
7584 *no_add_attrs = true;
7587 return NULL_TREE;
7590 /* Handle an "sp_switch" attribute; arguments as in
7591 struct attribute_spec.handler. */
7592 static tree
7593 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7594 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7596 if (TREE_CODE (*node) != FUNCTION_DECL)
7598 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7599 IDENTIFIER_POINTER (name));
7600 *no_add_attrs = true;
7602 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7604 /* The argument must be a constant string. */
7605 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
7606 IDENTIFIER_POINTER (name));
7607 *no_add_attrs = true;
7610 return NULL_TREE;
7613 /* Handle a "trap_exit" attribute; arguments as in
7614 struct attribute_spec.handler. */
7615 static tree
7616 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7617 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7619 if (TREE_CODE (*node) != FUNCTION_DECL)
7621 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7622 IDENTIFIER_POINTER (name));
7623 *no_add_attrs = true;
7625 /* The argument specifies a trap number to be used in a trapa instruction
7626 at function exit (instead of an rte instruction). */
7627 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7629 /* The argument must be a constant integer. */
7630 warning (OPT_Wattributes, "%qs attribute argument not an "
7631 "integer constant", IDENTIFIER_POINTER (name));
7632 *no_add_attrs = true;
7635 return NULL_TREE;
7638 static tree
7639 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7640 tree name ATTRIBUTE_UNUSED,
7641 tree args ATTRIBUTE_UNUSED,
7642 int flags ATTRIBUTE_UNUSED,
7643 bool *no_add_attrs ATTRIBUTE_UNUSED)
7645 return NULL_TREE;
7648 /* True if __attribute__((renesas)) or -mrenesas. */
7650 sh_attr_renesas_p (tree td)
7652 if (TARGET_HITACHI)
7653 return 1;
7654 if (td == 0)
7655 return 0;
7656 if (DECL_P (td))
7657 td = TREE_TYPE (td);
7658 if (td == error_mark_node)
7659 return 0;
7660 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7661 != NULL_TREE);
7664 /* True if __attribute__((renesas)) or -mrenesas, for the current
7665 function. */
7667 sh_cfun_attr_renesas_p (void)
7669 return sh_attr_renesas_p (current_function_decl);
7673 sh_cfun_interrupt_handler_p (void)
7675 return (lookup_attribute ("interrupt_handler",
7676 DECL_ATTRIBUTES (current_function_decl))
7677 != NULL_TREE);
7680 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
7682 static const char *
7683 sh_check_pch_target_flags (int old_flags)
7685 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
7686 | MASK_SH_E | MASK_HARD_SH4
7687 | MASK_FPU_SINGLE | MASK_SH4))
7688 return _("created and used with different architectures / ABIs");
7689 if ((old_flags ^ target_flags) & MASK_HITACHI)
7690 return _("created and used with different ABIs");
7691 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
7692 return _("created and used with different endianness");
7693 return NULL;
7696 /* Predicates used by the templates. */
7698 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7699 Used only in general_movsrc_operand. */
7702 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7704 switch (REGNO (op))
7706 case PR_REG:
7707 case MACL_REG:
7708 case MACH_REG:
7709 return 1;
7711 return 0;
7714 /* Nonzero if OP is a floating point value with value 0.0. */
7717 fp_zero_operand (rtx op)
7719 REAL_VALUE_TYPE r;
7721 if (GET_MODE (op) != SFmode)
7722 return 0;
7724 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7725 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7728 /* Nonzero if OP is a floating point value with value 1.0. */
7731 fp_one_operand (rtx op)
7733 REAL_VALUE_TYPE r;
7735 if (GET_MODE (op) != SFmode)
7736 return 0;
7738 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7739 return REAL_VALUES_EQUAL (r, dconst1);
7742 /* For -m4 and -m4-single-only, mode switching is used. If we are
7743 compiling without -mfmovd, movsf_ie isn't taken into account for
7744 mode switching. We could check in machine_dependent_reorg for
7745 cases where we know we are in single precision mode, but there is no
7746 interface to find that out during reload, so we must avoid
7747 choosing an fldi alternative during reload and thus failing to
7748 allocate a scratch register for the constant loading. */
7750 fldi_ok (void)
7752 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7756 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7758 enum rtx_code code = GET_CODE (op);
7759 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7762 /* Return the TLS type for TLS symbols, 0 otherwise. */
7764 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7766 if (GET_CODE (op) != SYMBOL_REF)
7767 return 0;
7768 return SYMBOL_REF_TLS_MODEL (op);
7771 /* Return the destination address of a branch. */
7773 static int
7774 branch_dest (rtx branch)
7776 rtx dest = SET_SRC (PATTERN (branch));
7777 int dest_uid;
7779 if (GET_CODE (dest) == IF_THEN_ELSE)
7780 dest = XEXP (dest, 1);
7781 dest = XEXP (dest, 0);
7782 dest_uid = INSN_UID (dest);
7783 return INSN_ADDRESSES (dest_uid);
7786 /* Return nonzero if REG is not used after INSN.
7787 We assume REG is a reload reg, and therefore does
7788 not live past labels. It may live past calls or jumps though. */
7790 reg_unused_after (rtx reg, rtx insn)
7792 enum rtx_code code;
7793 rtx set;
7795 /* If the reg is set by this instruction, then it is safe for our
7796 case. Disregard the case where this is a store to memory, since
7797 we are checking a register used in the store address. */
7798 set = single_set (insn);
7799 if (set && GET_CODE (SET_DEST (set)) != MEM
7800 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7801 return 1;
7803 while ((insn = NEXT_INSN (insn)))
7805 rtx set;
7806 if (!INSN_P (insn))
7807 continue;
7809 code = GET_CODE (insn);
7811 #if 0
7812 /* If this is a label that existed before reload, then the register
7813 is dead here. However, if this is a label added by reorg, then
7814 the register may still be live here. We can't tell the difference,
7815 so we just ignore labels completely. */
7816 if (code == CODE_LABEL)
7817 return 1;
7818 /* else */
7819 #endif
7821 if (code == JUMP_INSN)
7822 return 0;
7824 /* If this is a sequence, we must handle them all at once.
7825 We could have for instance a call that sets the target register,
7826 and an insn in a delay slot that uses the register. In this case,
7827 we must return 0. */
7828 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
7830 int i;
7831 int retval = 0;
7833 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
7835 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
7836 rtx set = single_set (this_insn);
7838 if (GET_CODE (this_insn) == CALL_INSN)
7839 code = CALL_INSN;
7840 else if (GET_CODE (this_insn) == JUMP_INSN)
7842 if (INSN_ANNULLED_BRANCH_P (this_insn))
7843 return 0;
7844 code = JUMP_INSN;
7847 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7848 return 0;
7849 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7851 if (GET_CODE (SET_DEST (set)) != MEM)
7852 retval = 1;
7853 else
7854 return 0;
7856 if (set == 0
7857 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
7858 return 0;
7860 if (retval == 1)
7861 return 1;
7862 else if (code == JUMP_INSN)
7863 return 0;
7866 set = single_set (insn);
7867 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7868 return 0;
7869 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7870 return GET_CODE (SET_DEST (set)) != MEM;
7871 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
7872 return 0;
7874 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
7875 return 1;
7877 return 1;
7880 #include "ggc.h"
7882 static GTY(()) rtx fpscr_rtx;
7884 get_fpscr_rtx (void)
7886 if (! fpscr_rtx)
7888 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
7889 REG_USERVAR_P (fpscr_rtx) = 1;
7890 mark_user_reg (fpscr_rtx);
7892 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
7893 mark_user_reg (fpscr_rtx);
7894 return fpscr_rtx;
7897 static GTY(()) tree fpscr_values;
7899 static void
7900 emit_fpu_switch (rtx scratch, int index)
7902 rtx dst, src;
7904 if (fpscr_values == NULL)
7906 tree t;
7908 t = build_index_type (integer_one_node);
7909 t = build_array_type (integer_type_node, t);
7910 t = build_decl (VAR_DECL, get_identifier ("__fpscr_values"), t);
7911 DECL_ARTIFICIAL (t) = 1;
7912 DECL_IGNORED_P (t) = 1;
7913 DECL_EXTERNAL (t) = 1;
7914 TREE_STATIC (t) = 1;
7915 TREE_PUBLIC (t) = 1;
7916 TREE_USED (t) = 1;
7918 fpscr_values = t;
7921 src = DECL_RTL (fpscr_values);
7922 if (no_new_pseudos)
7924 emit_move_insn (scratch, XEXP (src, 0));
7925 if (index != 0)
7926 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
7927 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
7929 else
7930 src = adjust_address (src, PSImode, index * 4);
7932 dst = get_fpscr_rtx ();
7933 emit_move_insn (dst, src);
7936 void
7937 emit_sf_insn (rtx pat)
7939 emit_insn (pat);
7942 void
7943 emit_df_insn (rtx pat)
7945 emit_insn (pat);
7948 void
7949 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7951 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7954 void
7955 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7957 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
7958 get_fpscr_rtx ()));
7961 void
7962 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7964 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7967 void
7968 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7970 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
7971 get_fpscr_rtx ()));
7974 /* ??? gcc does flow analysis strictly after common subexpression
7975 elimination. As a result, common subexpression elimination fails
7976 when there are some intervening statements setting the same register.
7977 If we did nothing about this, this would hurt the precision switching
7978 for SH4 badly. There is some cse after reload, but it is unable to
7979 undo the extra register pressure from the unused instructions, and
7980 it cannot remove auto-increment loads.
7982 A C code example that shows this flow/cse weakness for (at least) SH
7983 and sparc (as of gcc ss-970706) is this:
7985 double
7986 f(double a)
7988 double d;
7989 d = 0.1;
7990 a += d;
7991 d = 1.1;
7992 d = 0.1;
7993 a *= d;
7994 return a;
7997 So we add another pass before common subexpression elimination, to
7998 remove assignments that are dead due to a following assignment in the
7999 same basic block. */
8001 static void
8002 mark_use (rtx x, rtx *reg_set_block)
8004 enum rtx_code code;
8006 if (! x)
8007 return;
8008 code = GET_CODE (x);
8009 switch (code)
8011 case REG:
8013 int regno = REGNO (x);
8014 int nregs = (regno < FIRST_PSEUDO_REGISTER
8015 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
8016 : 1);
8019 reg_set_block[regno + nregs - 1] = 0;
8021 while (--nregs);
8022 break;
8024 case SET:
8026 rtx dest = SET_DEST (x);
8028 if (GET_CODE (dest) == SUBREG)
8029 dest = SUBREG_REG (dest);
8030 if (GET_CODE (dest) != REG)
8031 mark_use (dest, reg_set_block);
8032 mark_use (SET_SRC (x), reg_set_block);
8033 break;
8035 case CLOBBER:
8036 break;
8037 default:
8039 const char *fmt = GET_RTX_FORMAT (code);
8040 int i, j;
8041 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8043 if (fmt[i] == 'e')
8044 mark_use (XEXP (x, i), reg_set_block);
8045 else if (fmt[i] == 'E')
8046 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8047 mark_use (XVECEXP (x, i, j), reg_set_block);
8049 break;
8054 static rtx get_free_reg (HARD_REG_SET);
8056 /* This function returns a register to use to load the address to load
8057 the fpscr from. Currently it always returns r1 or r7, but when we are
8058 able to use pseudo registers after combine, or have a better mechanism
8059 for choosing a register, it should be done here. */
8060 /* REGS_LIVE is the liveness information for the point for which we
8061 need this allocation. In some bare-bones exit blocks, r1 is live at the
8062 start. We can even have all of r0..r3 being live:
8063 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8064 INSN before which new insns are placed will clobber the register
8065 we return. If a basic block consists only of setting the return value
8066 register to a pseudo and using that register, the return value is not
8067 live before or after this block, yet we'll insert our insns right in
8068 the middle. */
8070 static rtx
8071 get_free_reg (HARD_REG_SET regs_live)
8073 if (! TEST_HARD_REG_BIT (regs_live, 1))
8074 return gen_rtx_REG (Pmode, 1);
8076 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8077 there shouldn't be anything but a jump before the function end. */
8078 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8079 return gen_rtx_REG (Pmode, 7);
8082 /* This function will set the fpscr from memory.
8083 MODE is the mode we are setting it to. */
8084 void
8085 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8087 enum attr_fp_mode fp_mode = mode;
8088 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8089 rtx addr_reg = get_free_reg (regs_live);
8091 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8094 /* Is the given character a logical line separator for the assembler? */
8095 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8096 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8097 #endif
8100 sh_insn_length_adjustment (rtx insn)
8102 /* Instructions with unfilled delay slots take up an extra two bytes for
8103 the nop in the delay slot. */
8104 if (((GET_CODE (insn) == INSN
8105 && GET_CODE (PATTERN (insn)) != USE
8106 && GET_CODE (PATTERN (insn)) != CLOBBER)
8107 || GET_CODE (insn) == CALL_INSN
8108 || (GET_CODE (insn) == JUMP_INSN
8109 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8110 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8111 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8112 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8113 return 2;
8115 /* SH2e has a bug that prevents the use of annulled branches, so if
8116 the delay slot is not filled, we'll have to put a NOP in it. */
8117 if (sh_cpu == CPU_SH2E
8118 && GET_CODE (insn) == JUMP_INSN
8119 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8120 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8121 && get_attr_type (insn) == TYPE_CBRANCH
8122 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8123 return 2;
8125 /* sh-dsp parallel processing insns take four bytes instead of two. */
8127 if (GET_CODE (insn) == INSN)
8129 int sum = 0;
8130 rtx body = PATTERN (insn);
8131 const char *template;
8132 char c;
8133 int maybe_label = 1;
8135 if (GET_CODE (body) == ASM_INPUT)
8136 template = XSTR (body, 0);
8137 else if (asm_noperands (body) >= 0)
8138 template
8139 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8140 else
8141 return 0;
8144 int ppi_adjust = 0;
8147 c = *template++;
8148 while (c == ' ' || c == '\t');
8149 /* all sh-dsp parallel-processing insns start with p.
8150 The only non-ppi sh insn starting with p is pref.
8151 The only ppi starting with pr is prnd. */
8152 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8153 ppi_adjust = 2;
8154 /* The repeat pseudo-insn expands to three insns, a total of
8155 six bytes in size. */
8156 else if ((c == 'r' || c == 'R')
8157 && ! strncasecmp ("epeat", template, 5))
8158 ppi_adjust = 4;
8159 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8161 /* If this is a label, it is obviously not a ppi insn. */
8162 if (c == ':' && maybe_label)
8164 ppi_adjust = 0;
8165 break;
8167 else if (c == '\'' || c == '"')
8168 maybe_label = 0;
8169 c = *template++;
8171 sum += ppi_adjust;
8172 maybe_label = c != ':';
8174 while (c);
8175 return sum;
8177 return 0;
8180 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8181 isn't protected by a PIC unspec. */
8183 nonpic_symbol_mentioned_p (rtx x)
8185 register const char *fmt;
8186 register int i;
8188 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8189 || GET_CODE (x) == PC)
8190 return 1;
8192 /* We don't want to look into the possible MEM location of a
8193 CONST_DOUBLE, since we're not going to use it, in general. */
8194 if (GET_CODE (x) == CONST_DOUBLE)
8195 return 0;
8197 if (GET_CODE (x) == UNSPEC
8198 && (XINT (x, 1) == UNSPEC_PIC
8199 || XINT (x, 1) == UNSPEC_GOT
8200 || XINT (x, 1) == UNSPEC_GOTOFF
8201 || XINT (x, 1) == UNSPEC_GOTPLT
8202 || XINT (x, 1) == UNSPEC_GOTTPOFF
8203 || XINT (x, 1) == UNSPEC_DTPOFF
8204 || XINT (x, 1) == UNSPEC_PLT))
8205 return 0;
8207 fmt = GET_RTX_FORMAT (GET_CODE (x));
8208 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8210 if (fmt[i] == 'E')
8212 register int j;
8214 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8215 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8216 return 1;
8218 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8219 return 1;
8222 return 0;
8225 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8226 @GOTOFF in `reg'. */
8228 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8229 rtx reg)
8231 if (tls_symbolic_operand (orig, Pmode))
8232 return orig;
8234 if (GET_CODE (orig) == LABEL_REF
8235 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8237 if (reg == 0)
8238 reg = gen_reg_rtx (Pmode);
8240 emit_insn (gen_symGOTOFF2reg (reg, orig));
8241 return reg;
8243 else if (GET_CODE (orig) == SYMBOL_REF)
8245 if (reg == 0)
8246 reg = gen_reg_rtx (Pmode);
8248 emit_insn (gen_symGOT2reg (reg, orig));
8249 return reg;
8251 return orig;
8254 /* Mark the use of a constant in the literal table. If the constant
8255 has multiple labels, make it unique. */
8256 static rtx
8257 mark_constant_pool_use (rtx x)
8259 rtx insn, lab, pattern;
8261 if (x == NULL)
8262 return x;
8264 switch (GET_CODE (x))
8266 case LABEL_REF:
8267 x = XEXP (x, 0);
8268 case CODE_LABEL:
8269 break;
8270 default:
8271 return x;
8274 /* Get the first label in the list of labels for the same constant
8275 and delete the other labels in the list. */
8276 lab = x;
8277 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8279 if (GET_CODE (insn) != CODE_LABEL
8280 || LABEL_REFS (insn) != NEXT_INSN (insn))
8281 break;
8282 lab = insn;
8285 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8286 INSN_DELETED_P (insn) = 1;
8288 /* Mark constants in a window. */
8289 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8291 if (GET_CODE (insn) != INSN)
8292 continue;
8294 pattern = PATTERN (insn);
8295 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8296 continue;
8298 switch (XINT (pattern, 1))
8300 case UNSPECV_CONST2:
8301 case UNSPECV_CONST4:
8302 case UNSPECV_CONST8:
8303 XVECEXP (pattern, 0, 1) = const1_rtx;
8304 break;
8305 case UNSPECV_WINDOW_END:
8306 if (XVECEXP (pattern, 0, 0) == x)
8307 return lab;
8308 break;
8309 case UNSPECV_CONST_END:
8310 return lab;
8311 default:
8312 break;
8316 return lab;
8319 /* Return true if it's possible to redirect BRANCH1 to the destination
8320 of an unconditional jump BRANCH2. We only want to do this if the
8321 resulting branch will have a short displacement. */
8323 sh_can_redirect_branch (rtx branch1, rtx branch2)
8325 if (flag_expensive_optimizations && simplejump_p (branch2))
8327 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8328 rtx insn;
8329 int distance;
8331 for (distance = 0, insn = NEXT_INSN (branch1);
8332 insn && distance < 256;
8333 insn = PREV_INSN (insn))
8335 if (insn == dest)
8336 return 1;
8337 else
8338 distance += get_attr_length (insn);
8340 for (distance = 0, insn = NEXT_INSN (branch1);
8341 insn && distance < 256;
8342 insn = NEXT_INSN (insn))
8344 if (insn == dest)
8345 return 1;
8346 else
8347 distance += get_attr_length (insn);
8350 return 0;
8353 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8355 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8356 unsigned int new_reg)
8358 /* Interrupt functions can only use registers that have already been
8359 saved by the prologue, even if they would normally be
8360 call-clobbered. */
8362 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8363 return 0;
8365 return 1;
8368 /* Function to update the integer COST
8369 based on the relationship between INSN that is dependent on
8370 DEP_INSN through the dependence LINK. The default is to make no
8371 adjustment to COST. This can be used for example to specify to
8372 the scheduler that an output- or anti-dependence does not incur
8373 the same cost as a data-dependence. The return value should be
8374 the new value for COST. */
8375 static int
8376 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8378 rtx reg, use_pat;
8380 if (TARGET_SHMEDIA)
8382 /* On SHmedia, if the dependence is an anti-dependence or
8383 output-dependence, there is no cost. */
8384 if (REG_NOTE_KIND (link) != 0)
8386 /* However, dependencies between target register loads and
8387 uses of the register in a subsequent block that are separated
8388 by a conditional branch are not modelled - we have to make do with
8389 the anti-dependency between the target register load and the
8390 conditional branch that ends the current block. */
8391 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8392 && GET_CODE (PATTERN (dep_insn)) == SET
8393 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
8394 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
8395 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
8397 int orig_cost = cost;
8398 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
8399 rtx target = ((! note
8400 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
8401 ? insn : JUMP_LABEL (insn));
8402 /* On the likely path, the branch costs 1, on the unlikely path,
8403 it costs 3. */
8404 cost--;
8406 target = next_active_insn (target);
8407 while (target && ! flow_dependent_p (target, dep_insn)
8408 && --cost > 0);
8409 /* If two branches are executed in immediate succession, with the
8410 first branch properly predicted, this causes a stall at the
8411 second branch, hence we won't need the target for the
8412 second branch for two cycles after the launch of the first
8413 branch. */
8414 if (cost > orig_cost - 2)
8415 cost = orig_cost - 2;
8417 else
8418 cost = 0;
8421 else if (get_attr_is_mac_media (insn)
8422 && get_attr_is_mac_media (dep_insn))
8423 cost = 1;
8425 else if (! reload_completed
8426 && GET_CODE (PATTERN (insn)) == SET
8427 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
8428 && GET_CODE (PATTERN (dep_insn)) == SET
8429 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
8430 && cost < 4)
8431 cost = 4;
8432 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8433 that is needed at the target. */
8434 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
8435 && ! flow_dependent_p (insn, dep_insn))
8436 cost--;
8438 else if (REG_NOTE_KIND (link) == 0)
8440 enum attr_type dep_type, type;
8442 if (recog_memoized (insn) < 0
8443 || recog_memoized (dep_insn) < 0)
8444 return cost;
8446 dep_type = get_attr_type (dep_insn);
8447 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8448 cost--;
8449 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8450 && (type = get_attr_type (insn)) != TYPE_CALL
8451 && type != TYPE_SFUNC)
8452 cost--;
8454 /* The only input for a call that is timing-critical is the
8455 function's address. */
8456 if (GET_CODE (insn) == CALL_INSN)
8458 rtx call = PATTERN (insn);
8460 if (GET_CODE (call) == PARALLEL)
8461 call = XVECEXP (call, 0, 0);
8462 if (GET_CODE (call) == SET)
8463 call = SET_SRC (call);
8464 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8465 /* sibcalli_thunk uses a symbol_ref in an unspec. */
8466 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
8467 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
8468 cost = 0;
8470 /* Likewise, the most timing critical input for an sfuncs call
8471 is the function address. However, sfuncs typically start
8472 using their arguments pretty quickly.
8473 Assume a four cycle delay before they are needed. */
8474 /* All sfunc calls are parallels with at least four components.
8475 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8476 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8477 && XVECLEN (PATTERN (insn), 0) >= 4
8478 && (reg = sfunc_uses_reg (insn)))
8480 if (! reg_set_p (reg, dep_insn))
8481 cost -= 4;
8483 /* When the preceding instruction loads the shift amount of
8484 the following SHAD/SHLD, the latency of the load is increased
8485 by 1 cycle. */
8486 else if (TARGET_SH4
8487 && get_attr_type (insn) == TYPE_DYN_SHIFT
8488 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8489 && reg_overlap_mentioned_p (SET_DEST (single_set (dep_insn)),
8490 XEXP (SET_SRC (single_set (insn)),
8491 1)))
8492 cost++;
8493 /* When an LS group instruction with a latency of less than
8494 3 cycles is followed by a double-precision floating-point
8495 instruction, FIPR, or FTRV, the latency of the first
8496 instruction is increased to 3 cycles. */
8497 else if (cost < 3
8498 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8499 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8500 cost = 3;
8501 /* The lsw register of a double-precision computation is ready one
8502 cycle earlier. */
8503 else if (reload_completed
8504 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8505 && (use_pat = single_set (insn))
8506 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8507 SET_SRC (use_pat)))
8508 cost -= 1;
8510 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8511 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8512 cost -= 1;
8514 /* An anti-dependence penalty of two applies if the first insn is a double
8515 precision fadd / fsub / fmul. */
8516 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8517 && recog_memoized (dep_insn) >= 0
8518 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8519 /* A lot of alleged anti-flow dependences are fake,
8520 so check this one is real. */
8521 && flow_dependent_p (dep_insn, insn))
8522 cost = 2;
8525 return cost;
8528 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8529 if DEP_INSN is anti-flow dependent on INSN. */
8530 static int
8531 flow_dependent_p (rtx insn, rtx dep_insn)
8533 rtx tmp = PATTERN (insn);
8535 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8536 return tmp == NULL_RTX;
8539 /* A helper function for flow_dependent_p called through note_stores. */
8540 static void
8541 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8543 rtx * pinsn = (rtx *) data;
8545 if (*pinsn && reg_referenced_p (x, *pinsn))
8546 *pinsn = NULL_RTX;
8549 /* For use by sh_allocate_initial_value. Note that sh.md contains some
8550 'special function' patterns (type sfunc) that clobber pr, but that
8551 do not look like function calls to leaf_function_p. Hence we must
8552 do this extra check. */
8553 static int
8554 sh_pr_n_sets (void)
8556 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8559 /* Return where to allocate pseudo for a given hard register initial
8560 value. */
8561 static rtx
8562 sh_allocate_initial_value (rtx hard_reg)
8564 rtx x;
8566 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
8568 if (current_function_is_leaf
8569 && ! sh_pr_n_sets ()
8570 && ! (TARGET_SHCOMPACT
8571 && ((current_function_args_info.call_cookie
8572 & ~ CALL_COOKIE_RET_TRAMP (1))
8573 || current_function_has_nonlocal_label)))
8574 x = hard_reg;
8575 else
8576 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
8578 else
8579 x = NULL_RTX;
8581 return x;
8584 /* This function returns "2" to indicate dual issue for the SH4
8585 processor. To be used by the DFA pipeline description. */
8586 static int
8587 sh_issue_rate (void)
8589 if (TARGET_SUPERSCALAR)
8590 return 2;
8591 else
8592 return 1;
8595 /* Functions for ready queue reordering for sched1. */
8597 /* Get weight for mode for a set x. */
8598 static short
8599 find_set_regmode_weight (rtx x, enum machine_mode mode)
8601 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8602 return 1;
8603 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8605 if (GET_CODE (SET_DEST (x)) == REG)
8607 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8608 return 1;
8609 else
8610 return 0;
8612 return 1;
8614 return 0;
8617 /* Get regmode weight for insn. */
8618 static short
8619 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8621 short reg_weight = 0;
8622 rtx x;
8624 /* Increment weight for each register born here. */
8625 x = PATTERN (insn);
8626 reg_weight += find_set_regmode_weight (x, mode);
8627 if (GET_CODE (x) == PARALLEL)
8629 int j;
8630 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8632 x = XVECEXP (PATTERN (insn), 0, j);
8633 reg_weight += find_set_regmode_weight (x, mode);
8636 /* Decrement weight for each register that dies here. */
8637 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8639 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8641 rtx note = XEXP (x, 0);
8642 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8643 reg_weight--;
8646 return reg_weight;
8649 /* Calculate regmode weights for all insns of a basic block. */
8650 static void
8651 find_regmode_weight (int b, enum machine_mode mode)
8653 rtx insn, next_tail, head, tail;
8655 get_block_head_tail (b, &head, &tail);
8656 next_tail = NEXT_INSN (tail);
8658 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8660 /* Handle register life information. */
8661 if (!INSN_P (insn))
8662 continue;
8664 if (mode == SFmode)
8665 INSN_REGMODE_WEIGHT (insn, mode) =
8666 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8667 else if (mode == SImode)
8668 INSN_REGMODE_WEIGHT (insn, mode) =
8669 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8673 /* Comparison function for ready queue sorting. */
8674 static int
8675 rank_for_reorder (const void *x, const void *y)
8677 rtx tmp = *(const rtx *) y;
8678 rtx tmp2 = *(const rtx *) x;
8680 /* The insn in a schedule group should be issued first. */
8681 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8682 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8684 /* If insns are equally good, sort by INSN_LUID (original insn order); this
8685 minimizes instruction movement, thus minimizing sched's effect on
8686 register pressure. */
8687 return INSN_LUID (tmp) - INSN_LUID (tmp2);
8690 /* Resort the array A in which only the element at index N may be out of order. */
8691 static void
8692 swap_reorder (rtx *a, int n)
8694 rtx insn = a[n - 1];
8695 int i = n - 2;
8697 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8699 a[i + 1] = a[i];
8700 i -= 1;
8702 a[i + 1] = insn;
8705 #define SCHED_REORDER(READY, N_READY) \
8706 do \
8708 if ((N_READY) == 2) \
8709 swap_reorder (READY, N_READY); \
8710 else if ((N_READY) > 2) \
8711 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8713 while (0)
8715 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8716 macro. */
8717 static void
8718 ready_reorder (rtx *ready, int nready)
8720 SCHED_REORDER (ready, nready);
8723 /* Calculate regmode weights for all insns of all basic blocks. */
8724 static void
8725 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8726 int verbose ATTRIBUTE_UNUSED,
8727 int old_max_uid)
8729 basic_block b;
8731 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8732 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8734 FOR_EACH_BB_REVERSE (b)
8736 find_regmode_weight (b->index, SImode);
8737 find_regmode_weight (b->index, SFmode);
8740 CURR_REGMODE_PRESSURE (SImode) = 0;
8741 CURR_REGMODE_PRESSURE (SFmode) = 0;
8745 /* Cleanup. */
8746 static void
8747 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8748 int verbose ATTRIBUTE_UNUSED)
8750 if (regmode_weight[0])
8752 free (regmode_weight[0]);
8753 regmode_weight[0] = NULL;
8755 if (regmode_weight[1])
8757 free (regmode_weight[1]);
8758 regmode_weight[1] = NULL;
8762 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8763 keep count of register pressures on SImode and SFmode. */
8764 static int
8765 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8766 int sched_verbose ATTRIBUTE_UNUSED,
8767 rtx insn,
8768 int can_issue_more)
8770 if (GET_CODE (PATTERN (insn)) != USE
8771 && GET_CODE (PATTERN (insn)) != CLOBBER)
8772 cached_can_issue_more = can_issue_more - 1;
8773 else
8774 cached_can_issue_more = can_issue_more;
8776 if (reload_completed)
8777 return cached_can_issue_more;
8779 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8780 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8782 return cached_can_issue_more;
8785 static void
8786 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8787 int verbose ATTRIBUTE_UNUSED,
8788 int veclen ATTRIBUTE_UNUSED)
8790 CURR_REGMODE_PRESSURE (SImode) = 0;
8791 CURR_REGMODE_PRESSURE (SFmode) = 0;
8794 /* Some magic numbers. */
8795 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8796 functions that already have high pressure on r0. */
8797 #define R0_MAX_LIFE_REGIONS 2
8798 #define R0_MAX_LIVE_LENGTH 12
8799 /* Register Pressure thresholds for SImode and SFmode registers. */
8800 #define SIMODE_MAX_WEIGHT 5
8801 #define SFMODE_MAX_WEIGHT 10
8803 /* Return true if the pressure is high for MODE. */
8804 static short
8805 high_pressure (enum machine_mode mode)
8807 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8808 functions that already have high pressure on r0. */
8809 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8810 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8811 return 1;
8813 if (mode == SFmode)
8814 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8815 else
8816 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
8819 /* Reorder ready queue if register pressure is high. */
8820 static int
8821 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8822 int sched_verbose ATTRIBUTE_UNUSED,
8823 rtx *ready,
8824 int *n_readyp,
8825 int clock_var ATTRIBUTE_UNUSED)
8827 if (reload_completed)
8828 return sh_issue_rate ();
8830 if (high_pressure (SFmode) || high_pressure (SImode))
8832 ready_reorder (ready, *n_readyp);
8835 return sh_issue_rate ();
8838 /* Skip cycles if the current register pressure is high. */
8839 static int
8840 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8841 int sched_verbose ATTRIBUTE_UNUSED,
8842 rtx *ready ATTRIBUTE_UNUSED,
8843 int *n_readyp ATTRIBUTE_UNUSED,
8844 int clock_var ATTRIBUTE_UNUSED)
8846 if (reload_completed)
8847 return cached_can_issue_more;
8849 if (high_pressure (SFmode) || high_pressure (SImode))
8850 skip_cycles = 1;
8852 return cached_can_issue_more;
8855 /* Skip cycles without sorting the ready queue. This will move insns from
8856 Q -> R. If this is the last cycle we are skipping, allow sorting of the ready
8857 queue by sh_reorder. */
8859 /* Generally, skipping this many cycles is sufficient for all insns to move
8860 from Q -> R. */
8861 #define MAX_SKIPS 8
8863 static int
8864 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8865 int sched_verbose ATTRIBUTE_UNUSED,
8866 rtx insn ATTRIBUTE_UNUSED,
8867 int last_clock_var,
8868 int clock_var,
8869 int *sort_p)
8871 if (reload_completed)
8872 return 0;
8874 if (skip_cycles)
8876 if ((clock_var - last_clock_var) < MAX_SKIPS)
8878 *sort_p = 0;
8879 return 1;
8881 /* If this is the last cycle we are skipping, allow reordering of R. */
8882 if ((clock_var - last_clock_var) == MAX_SKIPS)
8884 *sort_p = 1;
8885 return 1;
8889 skip_cycles = 0;
8891 return 0;
8894 /* SHmedia requires registers for branches, so we can't generate new
8895 branches past reload. */
8896 static bool
8897 sh_cannot_modify_jumps_p (void)
8899 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
8902 static int
8903 sh_target_reg_class (void)
8905 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
8908 static bool
8909 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
8911 HARD_REG_SET dummy;
8912 rtx insn;
8914 if (! shmedia_space_reserved_for_target_registers)
8915 return 0;
8916 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
8917 return 0;
8918 if (calc_live_regs (&dummy) >= 6 * 8)
8919 return 1;
8920 /* This is a borderline case. See if we got a nested loop, or a loop
8921 with a call, or with more than 4 labels inside. */
8922 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8924 if (GET_CODE (insn) == NOTE
8925 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
8927 int labels = 0;
8931 insn = NEXT_INSN (insn);
8932 if ((GET_CODE (insn) == NOTE
8933 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
8934 || GET_CODE (insn) == CALL_INSN
8935 || (GET_CODE (insn) == CODE_LABEL && ++labels > 4))
8936 return 1;
8938 while (GET_CODE (insn) != NOTE
8939 || NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END);
8942 return 0;
8945 static bool
8946 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
8948 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
8952 On the SH1..SH4, the trampoline looks like
8953 2 0002 D202 mov.l l2,r2
8954 1 0000 D301 mov.l l1,r3
8955 3 0004 422B jmp @r2
8956 4 0006 0009 nop
8957 5 0008 00000000 l1: .long area
8958 6 000c 00000000 l2: .long function
8960 SH5 (compact) uses r1 instead of r3 for the static chain. */
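/* Descriptive note: in the plain SH1..SH4 path at the end of
   sh_initialize_trampoline below, the two instruction words are stored at
   offsets 0 and 4, CXT (the "area" word l1) at offset 8 and FNADDR (the
   "function" word l2) at offset 12, matching the layout shown above.  */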
8963 /* Emit RTL insns to initialize the variable parts of a trampoline.
8964 FNADDR is an RTX for the address of the function's pure code.
8965 CXT is an RTX for the static chain value for the function. */
8967 void
8968 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
8970 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
8972 if (TARGET_SHMEDIA64)
8974 rtx tramp_templ;
8975 int fixed_len;
8977 rtx movi1 = GEN_INT (0xcc000010);
8978 rtx shori1 = GEN_INT (0xc8000010);
8979 rtx src, dst;
8981 /* The following trampoline works within a +- 128 KB range for cxt:
8982 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
8983 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
8984 gettr tr1,r1; blink tr0,r63 */
8985 /* Address rounding makes it hard to compute the exact bounds of the
8986 offset for this trampoline, but we have a rather generous offset
8987 range, so frame_offset should do fine as an upper bound. */
8988 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
8990 /* ??? could optimize this trampoline initialization
8991 by writing DImode words with two insns each. */
8992 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
8993 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
8994 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
8995 insn = gen_rtx_AND (DImode, insn, mask);
8996 /* Or in ptb/u .,tr1 pattern */
8997 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
8998 insn = force_operand (insn, NULL_RTX);
8999 insn = gen_lowpart (SImode, insn);
9000 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
9001 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9002 insn = gen_rtx_AND (DImode, insn, mask);
9003 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9004 insn = gen_lowpart (SImode, insn);
9005 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
9006 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9007 insn = gen_rtx_AND (DImode, insn, mask);
9008 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9009 insn = gen_lowpart (SImode, insn);
9010 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
9011 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9012 insn = gen_rtx_AND (DImode, insn, mask);
9013 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9014 insn = gen_lowpart (SImode, insn);
9015 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
9016 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9017 insn = gen_rtx_AND (DImode, insn, mask);
9018 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9019 insn = gen_lowpart (SImode, insn);
9020 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
9021 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
9022 GEN_INT (0x6bf10600));
9023 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
9024 GEN_INT (0x4415fc10));
9025 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
9026 GEN_INT (0x4401fff0));
9027 emit_insn (gen_ic_invalidate_line (tramp));
9028 return;
9030 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
9031 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9033 tramp_templ = gen_datalabel_ref (tramp_templ);
9034 dst = tramp_mem;
9035 src = gen_const_mem (BLKmode, tramp_templ);
9036 set_mem_align (dst, 256);
9037 set_mem_align (src, 64);
9038 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9040 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
9041 emit_move_insn (adjust_address (tramp_mem, Pmode,
9042 fixed_len + GET_MODE_SIZE (Pmode)),
9043 cxt);
9044 emit_insn (gen_ic_invalidate_line (tramp));
9045 return;
9047 else if (TARGET_SHMEDIA)
9049 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9050 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9051 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9052 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9053 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9054 rotated 10 right, and the higher 16 bits of every 32 selected. */
9055 rtx movishori
9056 = force_reg (V2HImode, (simplify_gen_subreg
9057 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9058 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9059 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9061 tramp = force_reg (Pmode, tramp);
9062 fnaddr = force_reg (SImode, fnaddr);
9063 cxt = force_reg (SImode, cxt);
9064 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9065 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9066 movishori));
9067 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9068 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9069 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9070 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
9071 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9072 gen_rtx_SUBREG (V2HImode, cxt, 0),
9073 movishori));
9074 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9075 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9076 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9077 if (TARGET_LITTLE_ENDIAN)
9079 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9080 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9082 else
9084 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9085 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9087 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
9088 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
9089 emit_insn (gen_ic_invalidate_line (tramp));
9090 return;
9092 else if (TARGET_SHCOMPACT)
9094 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9095 return;
9097 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
9098 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9099 SImode));
9100 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
9101 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9102 SImode));
9103 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
9104 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
9105 if (TARGET_HARVARD)
9107 if (TARGET_USERMODE)
9108 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9109 FUNCTION_ORDINARY),
9110 0, VOIDmode, 1, tramp, SImode);
9111 else
9112 emit_insn (gen_ic_invalidate_line (tramp));
9116 /* FIXME: This is overly conservative. A SHcompact function that
9117 receives arguments ``by reference'' will have them stored in its
9118 own stack frame, so it must not pass pointers or references to
9119 these arguments to other functions by means of sibling calls. */
9120 /* If PIC, we cannot make sibling calls to global functions
9121 because the PLT requires r12 to be live. */
9122 static bool
9123 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9125 return (1
9126 && (! TARGET_SHCOMPACT
9127 || current_function_args_info.stack_regs == 0)
9128 && ! sh_cfun_interrupt_handler_p ()
9129 && (! flag_pic
9130 || (decl && ! TREE_PUBLIC (decl))
9131 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
9134 /* Machine specific built-in functions. */
9136 struct builtin_description
9138 const enum insn_code icode;
9139 const char *const name;
9140 int signature;
9143 /* describe number and signedness of arguments; arg[0] == result
9144 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
9145 /* 9: 64-bit pointer, 10: 32-bit pointer. */
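/* As a worked reading of one entry (illustrative only): the
   SH_BLTIN_MAC_HISI row { 1, 4, 4, 1 } below denotes an unsigned result,
   two "don't care" operands and a final unsigned operand.  */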
9146 static const char signature_args[][4] =
9148 #define SH_BLTIN_V2SI2 0
9149 { 4, 4 },
9150 #define SH_BLTIN_V4HI2 1
9151 { 4, 4 },
9152 #define SH_BLTIN_V2SI3 2
9153 { 4, 4, 4 },
9154 #define SH_BLTIN_V4HI3 3
9155 { 4, 4, 4 },
9156 #define SH_BLTIN_V8QI3 4
9157 { 4, 4, 4 },
9158 #define SH_BLTIN_MAC_HISI 5
9159 { 1, 4, 4, 1 },
9160 #define SH_BLTIN_SH_HI 6
9161 { 4, 4, 1 },
9162 #define SH_BLTIN_SH_SI 7
9163 { 4, 4, 1 },
9164 #define SH_BLTIN_V4HI2V2SI 8
9165 { 4, 4, 4 },
9166 #define SH_BLTIN_V4HI2V8QI 9
9167 { 4, 4, 4 },
9168 #define SH_BLTIN_SISF 10
9169 { 4, 2 },
9170 #define SH_BLTIN_LDUA_L 11
9171 { 2, 10 },
9172 #define SH_BLTIN_LDUA_Q 12
9173 { 1, 10 },
9174 #define SH_BLTIN_STUA_L 13
9175 { 0, 10, 2 },
9176 #define SH_BLTIN_STUA_Q 14
9177 { 0, 10, 1 },
9178 #define SH_BLTIN_LDUA_L64 15
9179 { 2, 9 },
9180 #define SH_BLTIN_LDUA_Q64 16
9181 { 1, 9 },
9182 #define SH_BLTIN_STUA_L64 17
9183 { 0, 9, 2 },
9184 #define SH_BLTIN_STUA_Q64 18
9185 { 0, 9, 1 },
9186 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9187 #define SH_BLTIN_2 19
9188 #define SH_BLTIN_SU 19
9189 { 1, 2 },
9190 #define SH_BLTIN_3 20
9191 #define SH_BLTIN_SUS 20
9192 { 2, 2, 1 },
9193 #define SH_BLTIN_PSSV 21
9194 { 0, 8, 2, 2 },
9195 #define SH_BLTIN_XXUU 22
9196 #define SH_BLTIN_UUUU 22
9197 { 1, 1, 1, 1 },
9198 #define SH_BLTIN_PV 23
9199 { 0, 8 },
9201 /* mcmv: operands considered unsigned. */
9202 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9203 /* mperm: control value considered unsigned int. */
9204 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9205 /* mshards_q: returns signed short. */
9206 /* nsb: takes long long arg, returns unsigned char. */
9207 static const struct builtin_description bdesc[] =
9209 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9210 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9211 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9212 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9213 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9214 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9215 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9216 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9217 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9218 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9219 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9220 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9221 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9222 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9223 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9224 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9225 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9226 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9227 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9228 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9229 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9230 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9231 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9232 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9233 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9234 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9235 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9236 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9237 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9238 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9239 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9240 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9241 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9242 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9243 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9244 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9245 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9246 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9247 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9248 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9249 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9250 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9251 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9252 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9253 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9254 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9255 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9256 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9257 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9258 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9259 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9260 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9261 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9262 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9263 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9264 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9265 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9266 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9267 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9268 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9269 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9270 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
9271 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
9272 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
9273 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9274 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9275 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9276 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9277 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9278 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9279 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9280 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9281 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9282 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
9283 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
9284 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
9285 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
9286 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
9287 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
9288 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
9289 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
9290 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9291 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9292 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9295 static void
9296 sh_media_init_builtins (void)
9298 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9299 const struct builtin_description *d;
9301 memset (shared, 0, sizeof shared);
9302 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9304 tree type, arg_type = 0;
9305 int signature = d->signature;
9306 int i;
9308 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9309 type = shared[signature];
9310 else
9312 int has_result = signature_args[signature][0] != 0;
9314 if ((signature_args[signature][1] & 8)
9315 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
9316 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
9317 continue;
9318 if (! TARGET_FPU_ANY
9319 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9320 continue;
9321 type = void_list_node;
9322 for (i = 3; ; i--)
9324 int arg = signature_args[signature][i];
9325 int opno = i - 1 + has_result;
9327 if (arg & 8)
9328 arg_type = ptr_type_node;
9329 else if (arg)
9330 arg_type = (*lang_hooks.types.type_for_mode)
9331 (insn_data[d->icode].operand[opno].mode,
9332 (arg & 1));
9333 else if (i)
9334 continue;
9335 else
9336 arg_type = void_type_node;
9337 if (i == 0)
9338 break;
9339 type = tree_cons (NULL_TREE, arg_type, type);
9341 type = build_function_type (arg_type, type);
9342 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9343 shared[signature] = type;
9345 lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9346 NULL, NULL_TREE);
9350 /* Implements target hook vector_mode_supported_p. */
9351 bool
9352 sh_vector_mode_supported_p (enum machine_mode mode)
9354 if (TARGET_FPU_ANY
9355 && ((mode == V2SFmode)
9356 || (mode == V4SFmode)
9357 || (mode == V16SFmode)))
9358 return true;
9360 else if (TARGET_SHMEDIA
9361 && ((mode == V8QImode)
9362 || (mode == V2HImode)
9363 || (mode == V4HImode)
9364 || (mode == V2SImode)))
9365 return true;
9367 return false;
9370 /* Implements target hook dwarf_calling_convention. Return an enum
9371 of dwarf_calling_convention. */
9373 sh_dwarf_calling_convention (tree func)
9375 if (sh_attr_renesas_p (func))
9376 return DW_CC_GNU_renesas_sh;
9378 return DW_CC_normal;
9381 static void
9382 sh_init_builtins (void)
9384 if (TARGET_SHMEDIA)
9385 sh_media_init_builtins ();
9388 /* Expand an expression EXP that calls a built-in function,
9389 with result going to TARGET if that's convenient
9390 (and in mode MODE if that's convenient).
9391 SUBTARGET may be used as the target for computing one of EXP's operands.
9392 IGNORE is nonzero if the value is to be ignored. */
9394 static rtx
9395 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9396 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9398 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9399 tree arglist = TREE_OPERAND (exp, 1);
9400 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9401 const struct builtin_description *d = &bdesc[fcode];
9402 enum insn_code icode = d->icode;
9403 int signature = d->signature;
9404 enum machine_mode tmode = VOIDmode;
9405 int nop = 0, i;
9406 rtx op[4];
9407 rtx pat = 0;
9409 if (signature_args[signature][0])
9411 if (ignore)
9412 return 0;
9414 tmode = insn_data[icode].operand[0].mode;
9415 if (! target
9416 || GET_MODE (target) != tmode
9417 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9418 target = gen_reg_rtx (tmode);
9419 op[nop++] = target;
9421 else
9422 target = 0;
9424 for (i = 1; i <= 3; i++, nop++)
9426 tree arg;
9427 enum machine_mode opmode, argmode;
9428 tree optype;
9430 if (! signature_args[signature][i])
9431 break;
9432 arg = TREE_VALUE (arglist);
9433 if (arg == error_mark_node)
9434 return const0_rtx;
9435 arglist = TREE_CHAIN (arglist);
9436 if (signature_args[signature][i] & 8)
9438 opmode = ptr_mode;
9439 optype = ptr_type_node;
9441 else
9443 opmode = insn_data[icode].operand[nop].mode;
9444 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
9446 argmode = TYPE_MODE (TREE_TYPE (arg));
9447 if (argmode != opmode)
9448 arg = build1 (NOP_EXPR, optype, arg);
9449 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9450 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9451 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9454 switch (nop)
9456 case 1:
9457 pat = (*insn_data[d->icode].genfun) (op[0]);
9458 break;
9459 case 2:
9460 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9461 break;
9462 case 3:
9463 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9464 break;
9465 case 4:
9466 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9467 break;
9468 default:
9469 gcc_unreachable ();
9471 if (! pat)
9472 return 0;
9473 emit_insn (pat);
9474 return target;
9477 void
9478 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9480 rtx sel0 = const0_rtx;
9481 rtx sel1 = const1_rtx;
9482 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9483 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9485 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9486 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9489 void
9490 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9492 rtx sel0 = const0_rtx;
9493 rtx sel1 = const1_rtx;
9494 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9495 = gen_binary_sf_op;
9496 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9498 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9499 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
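/* Sketch of the intent (illustrative): each of the two insns emitted by the
   expanders above applies the scalar SFmode operation to one vector lane,
   selected via sel0 / sel1, while keeping the other lane of the destination
   unchanged, so the pair together implements the full two-lane V2SF
   operation, e.g. for PLUS: dst[0] = a[0] + b[0]; dst[1] = a[1] + b[1].  */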
9502 /* Return true if a mode change from FROM to TO is invalid for the
9503 registers in class CLASS. */
9504 bool
9505 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9506 enum reg_class class)
9508 /* We want to enable the use of SUBREGs as a means to
9509 VEC_SELECT a single element of a vector. */
9510 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9511 return (reg_classes_intersect_p (GENERAL_REGS, class));
9513 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9515 if (TARGET_LITTLE_ENDIAN)
9517 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9518 return reg_classes_intersect_p (DF_REGS, class);
9520 else
9522 if (GET_MODE_SIZE (from) < 8)
9523 return reg_classes_intersect_p (DF_HI_REGS, class);
9526 return 0;
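/* Illustrative example (sketch; the register name is arbitrary): the SFmode
   test above is what permits reading one lane of a vector held in a
   floating-point register through a SUBREG such as
   (subreg:SF (reg:V2SF fr4) 0), while the same mode change is rejected for
   any class that intersects GENERAL_REGS.  */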
9530 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9531 that label is used. */
9533 void
9534 sh_mark_label (rtx address, int nuses)
9536 if (GOTOFF_P (address))
9538 /* Extract the label or symbol. */
9539 address = XEXP (address, 0);
9540 if (GET_CODE (address) == PLUS)
9541 address = XEXP (address, 0);
9542 address = XVECEXP (address, 0, 0);
9544 if (GET_CODE (address) == LABEL_REF
9545 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9546 LABEL_NUSES (XEXP (address, 0)) += nuses;
9549 /* Compute extra cost of moving data between one register class
9550 and another. */
9552 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9553 uses this information. Hence, the general register <-> floating point
9554 register information here is not used for SFmode. */
9557 sh_register_move_cost (enum machine_mode mode,
9558 enum reg_class srcclass, enum reg_class dstclass)
9560 if (dstclass == T_REGS || dstclass == PR_REGS)
9561 return 10;
9563 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9564 return 4;
9566 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9567 && REGCLASS_HAS_FP_REG (srcclass)
9568 && REGCLASS_HAS_FP_REG (dstclass))
9569 return 4;
9571 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
9572 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
9574 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9575 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9576 return 9;
9578 if ((REGCLASS_HAS_FP_REG (dstclass)
9579 && REGCLASS_HAS_GENERAL_REG (srcclass))
9580 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9581 && REGCLASS_HAS_FP_REG (srcclass)))
9582 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9583 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9585 if ((dstclass == FPUL_REGS
9586 && REGCLASS_HAS_GENERAL_REG (srcclass))
9587 || (srcclass == FPUL_REGS
9588 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9589 return 5;
9591 if ((dstclass == FPUL_REGS
9592 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9593 || (srcclass == FPUL_REGS
9594 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9595 return 7;
9597 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9598 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9599 return 20;
9601 /* ??? ptabs faults on (value & 0x3) == 0x3 */
9602 if (TARGET_SHMEDIA
9603 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
9605 if (sh_gettrcost >= 0)
9606 return sh_gettrcost;
9607 else if (!TARGET_PT_FIXED)
9608 return 100;
9611 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9612 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9613 return 4;
9615 if (TARGET_SHMEDIA
9616 || (TARGET_FMOVD
9617 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9618 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9619 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
9621 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
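/* Worked example (sketch): a DFmode move (GET_MODE_SIZE == 8) between a
   general register and a floating-point register without SHMEDIA or FMOVD
   takes the REGCLASS_HAS_FP_REG / REGCLASS_HAS_GENERAL_REG case above and
   costs 12 * ((8 + 7) / 8) == 12, while an SImode move between two general
   registers falls through to the final return and costs
   2 * ((4 + 3) / 4) == 2.  */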
9624 static rtx emit_load_ptr (rtx, rtx);
9626 static rtx
9627 emit_load_ptr (rtx reg, rtx addr)
9629 rtx mem = gen_const_mem (ptr_mode, addr);
9631 if (Pmode != ptr_mode)
9632 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9633 return emit_move_insn (reg, mem);
9636 static void
9637 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9638 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9639 tree function)
9641 CUMULATIVE_ARGS cum;
9642 int structure_value_byref = 0;
9643 rtx this, this_value, sibcall, insns, funexp;
9644 tree funtype = TREE_TYPE (function);
9645 int simple_add = CONST_OK_FOR_ADD (delta);
9646 int did_load = 0;
9647 rtx scratch0, scratch1, scratch2;
9648 unsigned i;
9650 reload_completed = 1;
9651 epilogue_completed = 1;
9652 no_new_pseudos = 1;
9653 current_function_uses_only_leaf_regs = 1;
9654 reset_block_changes ();
9656 emit_note (NOTE_INSN_PROLOGUE_END);
9658 /* Find the "this" pointer. We have such a wide range of ABIs for the
9659 SH that it's best to do this completely machine independently.
9660 "this" is passed as first argument, unless a structure return pointer
9661 comes first, in which case "this" comes second. */
9662 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
9663 #ifndef PCC_STATIC_STRUCT_RETURN
9664 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9665 structure_value_byref = 1;
9666 #endif /* not PCC_STATIC_STRUCT_RETURN */
9667 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
9669 tree ptype = build_pointer_type (TREE_TYPE (funtype));
9671 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
9673 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
9675 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9676 static chain pointer (even if you can't have nested virtual functions
9677 right now, someone might implement them sometime), and the rest of the
9678 registers are used for argument passing, are callee-saved, or reserved. */
9679 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
9680 -ffixed-reg has been used. */
9681 if (! call_used_regs[0] || fixed_regs[0])
9682 error ("r0 needs to be available as a call-clobbered register");
9683 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
9684 if (! TARGET_SH5)
9686 if (call_used_regs[1] && ! fixed_regs[1])
9687 scratch1 = gen_rtx_REG (ptr_mode, 1);
9688 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9689 pointing to where struct values should be returned. */
9690 if (call_used_regs[3] && ! fixed_regs[3])
9691 scratch2 = gen_rtx_REG (Pmode, 3);
9693 else if (TARGET_SHMEDIA)
9695 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
9696 if (i != REGNO (scratch0) &&
9697 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
9699 scratch1 = gen_rtx_REG (ptr_mode, i);
9700 break;
9702 if (scratch1 == scratch0)
9703 error ("Need a second call-clobbered general purpose register");
9704 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
9705 if (call_used_regs[i] && ! fixed_regs[i])
9707 scratch2 = gen_rtx_REG (Pmode, i);
9708 break;
9710 if (scratch2 == scratch0)
9711 error ("Need a call-clobbered target register");
9714 this_value = plus_constant (this, delta);
9715 if (vcall_offset
9716 && (simple_add || scratch0 != scratch1)
9717 && strict_memory_address_p (ptr_mode, this_value))
9719 emit_load_ptr (scratch0, this_value);
9720 did_load = 1;
9723 if (!delta)
9724 ; /* Do nothing. */
9725 else if (simple_add)
9726 emit_move_insn (this, this_value);
9727 else
9729 emit_move_insn (scratch1, GEN_INT (delta));
9730 emit_insn (gen_add2_insn (this, scratch1));
9733 if (vcall_offset)
9735 rtx offset_addr;
9737 if (!did_load)
9738 emit_load_ptr (scratch0, this);
9740 offset_addr = plus_constant (scratch0, vcall_offset);
9741 if (strict_memory_address_p (ptr_mode, offset_addr))
9742 ; /* Do nothing. */
9743 else if (! TARGET_SH5 && scratch0 != scratch1)
9745 /* scratch0 != scratch1, and we have indexed loads. Get better
9746 schedule by loading the offset into r1 and using an indexed
9747 load - then the load of r1 can issue before the load from
9748 (this + delta) finishes. */
9749 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9750 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
9752 else if (CONST_OK_FOR_ADD (vcall_offset))
9754 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
9755 offset_addr = scratch0;
9757 else if (scratch0 != scratch1)
9759 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9760 emit_insn (gen_add2_insn (scratch0, scratch1));
9761 offset_addr = scratch0;
9763 else
9764 gcc_unreachable (); /* FIXME */
9765 emit_load_ptr (scratch0, offset_addr);
9767 if (Pmode != ptr_mode)
9768 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
9769 emit_insn (gen_add2_insn (this, scratch0));
9772 /* Generate a tail call to the target function. */
9773 if (! TREE_USED (function))
9775 assemble_external (function);
9776 TREE_USED (function) = 1;
9778 funexp = XEXP (DECL_RTL (function), 0);
9779 /* If the function is overridden, so is the thunk, hence we don't
9780 need GOT addressing even if this is a public symbol. */
9781 #if 0
9782 if (TARGET_SH1 && ! flag_weak)
9783 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
9784 else
9785 #endif
9786 if (TARGET_SH2 && flag_pic)
9788 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
9789 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
9791 else
9793 if (TARGET_SHMEDIA && flag_pic)
9795 funexp = gen_sym2PIC (funexp);
9796 PUT_MODE (funexp, Pmode);
9798 emit_move_insn (scratch2, funexp);
9799 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
9800 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
9802 sibcall = emit_call_insn (sibcall);
9803 SIBLING_CALL_P (sibcall) = 1;
9804 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
9805 emit_barrier ();
9807 /* Run just enough of rest_of_compilation to do scheduling and get
9808 the insns emitted. Note that use_thunk calls
9809 assemble_start_function and assemble_end_function. */
9811 insn_locators_initialize ();
9812 insns = get_insns ();
9814 if (optimize > 0)
9816 /* Initialize the bitmap obstacks. */
9817 bitmap_obstack_initialize (NULL);
9818 bitmap_obstack_initialize (&reg_obstack);
9819 if (! cfun->cfg)
9820 init_flow ();
9821 rtl_register_cfg_hooks ();
9822 init_rtl_bb_info (ENTRY_BLOCK_PTR);
9823 init_rtl_bb_info (EXIT_BLOCK_PTR);
9824 ENTRY_BLOCK_PTR->flags |= BB_RTL;
9825 EXIT_BLOCK_PTR->flags |= BB_RTL;
9826 find_basic_blocks (insns);
9828 if (flag_schedule_insns_after_reload)
9830 life_analysis (dump_file, PROP_FINAL);
9832 split_all_insns (1);
9834 schedule_insns (dump_file);
9836 /* We must split jmp insn in PIC case. */
9837 else if (flag_pic)
9838 split_all_insns_noflow ();
9841 sh_reorg ();
9843 if (optimize > 0 && flag_delayed_branch)
9844 dbr_schedule (insns, dump_file);
9846 shorten_branches (insns);
9847 final_start_function (insns, file, 1);
9848 final (insns, file, 1);
9849 final_end_function ();
9851 if (optimize > 0)
9853 /* Release all memory allocated by flow. */
9854 free_basic_block_vars ();
9856 /* Release the bitmap obstacks. */
9857 bitmap_obstack_release (&reg_obstack);
9858 bitmap_obstack_release (NULL);
9861 reload_completed = 0;
9862 epilogue_completed = 0;
9863 no_new_pseudos = 0;
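/* Summary sketch of the thunk emitted above: it adjusts the incoming "this"
   pointer by DELTA, optionally adds the vtable adjustment loaded from the
   address *this + VCALL_OFFSET, and then sibling-calls FUNCTION, i.e. roughly
     this += DELTA;
     if (VCALL_OFFSET)
       this += *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);
     goto FUNCTION;  */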
9867 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
9869 rtx sym;
9871 /* If this is not an ordinary function, the name usually comes from a
9872 string literal or an sprintf buffer. Make sure we use the same
9873 string consistently, so that cse will be able to unify address loads. */
9874 if (kind != FUNCTION_ORDINARY)
9875 name = IDENTIFIER_POINTER (get_identifier (name));
9876 sym = gen_rtx_SYMBOL_REF (Pmode, name);
9877 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
9878 if (flag_pic)
9879 switch (kind)
9881 case FUNCTION_ORDINARY:
9882 break;
9883 case SFUNC_GOT:
9885 rtx reg = target ? target : gen_reg_rtx (Pmode);
9887 emit_insn (gen_symGOT2reg (reg, sym));
9888 sym = reg;
9889 break;
9891 case SFUNC_STATIC:
9893 /* ??? To allow cse to work, we use GOTOFF relocations.
9894 We could add combiner patterns to transform this into
9895 straight pc-relative calls with sym2PIC / bsrf when
9896 label load and function call are still 1:1 and in the
9897 same basic block during combine. */
9898 rtx reg = target ? target : gen_reg_rtx (Pmode);
9900 emit_insn (gen_symGOTOFF2reg (reg, sym));
9901 sym = reg;
9902 break;
9905 if (target && sym != target)
9907 emit_move_insn (target, sym);
9908 return target;
9910 return sym;
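/* Usage note (sketch; the routine name below is only an illustration):
     function_symbol (NULL_RTX, "__udivsi3_i4", SFUNC_STATIC);
   Because the name is funnelled through get_identifier above, repeated calls
   with equal strings produce SYMBOL_REFs sharing the same string, which is
   what lets cse unify the corresponding address loads.  */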
9913 /* Find the number of a general purpose register in S. */
9914 static int
9915 scavenge_reg (HARD_REG_SET *s)
9917 int r;
9918 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
9919 if (TEST_HARD_REG_BIT (*s, r))
9920 return r;
9921 return -1;
9925 sh_get_pr_initial_val (void)
9927 rtx val;
9929 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
9930 PR register on SHcompact, because it might be clobbered by the prologue.
9931 We check first if that is known to be the case. */
9932 if (TARGET_SHCOMPACT
9933 && ((current_function_args_info.call_cookie
9934 & ~ CALL_COOKIE_RET_TRAMP (1))
9935 || current_function_has_nonlocal_label))
9936 return gen_frame_mem (SImode, return_address_pointer_rtx);
9938 /* If we haven't finished rtl generation, there might be a nonlocal label
9939 that we haven't seen yet.
9940 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
9941 is set, unless it has been called before for the same register. And even
9942 then, we end in trouble if we didn't use the register in the same
9943 basic block before. So call get_hard_reg_initial_val now and wrap it
9944 in an unspec if we might need to replace it. */
9945 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
9946 combine can put the pseudo returned by get_hard_reg_initial_val into
9947 instructions that need a general purpose register, which will fail to
9948 be recognized when the pseudo becomes allocated to PR. */
9950 val = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9951 if (TARGET_SH1)
9952 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
9953 return val;
9957 sh_expand_t_scc (enum rtx_code code, rtx target)
9959 rtx result = target;
9960 HOST_WIDE_INT val;
9962 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
9963 || GET_CODE (sh_compare_op1) != CONST_INT)
9964 return 0;
9965 if (GET_CODE (result) != REG)
9966 result = gen_reg_rtx (SImode);
9967 val = INTVAL (sh_compare_op1);
9968 if ((code == EQ && val == 1) || (code == NE && val == 0))
9969 emit_insn (gen_movt (result));
9970 else if ((code == EQ && val == 0) || (code == NE && val == 1))
9972 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
9973 emit_insn (gen_subc (result, result, result));
9974 emit_insn (gen_addsi3 (result, result, const1_rtx));
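/* Sketch of the trick above: subc with both inputs equal to RESULT computes
   RESULT - RESULT - T = -T, and the following add of 1 yields 1 - T, i.e.
   the logical negation of the T bit, which is exactly what comparing the
   T register with EQ 0 / NE 1 asks for.  */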
9976 else if (code == EQ || code == NE)
9977 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
9978 else
9979 return 0;
9980 if (result != target)
9981 emit_move_insn (target, result);
9982 return 1;
9985 /* INSN is an sfunc; return the rtx that describes the address used. */
9986 static rtx
9987 extract_sfunc_addr (rtx insn)
9989 rtx pattern, part = NULL_RTX;
9990 int len, i;
9992 pattern = PATTERN (insn);
9993 len = XVECLEN (pattern, 0);
9994 for (i = 0; i < len; i++)
9996 part = XVECEXP (pattern, 0, i);
9997 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
9998 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
9999 return XEXP (part, 0);
10001 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
10002 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
10005 /* Verify that the register in use_sfunc_addr still agrees with the address
10006 used in the sfunc. This prevents fill_slots_from_thread from changing
10007 use_sfunc_addr.
10008 INSN is the use_sfunc_addr instruction, and REG is the register it
10009 guards. */
10011 check_use_sfunc_addr (rtx insn, rtx reg)
10013 /* Search for the sfunc. It should really come right after INSN. */
10014 while ((insn = NEXT_INSN (insn)))
10016 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
10017 break;
10018 if (! INSN_P (insn))
10019 continue;
10021 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
10022 insn = XVECEXP (PATTERN (insn), 0, 0);
10023 if (GET_CODE (PATTERN (insn)) != PARALLEL
10024 || get_attr_type (insn) != TYPE_SFUNC)
10025 continue;
10026 return rtx_equal_p (extract_sfunc_addr (insn), reg);
10028 gcc_unreachable ();
10031 /* This function returns a constant rtx that represents 2**15 / pi in
10032 SFmode. It's used to scale SFmode angles, in radians, to a
10033 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
10034 maps to 0x10000. */
10036 static GTY(()) rtx sh_fsca_sf2int_rtx;
10039 sh_fsca_sf2int (void)
10041 if (! sh_fsca_sf2int_rtx)
10043 REAL_VALUE_TYPE rv;
10045 real_from_string (&rv, "10430.378350470453");
10046 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10049 return sh_fsca_sf2int_rtx;
10052 /* This function returns a constant rtx that represents 2**15 / pi in
10053 DFmode. It's used to scale DFmode angles, in radians, to a
10054 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
10055 maps to 0x10000. */
10057 static GTY(()) rtx sh_fsca_df2int_rtx;
10060 sh_fsca_df2int (void)
10062 if (! sh_fsca_df2int_rtx)
10064 REAL_VALUE_TYPE rv;
10066 real_from_string (&rv, "10430.378350470453");
10067 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10070 return sh_fsca_df2int_rtx;
10073 /* This function returns a constant rtx that represents pi / 2**15 in
10074 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
10075 of a full circle back to an SFmode value, i.e., 0x10000 maps to
10076 2*pi. */
10078 static GTY(()) rtx sh_fsca_int2sf_rtx;
10081 sh_fsca_int2sf (void)
10083 if (! sh_fsca_int2sf_rtx)
10085 REAL_VALUE_TYPE rv;
10087 real_from_string (&rv, "9.587379924285257e-5");
10088 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10091 return sh_fsca_int2sf_rtx;
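/* Worked arithmetic behind the constants above: 2**15 / pi = 32768 / pi
   = 10430.378350470453..., so multiplying radians by sh_fsca_sf2int () /
   sh_fsca_df2int () rescales angles so that a full circle of 2*pi radians
   becomes 0x10000 units, as fsca expects; pi / 2**15
   = 9.587379924285257e-5... is the inverse factor returned by
   sh_fsca_int2sf () for converting back to radians.  */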
10094 /* Initialize the CUMULATIVE_ARGS structure. */
10096 void
10097 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10098 tree fntype,
10099 rtx libname ATTRIBUTE_UNUSED,
10100 tree fndecl,
10101 signed int n_named_args,
10102 enum machine_mode mode)
10104 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10105 pcum->free_single_fp_reg = 0;
10106 pcum->stack_regs = 0;
10107 pcum->byref_regs = 0;
10108 pcum->byref = 0;
10109 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10111 /* XXX - Should we check TARGET_HITACHI here ??? */
10112 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10114 if (fntype)
10116 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10117 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10118 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10119 pcum->arg_count [(int) SH_ARG_INT]
10120 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10122 pcum->call_cookie
10123 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10124 && pcum->arg_count [(int) SH_ARG_INT] == 0
10125 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10126 ? int_size_in_bytes (TREE_TYPE (fntype))
10127 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10128 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10129 == FIRST_RET_REG));
10131 else
10133 pcum->arg_count [(int) SH_ARG_INT] = 0;
10134 pcum->prototype_p = FALSE;
10135 if (mode != VOIDmode)
10137 pcum->call_cookie =
10138 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10139 && GET_MODE_SIZE (mode) > 4
10140 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10142 /* If the default ABI is the Renesas ABI then all library
10143 calls must assume that the library will be using the
10144 Renesas ABI. So if the function would return its result
10145 in memory then we must force the address of this memory
10146 block onto the stack. Ideally we would like to call
10147 targetm.calls.return_in_memory() here but we do not have
10148 the TYPE or the FNDECL available so we synthesize the
10149 contents of that function as best we can. */
10150 pcum->force_mem =
10151 (TARGET_DEFAULT & MASK_HITACHI)
10152 && (mode == BLKmode
10153 || (GET_MODE_SIZE (mode) > 4
10154 && !(mode == DFmode
10155 && TARGET_FPU_DOUBLE)));
10157 else
10159 pcum->call_cookie = 0;
10160 pcum->force_mem = FALSE;
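/* Illustrative example for the libcall branch above (a sketch, assuming the
   default ABI is the Renesas one): a library call whose result is BLKmode,
   or wider than 4 bytes and not a DFmode that the FPU can return directly,
   gets force_mem set, so the address of the return slot is forced onto the
   stack much as targetm.calls.return_in_memory would request for a known
   type.  */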
10165 /* Determine if two hard register sets intersect.
10166 Return 1 if they do. */
10168 static int
10169 hard_regs_intersect_p (HARD_REG_SET *a, HARD_REG_SET *b)
10171 HARD_REG_SET c;
10172 COPY_HARD_REG_SET (c, *a);
10173 AND_HARD_REG_SET (c, *b);
10174 GO_IF_HARD_REG_SUBSET (c, reg_class_contents[(int) NO_REGS], lose);
10175 return 1;
10176 lose:
10177 return 0;
10180 #ifdef TARGET_ADJUST_UNROLL_MAX
10181 static int
10182 sh_adjust_unroll_max (struct loop * loop, int insn_count,
10183 int max_unrolled_insns, int strength_reduce_p,
10184 int unroll_type)
10186 /* This doesn't work in 4.0 because the old unroller & loop.h are gone. */
10187 if (TARGET_ADJUST_UNROLL && TARGET_SHMEDIA)
10189 /* Throttle back loop unrolling so that the costs of using more
10190 targets than the eight target registers we have don't outweigh
10191 the benefits of unrolling. */
10192 rtx insn;
10193 int n_labels = 0, n_calls = 0, n_exit_dest = 0, n_inner_loops = -1;
10194 int n_barriers = 0;
10195 rtx dest;
10196 int i;
10197 rtx exit_dest[8];
10198 int threshold;
10199 int unroll_benefit = 0, mem_latency = 0;
10200 int base_cost, best_cost, cost;
10201 int factor, best_factor;
10202 int n_dest;
10203 unsigned max_iterations = 32767;
10204 int n_iterations;
10205 int need_precond = 0, precond = 0;
10206 basic_block * bbs = get_loop_body (loop);
10207 struct niter_desc *desc;
10209 /* Assume that all labels inside the loop are used from inside the
10210 loop. If the loop has multiple entry points, it is unlikely to
10211 be unrolled anyway.
10212 Also assume that all calls are to different functions. That is
10213 somewhat pessimistic, but if you have lots of calls, unrolling the
10214 loop is not likely to gain you much in the first place. */
10215 i = loop->num_nodes - 1;
10216 for (insn = BB_HEAD (bbs[i]); ; )
10218 if (GET_CODE (insn) == CODE_LABEL)
10219 n_labels++;
10220 else if (GET_CODE (insn) == CALL_INSN)
10221 n_calls++;
10222 else if (GET_CODE (insn) == NOTE
10223 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
10224 n_inner_loops++;
10225 else if (GET_CODE (insn) == BARRIER)
10226 n_barriers++;
10227 if (insn != BB_END (bbs[i]))
10228 insn = NEXT_INSN (insn);
10229 else if (--i >= 0)
10230 insn = BB_HEAD (bbs[i]);
10231 else
10232 break;
10234 free (bbs);
10235 /* One label for the loop top is normal, and it won't be duplicated by
10236 unrolling. */
10237 if (n_labels <= 1)
10238 return max_unrolled_insns;
10239 if (n_inner_loops > 0)
10240 return 0;
10241 for (dest = loop->exit_labels; dest && n_exit_dest < 8;
10242 dest = LABEL_NEXTREF (dest))
10244 for (i = n_exit_dest - 1;
10245 i >= 0 && XEXP (dest, 0) != XEXP (exit_dest[i], 0); i--);
10246 if (i < 0)
10247 exit_dest[n_exit_dest++] = dest;
10249 /* If the loop top and call and exit destinations are enough to fill up
10250 the target registers, we're unlikely to do any more damage by
10251 unrolling. */
10252 if (n_calls + n_exit_dest >= 7)
10253 return max_unrolled_insns;
10255 /* ??? In the new loop unroller, there is no longer any strength
10256 reduction information available. Thus, when it comes to unrolling,
10257 we know the cost of everything, but we know the value of nothing. */
10258 #if 0
10259 if (strength_reduce_p
10260 && (unroll_type == LPT_UNROLL_RUNTIME
10261 || unroll_type == LPT_UNROLL_CONSTANT
10262 || unroll_type == LPT_PEEL_COMPLETELY))
10264 struct loop_ivs *ivs = LOOP_IVS (loop);
10265 struct iv_class *bl;
10267 /* We'll save one compare-and-branch in each loop body copy
10268 but the last one. */
10269 unroll_benefit = 1;
10270 /* Assess the benefit of removing biv & giv updates. */
10271 for (bl = ivs->list; bl; bl = bl->next)
10273 rtx increment = biv_total_increment (bl);
10274 struct induction *v;
10276 if (increment && GET_CODE (increment) == CONST_INT)
10278 unroll_benefit++;
10279 for (v = bl->giv; v; v = v->next_iv)
10281 if (! v->ignore && v->same == 0
10282 && GET_CODE (v->mult_val) == CONST_INT)
10283 unroll_benefit++;
10284 /* If this giv uses an array, try to determine
10285 a maximum iteration count from the size of the
10286 array. This need not be correct all the time,
10287 but should not be too far off the mark too often. */
10288 while (v->giv_type == DEST_ADDR)
10290 rtx mem = PATTERN (v->insn);
10291 tree mem_expr, type, size_tree;
10293 if (GET_CODE (SET_SRC (mem)) == MEM)
10294 mem = SET_SRC (mem);
10295 else if (GET_CODE (SET_DEST (mem)) == MEM)
10296 mem = SET_DEST (mem);
10297 else
10298 break;
10299 mem_expr = MEM_EXPR (mem);
10300 if (! mem_expr)
10301 break;
10302 type = TREE_TYPE (mem_expr);
10303 if (TREE_CODE (type) != ARRAY_TYPE
10304 || ! TYPE_SIZE (type) || ! TYPE_SIZE_UNIT (type))
10305 break;
10306 size_tree = fold (build (TRUNC_DIV_EXPR,
10307 bitsizetype,
10308 TYPE_SIZE (type),
10309 TYPE_SIZE_UNIT (type)));
10310 if (TREE_CODE (size_tree) == INTEGER_CST
10311 && ! TREE_INT_CST_HIGH (size_tree)
10312 && TREE_INT_CST_LOW (size_tree) < max_iterations)
10313 max_iterations = TREE_INT_CST_LOW (size_tree);
10314 break;
10320 #else /* 0 */
10321 /* Assume there is at least some benefit. */
10322 unroll_benefit = 1;
10323 #endif /* 0 */
10325 desc = get_simple_loop_desc (loop);
10326 n_iterations = desc->const_iter ? desc->niter : 0;
10327 max_iterations
10328 = max_iterations < desc->niter_max ? max_iterations : desc->niter_max;
10330 if (! strength_reduce_p || ! n_iterations)
10331 need_precond = 1;
10332 if (! n_iterations)
10334 n_iterations
10335 = max_iterations < 3 ? max_iterations : max_iterations * 3 / 4;
10336 if (! n_iterations)
10337 return 0;
10339 #if 0 /* ??? See above - missing induction variable information. */
10340 while (unroll_benefit > 1) /* no loop */
10342 /* We include the benefit of biv/ giv updates. Check if some or
10343 all of these updates are likely to fit into a scheduling
10344 bubble of a load.
10345 We check for the following case:
10346 - All the insns leading to the first JUMP_INSN are in a strict
10347 dependency chain.
10348 - there is at least one memory reference in them.
10350 When we find such a pattern, we assume that we can hide as many
10351 updates as the total of the load latency is, if we have an
10352 unroll factor of at least two. We might or might not also do
10353 this without unrolling, so rather than considering this as an
10354 extra unroll benefit, discount it in the unroll benefits of unroll
10355 factors higher than two. */
10357 rtx set, last_set;
10359 insn = next_active_insn (loop->start);
10360 last_set = single_set (insn);
10361 if (! last_set)
10362 break;
10363 if (GET_CODE (SET_SRC (last_set)) == MEM)
10364 mem_latency += 2;
10365 for (insn = NEXT_INSN (insn); insn != end; insn = NEXT_INSN (insn))
10367 if (! INSN_P (insn))
10368 continue;
10369 if (GET_CODE (insn) == JUMP_INSN)
10370 break;
10371 if (! reg_referenced_p (SET_DEST (last_set), PATTERN (insn)))
10373 /* Check if this is a to-be-reduced giv insn. */
10374 struct loop_ivs *ivs = LOOP_IVS (loop);
10375 struct iv_class *bl;
10376 struct induction *v;
10377 for (bl = ivs->list; bl; bl = bl->next)
10379 if (bl->biv->insn == insn)
10380 goto is_biv;
10381 for (v = bl->giv; v; v = v->next_iv)
10382 if (v->insn == insn)
10383 goto is_giv;
10385 mem_latency--;
10386 is_biv:
10387 is_giv:
10388 continue;
10390 set = single_set (insn);
10391 if (! set)
10392 continue;
10393 if (GET_CODE (SET_SRC (set)) == MEM)
10394 mem_latency += 2;
10395 last_set = set;
10397 if (mem_latency < 0)
10398 mem_latency = 0;
10399 else if (mem_latency > unroll_benefit - 1)
10400 mem_latency = unroll_benefit - 1;
10401 break;
10403 #endif /* 0 */
10404 if (n_labels + (unroll_benefit + n_labels * 8) / n_iterations
10405 <= unroll_benefit)
10406 return max_unrolled_insns;
10408 n_dest = n_labels + n_calls + n_exit_dest;
10409 base_cost = n_dest <= 8 ? 0 : n_dest - 7;
10410 best_cost = 0;
10411 best_factor = 1;
10412 if (n_barriers * 2 > n_labels - 1)
10413 n_barriers = (n_labels - 1) / 2;
10414 for (factor = 2; factor <= 8; factor++)
10416 /* Bump up preconditioning cost for each power of two. */
10417 if (! (factor & (factor-1)))
10418 precond += 4;
10419 /* When preconditioning, only powers of two will be considered. */
10420 else if (need_precond)
10421 continue;
10422 n_dest = ((unroll_type != LPT_PEEL_COMPLETELY)
10423 + (n_labels - 1) * factor + n_calls + n_exit_dest
10424 - (n_barriers * factor >> 1)
10425 + need_precond);
10426 cost
10427 = ((n_dest <= 8 ? 0 : n_dest - 7)
10428 - base_cost * factor
10429 - ((factor > 2 ? unroll_benefit - mem_latency : unroll_benefit)
10430 * (factor - (unroll_type != LPT_PEEL_COMPLETELY)))
10431 + ((unroll_benefit + 1 + (n_labels - 1) * factor)
10432 / n_iterations));
10433 if (need_precond)
10434 cost += (precond + unroll_benefit * factor / 2) / n_iterations;
10435 if (cost < best_cost)
10437 best_cost = cost;
10438 best_factor = factor;
10441 threshold = best_factor * insn_count;
10442 if (max_unrolled_insns > threshold)
10443 max_unrolled_insns = threshold;
10445 return max_unrolled_insns;
10447 #endif /* TARGET_ADJUST_UNROLL_MAX */
10449 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
10450 not enter into CONST_DOUBLE for the replace.
10452 Note that copying is not done so X must not be shared unless all copies
10453 are to be modified.
10455 This is like replace_rtx, except that we operate on N_REPLACEMENTS
10456 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
10457 replacements[n*2+1] - and that we take mode changes into account.
10459 If a replacement is ambiguous, return NULL_RTX.
10461 If MODIFY is zero, don't modify any rtl in place,
10462 just return zero or nonzero for failure / success. */
10465 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
10467 int i, j;
10468 const char *fmt;
10470 /* The following prevents endless looping when we change a MEM inside a
10471 CONST_DOUBLE into the same CONST_DOUBLE. */
10472 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
10473 return x;
10475 for (i = n_replacements - 1; i >= 0 ; i--)
10476 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
10477 return replacements[i*2+1];
10479 /* Allow this function to make replacements in EXPR_LISTs. */
10480 if (x == 0)
10481 return 0;
10483 if (GET_CODE (x) == SUBREG)
10485 rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
10486 n_replacements, modify);
10488 if (GET_CODE (new) == CONST_INT)
10490 x = simplify_subreg (GET_MODE (x), new,
10491 GET_MODE (SUBREG_REG (x)),
10492 SUBREG_BYTE (x));
10493 if (! x)
10494 abort ();
10496 else if (modify)
10497 SUBREG_REG (x) = new;
10499 return x;
10501 else if (GET_CODE (x) == REG)
10503 unsigned regno = REGNO (x);
10504 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
10505 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
10506 rtx result = NULL_RTX;
10508 for (i = n_replacements - 1; i >= 0; i--)
10510 rtx from = replacements[i*2];
10511 rtx to = replacements[i*2+1];
10512 unsigned from_regno, from_nregs, to_regno, new_regno;
10514 if (GET_CODE (from) != REG)
10515 continue;
10516 from_regno = REGNO (from);
10517 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
10518 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
10519 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
10521 if (regno < from_regno
10522 || regno + nregs > from_regno + nregs
10523 || GET_CODE (to) != REG
10524 || result)
10525 return NULL_RTX;
10526 to_regno = REGNO (to);
10527 if (to_regno < FIRST_PSEUDO_REGISTER)
10529 new_regno = regno + to_regno - from_regno;
10530 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
10531 != nregs)
10532 return NULL_RTX;
10533 result = gen_rtx_REG (GET_MODE (x), new_regno);
10535 else if (GET_MODE (x) <= GET_MODE (to))
10536 result = gen_lowpart_common (GET_MODE (x), to);
10537 else
10538 result = gen_lowpart_SUBREG (GET_MODE (x), to);
10541 return result ? result : x;
10543 else if (GET_CODE (x) == ZERO_EXTEND)
10545 rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
10546 n_replacements, modify);
10548 if (GET_CODE (new) == CONST_INT)
10550 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
10551 new, GET_MODE (XEXP (x, 0)));
10552 if (! x)
10553 abort ();
10555 else if (modify)
10556 XEXP (x, 0) = new;
10558 return x;
10561 fmt = GET_RTX_FORMAT (GET_CODE (x));
10562 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10564 rtx new;
10566 if (fmt[i] == 'e')
10568 new = replace_n_hard_rtx (XEXP (x, i), replacements,
10569 n_replacements, modify);
10570 if (!new)
10571 return NULL_RTX;
10572 if (modify)
10573 XEXP (x, i) = new;
10575 else if (fmt[i] == 'E')
10576 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10578 new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
10579 n_replacements, modify);
10580 if (!new)
10581 return NULL_RTX;
10582 if (modify)
10583 XVECEXP (x, i, j) = new;
10587 return x;
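/* Usage sketch for replace_n_hard_rtx (register numbers purely illustrative):
     rtx repl[2] = { gen_rtx_REG (SImode, 4), gen_rtx_REG (SImode, 5) };
     if (replace_n_hard_rtx (pattern, repl, 1, 0))
       replace_n_hard_rtx (pattern, repl, 1, 1);
   With MODIFY == 0 the call only reports, via a non-NULL / NULL result,
   whether every occurrence of the first register can be rewritten as the
   second; with MODIFY == 1 it performs the rewrite in place.  */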
10591 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
10593 enum rtx_code code = TRUNCATE;
10595 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
10597 rtx inner = XEXP (x, 0);
10598 enum machine_mode inner_mode = GET_MODE (inner);
10600 if (inner_mode == mode)
10601 return inner;
10602 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
10603 x = inner;
10604 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
10605 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
10607 code = GET_CODE (x);
10608 x = inner;
10611 return gen_rtx_fmt_e (code, mode, x);
10614 /* Called via for_each_rtx after reload, to clean up truncates of
10615 registers that span multiple actual hard registers. */
10617 shmedia_cleanup_truncate (rtx *p, void *n_changes)
10619 rtx x = *p, reg;
10621 if (GET_CODE (x) != TRUNCATE)
10622 return 0;
10623 reg = XEXP (x, 0);
10624 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
10626 enum machine_mode reg_mode = GET_MODE (reg);
10627 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
10628 subreg_lowpart_offset (DImode, reg_mode));
10629 *(int*) n_changes += 1;
10630 return -1;
10632 return 0;
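/* Example of the cleanup above (sketch): after reload, an operand such as
   (truncate:SI (reg:TI ...)) names a register spanning more than one hard
   register; the walk replaces the TRUNCATE operand with the low-order
   DImode part of that register and bumps *N_CHANGES so the caller can tell
   that the insn was altered.  */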
10635 /* Load and store depend on the highpart of the address. However,
10636 set_attr_alternative does not give well-defined results before reload,
10637 so we must look at the rtl ourselves to see if any of the feeding
10638 registers is used in a memref. */
10640 /* Called by sh_contains_memref_p via for_each_rtx. */
10641 static int
10642 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
10644 return (GET_CODE (*loc) == MEM);
10647 /* Return nonzero iff INSN contains a MEM. */
10649 sh_contains_memref_p (rtx insn)
10651 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
10654 /* FNADDR is the MEM expression from a call expander. Return an address
10655 to use in an SHmedia insn pattern. */
10657 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
10659 int is_sym;
10661 fnaddr = XEXP (fnaddr, 0);
10662 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
10663 if (flag_pic && is_sym)
10665 if (! SYMBOL_REF_LOCAL_P (fnaddr))
10667 rtx reg = gen_reg_rtx (Pmode);
10669 /* We must not use GOTPLT for sibcalls, because PIC_REG
10670 must be restored before the PLT code gets to run. */
10671 if (is_sibcall)
10672 emit_insn (gen_symGOT2reg (reg, fnaddr));
10673 else
10674 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
10675 fnaddr = reg;
10677 else
10679 fnaddr = gen_sym2PIC (fnaddr);
10680 PUT_MODE (fnaddr, Pmode);
10683 /* If ptabs might trap, make this visible to the rest of the compiler.
10684 We generally assume that symbols pertain to valid locations, but
10685 it is possible to generate invalid symbols with asm or linker tricks.
10686 In a list of functions where each returns its successor, an invalid
10687 symbol might denote an empty list. */
10688 if (!TARGET_PT_FIXED
10689 && (!is_sym || TARGET_INVALID_SYMBOLS)
10690 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
10692 rtx tr = gen_reg_rtx (PDImode);
10694 emit_insn (gen_ptabs (tr, fnaddr));
10695 fnaddr = tr;
10697 else if (! target_reg_operand (fnaddr, Pmode))
10698 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
10699 return fnaddr;
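/* Rough summary (sketch): for a PIC call to a non-local symbol the address
   is loaded through the GOT (GOTPLT for ordinary calls, plain GOT for
   sibcalls, since PIC_REG must already be restored when PLT code runs);
   when ptabs might trap on an untrusted address, the address is moved into
   a PDImode target register with gen_ptabs so the potential trap is visible
   to the compiler; otherwise it is simply copied into a register that
   satisfies target_reg_operand.  */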
10702 /* ??? insn-conditions.c contains the insn conditions from sh.md,
10703 but does not include tree.h. This is fixed in 4.2 20060127. */
10704 bool
10705 sh_cfun_trap_exit_p (void)
10707 return (lookup_attribute ("trap_exit",
10708 DECL_ATTRIBUTES (current_function_decl))
10709 == NULL_TREE);
10712 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
10714 /* This defines the storage for the variable part of a -mboard= option.
10715 It is only required when using the sh-superh-elf target. */
10716 #ifdef _SUPERH_H
10717 const char * boardtype = "7750p2";
10718 const char * osruntime = "bare";
10719 #endif
10721 #include "gt-sh.h"