1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
22 Boston, MA 02110-1301, USA. */
26 #include "coretypes.h"
28 #include "insn-config.h"
36 #include "hard-reg-set.h"
38 #include "insn-attr.h"
42 #include "integrate.h"
46 #include "target-def.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "cfglayout.h"
52 #include "sched-int.h"
54 #include "tree-gimple.h"
58 int code_for_indirect_jump_scratch
= CODE_FOR_indirect_jump_scratch
;
/* Word index (in SImode-sized units) of the most significant (MSW) and
   least significant (LSW) half of a two-word value; the two swap with
   target endianness.  */
60 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
61 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
63 /* These are some macros to abstract register modes. */
/* Immediate-range test for an add: the I10 check on SHmedia, the I08
   check otherwise.  */
64 #define CONST_OK_FOR_ADD(size) \
65 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
/* Move / add / subtract pattern generators: the DImode variant on
   SHmedia64, the SImode variant otherwise.  */
66 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
67 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
68 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
70 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
71 int current_function_interrupt
;
73 tree sh_deferred_function_attributes
;
74 tree
*sh_deferred_function_attributes_tail
= &sh_deferred_function_attributes
;
76 /* Global variables for machine-dependent things. */
78 /* Which cpu are we scheduling for. */
79 enum processor_type sh_cpu
;
81 /* Definitions used in ready queue reordering for first scheduling pass. */
83 /* Register weight arrays for modes SFmode and SImode, indexed by insn UID
   (see INSN_REGMODE_WEIGHT). */
84 static short *regmode_weight
[2];
86 /* Total SFmode and SImode weights of scheduled insns. */
87 static int curr_regmode_pressure
[2];
89 /* If true, skip cycles for Q -> R movement. */
90 static int skip_cycles
= 0;
92 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
93 and returned from sh_reorder2. */
94 static short cached_can_issue_more
;
96 /* Saved operands from the last compare to use when we generate an scc
97 or bcc insn. */
102 /* Provides the class number of the smallest class containing
103 reg number. */
105 enum reg_class regno_reg_class
[FIRST_PSEUDO_REGISTER
] =
107 R0_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
108 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
109 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
110 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
111 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
112 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
113 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
114 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
115 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
116 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
117 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
118 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
119 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
120 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
121 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
122 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
123 FP0_REGS
,FP_REGS
, FP_REGS
, FP_REGS
,
124 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
125 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
126 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
127 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
128 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
129 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
130 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
131 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
132 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
133 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
134 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
135 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
136 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
137 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
138 FP_REGS
, FP_REGS
, FP_REGS
, FP_REGS
,
139 TARGET_REGS
, TARGET_REGS
, TARGET_REGS
, TARGET_REGS
,
140 TARGET_REGS
, TARGET_REGS
, TARGET_REGS
, TARGET_REGS
,
141 DF_REGS
, DF_REGS
, DF_REGS
, DF_REGS
,
142 DF_REGS
, DF_REGS
, DF_REGS
, DF_REGS
,
143 NO_REGS
, GENERAL_REGS
, PR_REGS
, T_REGS
,
144 MAC_REGS
, MAC_REGS
, FPUL_REGS
, FPSCR_REGS
,
145 GENERAL_REGS
, GENERAL_REGS
,
148 char sh_register_names
[FIRST_PSEUDO_REGISTER
] \
149 [MAX_REGISTER_NAME_LENGTH
+ 1] = SH_REGISTER_NAMES_INITIALIZER
;
151 char sh_additional_register_names
[ADDREGNAMES_SIZE
] \
152 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH
+ 1]
153 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER
;
155 /* Provide reg_class from a letter such as appears in the machine
156 description. *: target independently reserved letter.
157 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
159 enum reg_class reg_class_from_letter
[] =
161 /* a */ ALL_REGS
, /* b */ TARGET_REGS
, /* c */ FPSCR_REGS
, /* d */ DF_REGS
,
162 /* e */ FP_REGS
, /* f */ FP_REGS
, /* g **/ NO_REGS
, /* h */ NO_REGS
,
163 /* i **/ NO_REGS
, /* j */ NO_REGS
, /* k */ SIBCALL_REGS
, /* l */ PR_REGS
,
164 /* m **/ NO_REGS
, /* n **/ NO_REGS
, /* o **/ NO_REGS
, /* p **/ NO_REGS
,
165 /* q */ NO_REGS
, /* r **/ NO_REGS
, /* s **/ NO_REGS
, /* t */ T_REGS
,
166 /* u */ NO_REGS
, /* v */ NO_REGS
, /* w */ FP0_REGS
, /* x */ MAC_REGS
,
167 /* y */ FPUL_REGS
, /* z */ R0_REGS
170 int assembler_dialect
;
172 static bool shmedia_space_reserved_for_target_registers
;
174 static bool sh_handle_option (size_t, const char *, int);
175 static void split_branches (rtx
);
176 static int branch_dest (rtx
);
177 static void force_into (rtx
, rtx
);
178 static void print_slot (rtx
);
179 static rtx
add_constant (rtx
, enum machine_mode
, rtx
);
180 static void dump_table (rtx
, rtx
);
181 static int hi_const (rtx
);
182 static int broken_move (rtx
);
183 static int mova_p (rtx
);
184 static rtx
find_barrier (int, rtx
, rtx
);
185 static int noncall_uses_reg (rtx
, rtx
, rtx
*);
186 static rtx
gen_block_redirect (rtx
, int, int);
187 static void sh_reorg (void);
188 static void output_stack_adjust (int, rtx
, int, HARD_REG_SET
*);
189 static rtx
frame_insn (rtx
);
190 static rtx
push (int);
191 static void pop (int);
192 static void push_regs (HARD_REG_SET
*, int);
193 static int calc_live_regs (HARD_REG_SET
*);
194 static void mark_use (rtx
, rtx
*);
195 static HOST_WIDE_INT
rounded_frame_size (int);
196 static rtx
mark_constant_pool_use (rtx
);
197 const struct attribute_spec sh_attribute_table
[];
198 static tree
sh_handle_interrupt_handler_attribute (tree
*, tree
, tree
, int, bool *);
199 static tree
sh_handle_sp_switch_attribute (tree
*, tree
, tree
, int, bool *);
200 static tree
sh_handle_trap_exit_attribute (tree
*, tree
, tree
, int, bool *);
201 static tree
sh_handle_renesas_attribute (tree
*, tree
, tree
, int, bool *);
202 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT
);
203 static void sh_insert_attributes (tree
, tree
*);
204 static const char *sh_check_pch_target_flags (int);
205 static int sh_adjust_cost (rtx
, rtx
, rtx
, int);
206 static int sh_issue_rate (void);
207 static int sh_dfa_new_cycle (FILE *, int, rtx
, int, int, int *sort_p
);
208 static short find_set_regmode_weight (rtx
, enum machine_mode
);
209 static short find_insn_regmode_weight (rtx
, enum machine_mode
);
210 static void find_regmode_weight (int, enum machine_mode
);
211 static void sh_md_init_global (FILE *, int, int);
212 static void sh_md_finish_global (FILE *, int);
213 static int rank_for_reorder (const void *, const void *);
214 static void swap_reorder (rtx
*, int);
215 static void ready_reorder (rtx
*, int);
216 static short high_pressure (enum machine_mode
);
217 static int sh_reorder (FILE *, int, rtx
*, int *, int);
218 static int sh_reorder2 (FILE *, int, rtx
*, int *, int);
219 static void sh_md_init (FILE *, int, int);
220 static int sh_variable_issue (FILE *, int, rtx
, int);
222 static bool sh_function_ok_for_sibcall (tree
, tree
);
224 static bool sh_cannot_modify_jumps_p (void);
225 static int sh_target_reg_class (void);
226 static bool sh_optimize_target_register_callee_saved (bool);
227 static bool sh_ms_bitfield_layout_p (tree
);
229 static void sh_init_builtins (void);
230 static void sh_media_init_builtins (void);
231 static rtx
sh_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
232 static void sh_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
, tree
);
233 static void sh_file_start (void);
234 static int flow_dependent_p (rtx
, rtx
);
235 static void flow_dependent_p_1 (rtx
, rtx
, void *);
236 static int shiftcosts (rtx
);
237 static int andcosts (rtx
);
238 static int addsubcosts (rtx
);
239 static int multcosts (rtx
);
240 static bool unspec_caller_rtx_p (rtx
);
241 static bool sh_cannot_copy_insn_p (rtx
);
242 static bool sh_rtx_costs (rtx
, int, int, int *);
243 static int sh_address_cost (rtx
);
244 #ifdef TARGET_ADJUST_UNROLL_MAX
245 static int sh_adjust_unroll_max (struct loop
*, int, int, int, int);
247 static int sh_pr_n_sets (void);
248 static rtx
sh_allocate_initial_value (rtx
);
249 static int shmedia_target_regs_stack_space (HARD_REG_SET
*);
250 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET
*);
251 static int shmedia_target_regs_stack_adjust (HARD_REG_SET
*);
252 static int scavenge_reg (HARD_REG_SET
*s
);
253 struct save_schedule_s
;
254 static struct save_entry_s
*sh5_schedule_saves (HARD_REG_SET
*,
255 struct save_schedule_s
*, int);
257 static rtx
sh_struct_value_rtx (tree
, int);
258 static bool sh_return_in_memory (tree
, tree
);
259 static rtx
sh_builtin_saveregs (void);
260 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
, tree
, int *, int);
261 static bool sh_strict_argument_naming (CUMULATIVE_ARGS
*);
262 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS
*);
263 static tree
sh_build_builtin_va_list (void);
264 static tree
sh_gimplify_va_arg_expr (tree
, tree
, tree
*, tree
*);
265 static bool sh_pass_by_reference (CUMULATIVE_ARGS
*, enum machine_mode
,
267 static bool sh_callee_copies (CUMULATIVE_ARGS
*, enum machine_mode
,
269 static int sh_arg_partial_bytes (CUMULATIVE_ARGS
*, enum machine_mode
,
271 static int sh_dwarf_calling_convention (tree
);
272 static int hard_regs_intersect_p (HARD_REG_SET
*, HARD_REG_SET
*);
275 /* Initialize the GCC target structure. */
276 #undef TARGET_ATTRIBUTE_TABLE
277 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
279 /* The next two are used for debug info when compiling with -gdwarf. */
280 #undef TARGET_ASM_UNALIGNED_HI_OP
281 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
282 #undef TARGET_ASM_UNALIGNED_SI_OP
283 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
285 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
286 #undef TARGET_ASM_UNALIGNED_DI_OP
287 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
288 #undef TARGET_ASM_ALIGNED_DI_OP
289 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
291 #undef TARGET_ASM_FUNCTION_EPILOGUE
292 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
294 #undef TARGET_ASM_OUTPUT_MI_THUNK
295 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
297 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
298 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
300 #undef TARGET_ASM_FILE_START
301 #define TARGET_ASM_FILE_START sh_file_start
302 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
303 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
305 #undef TARGET_DEFAULT_TARGET_FLAGS
306 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
307 #undef TARGET_HANDLE_OPTION
308 #define TARGET_HANDLE_OPTION sh_handle_option
310 #undef TARGET_INSERT_ATTRIBUTES
311 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
313 #undef TARGET_SCHED_ADJUST_COST
314 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
316 #undef TARGET_SCHED_ISSUE_RATE
317 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
319 /* The next 7 hooks have been implemented for reenabling sched1. With the
320 help of these macros we are limiting the movement of insns in sched1 to
321 reduce the register pressure. The overall idea is to keep count of SImode
322 and SFmode regs required by already scheduled insns. When these counts
323 cross some threshold values, give priority to insns that free registers.
324 The insn that frees registers is most likely to be the insn with lowest
325 LUID (original insn order); but such an insn might be there in the stalled
326 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
327 up to a max of 8 cycles so that such insns may move from Q -> R.
329 The hooks are described below:
331 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
332 scheduler; it is called inside the sched_init function just after
333 find_insn_reg_weights function call. It is used to calculate the SImode
334 and SFmode weights of insns of basic blocks; much like what
335 find_insn_reg_weights does.
336 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
338 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
339 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
340 Q -> R.
342 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
343 high; reorder the ready queue so that the insn with lowest LUID will be
344 issued next.
346 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
347 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
349 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
350 can be returned from TARGET_SCHED_REORDER2.
352 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
354 #undef TARGET_SCHED_DFA_NEW_CYCLE
355 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
357 #undef TARGET_SCHED_INIT_GLOBAL
358 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
360 #undef TARGET_SCHED_FINISH_GLOBAL
361 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
363 #undef TARGET_SCHED_VARIABLE_ISSUE
364 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
366 #undef TARGET_SCHED_REORDER
367 #define TARGET_SCHED_REORDER sh_reorder
369 #undef TARGET_SCHED_REORDER2
370 #define TARGET_SCHED_REORDER2 sh_reorder2
372 #undef TARGET_SCHED_INIT
373 #define TARGET_SCHED_INIT sh_md_init
375 #undef TARGET_CANNOT_MODIFY_JUMPS_P
376 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
377 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
378 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
379 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
380 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
381 sh_optimize_target_register_callee_saved
383 #undef TARGET_MS_BITFIELD_LAYOUT_P
384 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
386 #undef TARGET_INIT_BUILTINS
387 #define TARGET_INIT_BUILTINS sh_init_builtins
388 #undef TARGET_EXPAND_BUILTIN
389 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
391 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
392 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
394 #undef TARGET_CANNOT_COPY_INSN_P
395 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
396 #undef TARGET_RTX_COSTS
397 #define TARGET_RTX_COSTS sh_rtx_costs
398 #undef TARGET_ADDRESS_COST
399 #define TARGET_ADDRESS_COST sh_address_cost
400 #undef TARGET_ALLOCATE_INITIAL_VALUE
401 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
403 #undef TARGET_MACHINE_DEPENDENT_REORG
404 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
407 #undef TARGET_HAVE_TLS
408 #define TARGET_HAVE_TLS true
411 #undef TARGET_PROMOTE_PROTOTYPES
412 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
413 #undef TARGET_PROMOTE_FUNCTION_ARGS
414 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
415 #undef TARGET_PROMOTE_FUNCTION_RETURN
416 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
418 #undef TARGET_STRUCT_VALUE_RTX
419 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
420 #undef TARGET_RETURN_IN_MEMORY
421 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
423 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
424 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
425 #undef TARGET_SETUP_INCOMING_VARARGS
426 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
427 #undef TARGET_STRICT_ARGUMENT_NAMING
428 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
429 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
430 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
431 #undef TARGET_MUST_PASS_IN_STACK
432 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
433 #undef TARGET_PASS_BY_REFERENCE
434 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
435 #undef TARGET_CALLEE_COPIES
436 #define TARGET_CALLEE_COPIES sh_callee_copies
437 #undef TARGET_ARG_PARTIAL_BYTES
438 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
440 #undef TARGET_BUILD_BUILTIN_VA_LIST
441 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
442 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
443 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
445 #undef TARGET_VECTOR_MODE_SUPPORTED_P
446 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
448 #undef TARGET_CHECK_PCH_TARGET_FLAGS
449 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
451 #undef TARGET_DWARF_CALLING_CONVENTION
452 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
454 /* Return regmode weight for insn. */
455 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
457 /* Return current register pressure for regmode. */
458 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
462 #undef TARGET_ENCODE_SECTION_INFO
463 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
464 #undef TARGET_STRIP_NAME_ENCODING
465 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
466 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
467 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
471 #ifdef TARGET_ADJUST_UNROLL_MAX
472 #undef TARGET_ADJUST_UNROLL_MAX
473 #define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
476 struct gcc_target targetm
= TARGET_INITIALIZER
;
478 /* Implement TARGET_HANDLE_OPTION.

   CODE identifies the option being processed; ARG and VALUE are marked
   ATTRIBUTE_UNUSED.  Every assignment visible below has the same shape:
   the MASK_ARCH bits of target_flags are cleared and the SELECT_* value
   for one CPU variant is OR'ed in.  */
481 sh_handle_option (size_t code
, const char *arg ATTRIBUTE_UNUSED
,
482 int value ATTRIBUTE_UNUSED
)
487 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH1
;
491 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH2
;
495 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH2A
;
499 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH2A_NOFPU
;
503 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH2A_SINGLE
;
506 case OPT_m2a_single_only
:
507 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH2A_SINGLE_ONLY
;
511 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH2E
;
515 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH3
;
519 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH3E
;
523 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH4
;
527 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH4_NOFPU
;
531 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH4_SINGLE
;
534 case OPT_m4_single_only
:
535 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH4_SINGLE_ONLY
;
539 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH4A
;
544 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH4A_NOFPU
;
548 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH4A_SINGLE
;
551 case OPT_m4a_single_only
:
552 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH4A_SINGLE_ONLY
;
556 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH5_32MEDIA
;
559 case OPT_m5_32media_nofpu
:
560 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH5_32MEDIA_NOFPU
;
564 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH5_64MEDIA
;
567 case OPT_m5_64media_nofpu
:
568 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH5_64MEDIA_NOFPU
;
572 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH5_COMPACT
;
575 case OPT_m5_compact_nofpu
:
576 target_flags
= (target_flags
& ~MASK_ARCH
) | SELECT_SH5_COMPACT_NOFPU
;
584 /* Print the operand address in x to the stream.

   The address is dispatched on GET_CODE (x).  The forms written below
   are "@reg", "@(disp,reg)", "@(r0,reg)", "@-reg" and "@reg+"; anything
   else visible here goes through mark_constant_pool_use and is printed
   with output_addr_const.  */
587 print_operand_address (FILE *stream
, rtx x
)
589 switch (GET_CODE (x
))
/* A single register operand, printed as "@reg".  */
593 fprintf (stream
, "@%s", reg_names
[true_regnum (x
)]);
/* Two-part address: operand 0 is treated as the base, operand 1 as the
   index or displacement.  */
598 rtx base
= XEXP (x
, 0);
599 rtx index
= XEXP (x
, 1);
601 switch (GET_CODE (index
))
/* Integer displacement (INTVAL is applied to it): "@(disp,base)".  */
604 fprintf (stream
, "@(%d,%s)", (int) INTVAL (index
),
605 reg_names
[true_regnum (base
)]);
/* Both parts resolve to register numbers.  "r0" is printed literally and
   the other register is taken to be the higher-numbered of the two.  */
611 int base_num
= true_regnum (base
);
612 int index_num
= true_regnum (index
);
614 fprintf (stream
, "@(r0,%s)",
615 reg_names
[MAX (base_num
, index_num
)]);
/* Register printed with a leading '-' (presumably the pre-decrement case;
   the case label is not visible here).  */
626 fprintf (stream
, "@-%s", reg_names
[true_regnum (XEXP (x
, 0))]);
/* Register printed with a trailing '+' (presumably the post-increment
   case; the case label is not visible here).  */
630 fprintf (stream
, "@%s+", reg_names
[true_regnum (XEXP (x
, 0))]);
/* Remaining form: x is replaced by the result of mark_constant_pool_use
   before being printed as an address constant.  */
634 x
= mark_constant_pool_use (x
);
635 output_addr_const (stream
, x
);
640 /* Print operand x (an rtx) in assembler syntax to file stream
641 according to modifier code.
643 '.' print a .s if insn needs delay slot
644 ',' print LOCAL_LABEL_PREFIX
645 '@' print trap, rte or rts depending upon pragma interruptness
646 '#' output a nop if there is nothing to put in the delay slot
647 ''' print likelihood suffix (/u for unlikely).
648 '>' print branch target if -fverbose-asm
649 'O' print a constant without the #
650 'R' print the LSW of a dp value - changes if in little endian
651 'S' print the MSW of a dp value - changes if in little endian
652 'T' print the next word of a dp value - same as 'R' in big endian mode.
653 'M' print an `x' if `m' will print `base,index'.
654 'N' print 'r63' if the operand is (const_int 0).
655 'd' print a V2SF reg as dN instead of fpN.
656 'm' print a pair `base,offset' or `base,index', for LD and ST.
657 'U' Likewise for {LD,ST}{HI,LO}.
658 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
659 'o' output an operator. */
662 print_operand (FILE *stream
, rtx x
, int code
)
665 enum machine_mode mode
;
673 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence
, 0, 0))
674 && get_attr_length (XVECEXP (final_sequence
, 0, 1)))
675 fprintf (stream
, ASSEMBLER_DIALECT
? "/s" : ".s");
678 fprintf (stream
, "%s", LOCAL_LABEL_PREFIX
);
681 trapa_attr
= lookup_attribute ("trap_exit",
682 DECL_ATTRIBUTES (current_function_decl
));
684 fprintf (stream
, "trapa #%ld",
685 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr
))));
686 else if (sh_cfun_interrupt_handler_p ())
687 fprintf (stream
, "rte");
689 fprintf (stream
, "rts");
692 /* Output a nop if there's nothing in the delay slot. */
693 if (dbr_sequence_length () == 0)
694 fprintf (stream
, "\n\tnop");
698 rtx note
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
700 if (note
&& INTVAL (XEXP (note
, 0)) * 2 < REG_BR_PROB_BASE
)
701 fputs ("/u", stream
);
705 if (flag_verbose_asm
&& JUMP_LABEL (current_output_insn
))
707 fputs ("\t! target: ", stream
);
708 output_addr_const (stream
, JUMP_LABEL (current_output_insn
));
712 x
= mark_constant_pool_use (x
);
713 output_addr_const (stream
, x
);
715 /* N.B.: %R / %S / %T adjust memory addresses by four.
716 For SHMEDIA, that means they can be used to access the first and
717 second 32 bit part of a 64 bit (or larger) value that
718 might be held in floating point registers or memory.
719 While they can be used to access 64 bit parts of a larger value
720 held in general purpose registers, that won't work with memory -
721 neither for fp registers, since the frxx names are used. */
723 if (REG_P (x
) || GET_CODE (x
) == SUBREG
)
725 regno
= true_regnum (x
);
726 regno
+= FP_REGISTER_P (regno
) ? 1 : LSW
;
727 fputs (reg_names
[regno
], (stream
));
731 x
= adjust_address (x
, SImode
, 4 * LSW
);
732 print_operand_address (stream
, XEXP (x
, 0));
739 if (mode
== VOIDmode
)
741 if (GET_MODE_SIZE (mode
) >= 8)
742 sub
= simplify_subreg (SImode
, x
, mode
, 4 * LSW
);
744 print_operand (stream
, sub
, 0);
746 output_operand_lossage ("invalid operand to %%R");
750 if (REG_P (x
) || GET_CODE (x
) == SUBREG
)
752 regno
= true_regnum (x
);
753 regno
+= FP_REGISTER_P (regno
) ? 0 : MSW
;
754 fputs (reg_names
[regno
], (stream
));
758 x
= adjust_address (x
, SImode
, 4 * MSW
);
759 print_operand_address (stream
, XEXP (x
, 0));
766 if (mode
== VOIDmode
)
768 if (GET_MODE_SIZE (mode
) >= 8)
769 sub
= simplify_subreg (SImode
, x
, mode
, 4 * MSW
);
771 print_operand (stream
, sub
, 0);
773 output_operand_lossage ("invalid operand to %%S");
777 /* Next word of a double. */
778 switch (GET_CODE (x
))
781 fputs (reg_names
[REGNO (x
) + 1], (stream
));
784 if (GET_CODE (XEXP (x
, 0)) != PRE_DEC
785 && GET_CODE (XEXP (x
, 0)) != POST_INC
)
786 x
= adjust_address (x
, SImode
, 4);
787 print_operand_address (stream
, XEXP (x
, 0));
794 switch (GET_CODE (x
))
796 case PLUS
: fputs ("add", stream
); break;
797 case MINUS
: fputs ("sub", stream
); break;
798 case MULT
: fputs ("mul", stream
); break;
799 case DIV
: fputs ("div", stream
); break;
800 case EQ
: fputs ("eq", stream
); break;
801 case NE
: fputs ("ne", stream
); break;
802 case GT
: case LT
: fputs ("gt", stream
); break;
803 case GE
: case LE
: fputs ("ge", stream
); break;
804 case GTU
: case LTU
: fputs ("gtu", stream
); break;
805 case GEU
: case LEU
: fputs ("geu", stream
); break;
811 if (GET_CODE (x
) == MEM
812 && GET_CODE (XEXP (x
, 0)) == PLUS
813 && (GET_CODE (XEXP (XEXP (x
, 0), 1)) == REG
814 || GET_CODE (XEXP (XEXP (x
, 0), 1)) == SUBREG
))
819 gcc_assert (GET_CODE (x
) == MEM
);
823 switch (GET_CODE (x
))
827 print_operand (stream
, x
, 0);
828 fputs (", 0", stream
);
832 print_operand (stream
, XEXP (x
, 0), 0);
833 fputs (", ", stream
);
834 print_operand (stream
, XEXP (x
, 1), 0);
843 gcc_assert (GET_CODE (x
) == REG
&& GET_MODE (x
) == V2SFmode
);
845 fprintf ((stream
), "d%s", reg_names
[REGNO (x
)] + 1);
849 if (x
== CONST0_RTX (GET_MODE (x
)))
851 fprintf ((stream
), "r63");
856 if (GET_CODE (x
) == CONST_INT
)
858 fprintf ((stream
), "%u", (unsigned) INTVAL (x
) & (0x10000 - 1));
868 switch (GET_CODE (x
))
872 rtx inner
= XEXP (x
, 0);
874 enum machine_mode inner_mode
;
876 /* We might see SUBREGs with vector mode registers inside. */
877 if (GET_CODE (inner
) == SUBREG
878 && (GET_MODE_SIZE (GET_MODE (inner
))
879 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner
))))
880 && subreg_lowpart_p (inner
))
881 inner
= SUBREG_REG (inner
);
882 if (GET_CODE (inner
) == CONST_INT
)
884 x
= GEN_INT (trunc_int_for_mode (INTVAL (inner
), GET_MODE (x
)));
887 inner_mode
= GET_MODE (inner
);
888 if (GET_CODE (inner
) == SUBREG
889 && (GET_MODE_SIZE (GET_MODE (inner
))
890 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner
))))
891 && GET_CODE (SUBREG_REG (inner
)) == REG
)
893 offset
= subreg_regno_offset (REGNO (SUBREG_REG (inner
)),
894 GET_MODE (SUBREG_REG (inner
)),
897 inner
= SUBREG_REG (inner
);
899 if (GET_CODE (inner
) != REG
|| GET_MODE_SIZE (inner_mode
) > 8)
901 /* Floating point register pairs are always big endian;
902 general purpose registers are 64 bit wide. */
903 regno
= REGNO (inner
);
904 regno
= (HARD_REGNO_NREGS (regno
, inner_mode
)
905 - HARD_REGNO_NREGS (regno
, mode
))
913 /* FIXME: We need this on SHmedia32 because reload generates
914 some sign-extended HI or QI loads into DImode registers
915 but, because Pmode is SImode, the address ends up with a
916 subreg:SI of the DImode register. Maybe reload should be
917 fixed so as to apply alter_subreg to such loads? */
919 gcc_assert (trapping_target_operand (x
, VOIDmode
));
920 x
= XEXP (XEXP (x
, 2), 0);
923 gcc_assert (SUBREG_BYTE (x
) == 0
924 && GET_CODE (SUBREG_REG (x
)) == REG
);
932 if (FP_REGISTER_P (regno
)
933 && mode
== V16SFmode
)
934 fprintf ((stream
), "mtrx%s", reg_names
[regno
] + 2);
935 else if (FP_REGISTER_P (REGNO (x
))
937 fprintf ((stream
), "fv%s", reg_names
[regno
] + 2);
938 else if (GET_CODE (x
) == REG
940 fprintf ((stream
), "fp%s", reg_names
[regno
] + 2);
941 else if (FP_REGISTER_P (REGNO (x
))
942 && GET_MODE_SIZE (mode
) > 4)
943 fprintf ((stream
), "d%s", reg_names
[regno
] + 1);
945 fputs (reg_names
[regno
], (stream
));
949 output_address (XEXP (x
, 0));
954 && GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
955 && (GET_MODE (XEXP (x
, 0)) == DImode
956 || GET_MODE (XEXP (x
, 0)) == SImode
)
957 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == TRUNCATE
958 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == HImode
)
960 rtx val
= XEXP (XEXP (XEXP (x
, 0), 0), 0);
963 if (GET_CODE (val
) == ASHIFTRT
)
966 if (GET_CODE (XEXP (val
, 0)) == CONST
)
968 output_addr_const (stream
, XEXP (val
, 0));
969 if (GET_CODE (XEXP (val
, 0)) == CONST
)
971 fputs (" >> ", stream
);
972 output_addr_const (stream
, XEXP (val
, 1));
977 if (GET_CODE (val
) == CONST
)
979 output_addr_const (stream
, val
);
980 if (GET_CODE (val
) == CONST
)
983 fputs (" & 65535)", stream
);
991 output_addr_const (stream
, x
);
998 /* Like force_operand, but guarantees that VALUE ends up in TARGET.

   VALUE is first passed to force_operand with TARGET as the suggested
   destination.  The result may still be a different rtx, so when it is
   not rtx_equal_p to TARGET an explicit move into TARGET is emitted.  */
1000 force_into (rtx value
, rtx target
)
1002 value
= force_operand (value
, target
);
/* force_operand did not deliver the result in TARGET: copy it there.  */
1003 if (! rtx_equal_p (value
, target
))
1004 emit_insn (gen_move_insn (target
, value
));
1007 /* Emit code to perform a block move. Choose the best method.
1009 OPERANDS[0] is the destination.
1010 OPERANDS[1] is the source.
1011 OPERANDS[2] is the size.
1012 OPERANDS[3] is the alignment safe to use. */
1015 expand_block_move (rtx
*operands
)
1017 int align
= INTVAL (operands
[3]);
1018 int constp
= (GET_CODE (operands
[2]) == CONST_INT
);
1019 int bytes
= (constp
? INTVAL (operands
[2]) : 0);
1024 /* If we could use mov.l to move words and dest is word-aligned, we
1025 can use movua.l for loads and still generate a relatively short
1026 and efficient sequence. */
1027 if (TARGET_SH4A_ARCH
&& align
< 4
1028 && MEM_ALIGN (operands
[0]) >= 32
1029 && can_move_by_pieces (bytes
, 32))
1031 rtx dest
= copy_rtx (operands
[0]);
1032 rtx src
= copy_rtx (operands
[1]);
1033 /* We could use different pseudos for each copied word, but
1034 since movua can only load into r0, it's kind of
1035 pointless anyway. */
1036 rtx temp
= gen_reg_rtx (SImode
);
1037 rtx src_addr
= copy_addr_to_reg (XEXP (src
, 0));
1040 while (copied
+ 4 <= bytes
)
1042 rtx to
= adjust_address (dest
, SImode
, copied
);
1043 rtx from
= adjust_automodify_address (src
, SImode
, src_addr
, copied
);
1045 emit_insn (gen_movua (temp
, from
));
1046 emit_move_insn (src_addr
, plus_constant (src_addr
, 4));
1047 emit_move_insn (to
, temp
);
1052 move_by_pieces (adjust_address (dest
, BLKmode
, copied
),
1053 adjust_automodify_address (src
, BLKmode
,
1055 bytes
- copied
, align
, 0);
1060 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1061 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1062 if (align
< 4 || (bytes
% 4 != 0))
1065 if (TARGET_HARD_SH4
)
1069 else if (bytes
== 12)
1071 rtx func_addr_rtx
= gen_reg_rtx (Pmode
);
1072 rtx r4
= gen_rtx_REG (SImode
, 4);
1073 rtx r5
= gen_rtx_REG (SImode
, 5);
1075 function_symbol (func_addr_rtx
, "__movmemSI12_i4", SFUNC_STATIC
);
1076 force_into (XEXP (operands
[0], 0), r4
);
1077 force_into (XEXP (operands
[1], 0), r5
);
1078 emit_insn (gen_block_move_real_i4 (func_addr_rtx
));
1081 else if (! TARGET_SMALLCODE
)
1083 const char *entry_name
;
1084 rtx func_addr_rtx
= gen_reg_rtx (Pmode
);
1086 rtx r4
= gen_rtx_REG (SImode
, 4);
1087 rtx r5
= gen_rtx_REG (SImode
, 5);
1088 rtx r6
= gen_rtx_REG (SImode
, 6);
1090 entry_name
= (bytes
& 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1091 function_symbol (func_addr_rtx
, entry_name
, SFUNC_STATIC
);
1092 force_into (XEXP (operands
[0], 0), r4
);
1093 force_into (XEXP (operands
[1], 0), r5
);
1095 dwords
= bytes
>> 3;
1096 emit_insn (gen_move_insn (r6
, GEN_INT (dwords
- 1)));
1097 emit_insn (gen_block_lump_real_i4 (func_addr_rtx
));
1106 rtx func_addr_rtx
= gen_reg_rtx (Pmode
);
1107 rtx r4
= gen_rtx_REG (SImode
, 4);
1108 rtx r5
= gen_rtx_REG (SImode
, 5);
1110 sprintf (entry
, "__movmemSI%d", bytes
);
1111 function_symbol (func_addr_rtx
, entry
, SFUNC_STATIC
);
1112 force_into (XEXP (operands
[0], 0), r4
);
1113 force_into (XEXP (operands
[1], 0), r5
);
1114 emit_insn (gen_block_move_real (func_addr_rtx
));
1118 /* This is the same number of bytes as a memcpy call, but to a different
1119 less common function name, so this will occasionally use more space. */
1120 if (! TARGET_SMALLCODE
)
1122 rtx func_addr_rtx
= gen_reg_rtx (Pmode
);
1123 int final_switch
, while_loop
;
1124 rtx r4
= gen_rtx_REG (SImode
, 4);
1125 rtx r5
= gen_rtx_REG (SImode
, 5);
1126 rtx r6
= gen_rtx_REG (SImode
, 6);
1128 function_symbol (func_addr_rtx
, "__movmem", SFUNC_STATIC
);
1129 force_into (XEXP (operands
[0], 0), r4
);
1130 force_into (XEXP (operands
[1], 0), r5
);
1132 /* r6 controls the size of the move. 16 is decremented from it
1133 for each 64 bytes moved. Then the negative bit left over is used
1134 as an index into a list of move instructions. e.g., a 72 byte move
1135 would be set up with size(r6) = 14, for one iteration through the
1136 big while loop, and a switch of -2 for the last part. */
1138 final_switch
= 16 - ((bytes
/ 4) % 16);
1139 while_loop
= ((bytes
/ 4) / 16 - 1) * 16;
1140 emit_insn (gen_move_insn (r6
, GEN_INT (while_loop
+ final_switch
)));
1141 emit_insn (gen_block_lump_real (func_addr_rtx
));
1148 /* Prepare operands for a move define_expand; specifically, one of the
1149 operands must be in a register. */
1152 prepare_move_operands (rtx operands
[], enum machine_mode mode
)
1154 if ((mode
== SImode
|| mode
== DImode
)
1156 && ! ((mode
== Pmode
|| mode
== ptr_mode
)
1157 && tls_symbolic_operand (operands
[1], Pmode
) != 0))
1160 if (SYMBOLIC_CONST_P (operands
[1]))
1162 if (GET_CODE (operands
[0]) == MEM
)
1163 operands
[1] = force_reg (Pmode
, operands
[1]);
1164 else if (TARGET_SHMEDIA
1165 && GET_CODE (operands
[1]) == LABEL_REF
1166 && target_reg_operand (operands
[0], mode
))
1170 temp
= no_new_pseudos
? operands
[0] : gen_reg_rtx (Pmode
);
1171 operands
[1] = legitimize_pic_address (operands
[1], mode
, temp
);
1174 else if (GET_CODE (operands
[1]) == CONST
1175 && GET_CODE (XEXP (operands
[1], 0)) == PLUS
1176 && SYMBOLIC_CONST_P (XEXP (XEXP (operands
[1], 0), 0)))
1178 temp
= no_new_pseudos
? operands
[0] : gen_reg_rtx (Pmode
);
1179 temp
= legitimize_pic_address (XEXP (XEXP (operands
[1], 0), 0),
1181 operands
[1] = expand_binop (mode
, add_optab
, temp
,
1182 XEXP (XEXP (operands
[1], 0), 1),
1183 no_new_pseudos
? temp
1184 : gen_reg_rtx (Pmode
),
1185 0, OPTAB_LIB_WIDEN
);
1189 if (! reload_in_progress
&& ! reload_completed
)
1191 /* Copy the source to a register if both operands aren't registers. */
1192 if (! register_operand (operands
[0], mode
)
1193 && ! sh_register_operand (operands
[1], mode
))
1194 operands
[1] = copy_to_mode_reg (mode
, operands
[1]);
1196 if (GET_CODE (operands
[0]) == MEM
&& ! memory_operand (operands
[0], mode
))
1198 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1199 except that we can't use that function because it is static. */
1200 rtx
new = change_address (operands
[0], mode
, 0);
1201 MEM_COPY_ATTRIBUTES (new, operands
[0]);
1205 /* This case can happen while generating code to move the result
1206 of a library call to the target. Reject `st r0,@(rX,rY)' because
1207 reload will fail to find a spill register for rX, since r0 is already
1208 being used for the source. */
1210 && refers_to_regno_p (R0_REG
, R0_REG
+ 1, operands
[1], (rtx
*)0)
1211 && GET_CODE (operands
[0]) == MEM
1212 && GET_CODE (XEXP (operands
[0], 0)) == PLUS
1213 && GET_CODE (XEXP (XEXP (operands
[0], 0), 1)) == REG
)
1214 operands
[1] = copy_to_mode_reg (mode
, operands
[1]);
1217 if (mode
== Pmode
|| mode
== ptr_mode
)
1220 enum tls_model tls_kind
;
1224 if (GET_CODE (op1
) == CONST
1225 && GET_CODE (XEXP (op1
, 0)) == PLUS
1226 && tls_symbolic_operand (XEXP (XEXP (op1
, 0), 0), Pmode
))
1228 opc
= XEXP (XEXP (op1
, 0), 1);
1229 op1
= XEXP (XEXP (op1
, 0), 0);
1234 if ((tls_kind
= tls_symbolic_operand (op1
, Pmode
)))
1236 rtx tga_op1
, tga_ret
, tmp
, tmp2
;
1240 case TLS_MODEL_GLOBAL_DYNAMIC
:
1241 tga_ret
= gen_rtx_REG (Pmode
, R0_REG
);
1242 emit_call_insn (gen_tls_global_dynamic (tga_ret
, op1
));
1246 case TLS_MODEL_LOCAL_DYNAMIC
:
1247 tga_ret
= gen_rtx_REG (Pmode
, R0_REG
);
1248 emit_call_insn (gen_tls_local_dynamic (tga_ret
, op1
));
1250 tmp
= gen_reg_rtx (Pmode
);
1251 emit_move_insn (tmp
, tga_ret
);
1253 if (register_operand (op0
, Pmode
))
1256 tmp2
= gen_reg_rtx (Pmode
);
1258 emit_insn (gen_symDTPOFF2reg (tmp2
, op1
, tmp
));
1262 case TLS_MODEL_INITIAL_EXEC
:
1265 /* Don't schedule insns for getting GOT address when
1266 the first scheduling is enabled, to avoid spill
1268 if (flag_schedule_insns
)
1269 emit_insn (gen_blockage ());
1270 emit_insn (gen_GOTaddr2picreg ());
1271 emit_insn (gen_rtx_USE (VOIDmode
, gen_rtx_REG (SImode
,
1273 if (flag_schedule_insns
)
1274 emit_insn (gen_blockage ());
1276 tga_op1
= no_new_pseudos
? op0
: gen_reg_rtx (Pmode
);
1277 tmp
= gen_sym2GOTTPOFF (op1
);
1278 emit_insn (gen_tls_initial_exec (tga_op1
, tmp
));
1282 case TLS_MODEL_LOCAL_EXEC
:
1283 tmp2
= gen_reg_rtx (Pmode
);
1284 emit_insn (gen_load_gbr (tmp2
));
1285 tmp
= gen_reg_rtx (Pmode
);
1286 emit_insn (gen_symTPOFF2reg (tmp
, op1
));
1288 if (register_operand (op0
, Pmode
))
1291 op1
= gen_reg_rtx (Pmode
);
1293 emit_insn (gen_addsi3 (op1
, tmp
, tmp2
));
1300 emit_insn (gen_addsi3 (op1
, op1
, force_reg (SImode
, opc
)));
1308 /* Prepare the operands for an scc instruction; make sure that the
1309 compare has been done. */
1311 prepare_scc_operands (enum rtx_code code
)
1313 rtx t_reg
= gen_rtx_REG (SImode
, T_REG
);
1314 enum rtx_code oldcode
= code
;
1315 enum machine_mode mode
;
1317 /* First need a compare insn. */
1321 /* It isn't possible to handle this case. */
1338 if (code
!= oldcode
)
1340 rtx tmp
= sh_compare_op0
;
1341 sh_compare_op0
= sh_compare_op1
;
1342 sh_compare_op1
= tmp
;
1345 mode
= GET_MODE (sh_compare_op0
);
1346 if (mode
== VOIDmode
)
1347 mode
= GET_MODE (sh_compare_op1
);
1349 sh_compare_op0
= force_reg (mode
, sh_compare_op0
);
1350 if ((code
!= EQ
&& code
!= NE
1351 && (sh_compare_op1
!= const0_rtx
1352 || code
== GTU
|| code
== GEU
|| code
== LTU
|| code
== LEU
))
1353 || (mode
== DImode
&& sh_compare_op1
!= const0_rtx
)
1354 || (TARGET_SH2E
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
))
1355 sh_compare_op1
= force_reg (mode
, sh_compare_op1
);
1357 if ((TARGET_SH4
|| TARGET_SH2A
) && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
1358 (mode
== SFmode
? emit_sf_insn
: emit_df_insn
)
1359 (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2,
1360 gen_rtx_SET (VOIDmode
, t_reg
,
1361 gen_rtx_fmt_ee (code
, SImode
,
1362 sh_compare_op0
, sh_compare_op1
)),
1363 gen_rtx_USE (VOIDmode
, get_fpscr_rtx ()))));
1365 emit_insn (gen_rtx_SET (VOIDmode
, t_reg
,
1366 gen_rtx_fmt_ee (code
, SImode
,
1367 sh_compare_op0
, sh_compare_op1
)));
1372 /* Called from the md file, set up the operands of a compare instruction. */
1375 from_compare (rtx
*operands
, int code
)
1377 enum machine_mode mode
= GET_MODE (sh_compare_op0
);
1379 if (mode
== VOIDmode
)
1380 mode
= GET_MODE (sh_compare_op1
);
1383 || (TARGET_SH2E
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
))
1385 /* Force args into regs, since we can't use constants here. */
1386 sh_compare_op0
= force_reg (mode
, sh_compare_op0
);
1387 if (sh_compare_op1
!= const0_rtx
1388 || code
== GTU
|| code
== GEU
1389 || (TARGET_SH2E
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
))
1390 sh_compare_op1
= force_reg (mode
, sh_compare_op1
);
1392 if (TARGET_SH2E
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
&& code
== GE
)
1394 from_compare (operands
, GT
);
1395 insn
= gen_ieee_ccmpeqsf_t (sh_compare_op0
, sh_compare_op1
);
1398 insn
= gen_rtx_SET (VOIDmode
,
1399 gen_rtx_REG (SImode
, T_REG
),
1400 gen_rtx_fmt_ee (code
, SImode
,
1401 sh_compare_op0
, sh_compare_op1
));
1402 if ((TARGET_SH4
|| TARGET_SH2A
) && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
1404 insn
= gen_rtx_PARALLEL (VOIDmode
,
1406 gen_rtx_USE (VOIDmode
, get_fpscr_rtx ())));
1407 (mode
== SFmode
? emit_sf_insn
: emit_df_insn
) (insn
);
1413 /* Functions to output assembly code. */
1415 /* Return a sequence of instructions to perform DI or DF move.
1417 Since the SH cannot move a DI or DF in one instruction, we have
1418 to take care when we see overlapping source and dest registers. */
1421 output_movedouble (rtx insn ATTRIBUTE_UNUSED
, rtx operands
[],
1422 enum machine_mode mode
)
1424 rtx dst
= operands
[0];
1425 rtx src
= operands
[1];
1427 if (GET_CODE (dst
) == MEM
1428 && GET_CODE (XEXP (dst
, 0)) == PRE_DEC
)
1429 return "mov.l %T1,%0\n\tmov.l %1,%0";
1431 if (register_operand (dst
, mode
)
1432 && register_operand (src
, mode
))
1434 if (REGNO (src
) == MACH_REG
)
1435 return "sts mach,%S0\n\tsts macl,%R0";
1437 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1438 when mov.d r1,r0 do r1->r0 then r2->r1. */
1440 if (REGNO (src
) + 1 == REGNO (dst
))
1441 return "mov %T1,%T0\n\tmov %1,%0";
1443 return "mov %1,%0\n\tmov %T1,%T0";
1445 else if (GET_CODE (src
) == CONST_INT
)
1447 if (INTVAL (src
) < 0)
1448 output_asm_insn ("mov #-1,%S0", operands
);
1450 output_asm_insn ("mov #0,%S0", operands
);
1452 return "mov %1,%R0";
1454 else if (GET_CODE (src
) == MEM
)
1457 int dreg
= REGNO (dst
);
1458 rtx inside
= XEXP (src
, 0);
1460 switch (GET_CODE (inside
))
1463 ptrreg
= REGNO (inside
);
1467 ptrreg
= subreg_regno (inside
);
1471 ptrreg
= REGNO (XEXP (inside
, 0));
1472 /* ??? An r0+REG address shouldn't be possible here, because it isn't
1473 an offsettable address. Unfortunately, offsettable addresses use
1474 QImode to check the offset, and a QImode offsettable address
1475 requires r0 for the other operand, which is not currently
1476 supported, so we can't use the 'o' constraint.
1477 Thus we must check for and handle r0+REG addresses here.
1478 We punt for now, since this is likely very rare. */
1479 gcc_assert (GET_CODE (XEXP (inside
, 1)) != REG
);
1483 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1485 return "mov.l %1,%0\n\tmov.l %1,%T0";
1490 /* Work out the safe way to copy. Copy into the second half first. */
1492 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1495 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1498 /* Print an instruction which would have gone into a delay slot after
1499 another instruction, but couldn't because the other instruction expanded
1500 into a sequence where putting the slot insn at the end wouldn't work. */
1503 print_slot (rtx insn
)
1505 final_scan_insn (XVECEXP (insn
, 0, 1), asm_out_file
, optimize
, 1, NULL
);
1507 INSN_DELETED_P (XVECEXP (insn
, 0, 1)) = 1;
1511 output_far_jump (rtx insn
, rtx op
)
1513 struct { rtx lab
, reg
, op
; } this;
1514 rtx braf_base_lab
= NULL_RTX
;
1517 int offset
= branch_dest (insn
) - INSN_ADDRESSES (INSN_UID (insn
));
1520 this.lab
= gen_label_rtx ();
1524 && offset
- get_attr_length (insn
) <= 32766)
1527 jump
= "mov.w %O0,%1; braf %1";
1535 jump
= "mov.l %O0,%1; braf %1";
1537 jump
= "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1540 jump
= "mov.l %O0,%1; jmp @%1";
1542 /* If we have a scratch register available, use it. */
1543 if (GET_CODE ((prev
= prev_nonnote_insn (insn
))) == INSN
1544 && INSN_CODE (prev
) == CODE_FOR_indirect_jump_scratch
)
1546 this.reg
= SET_DEST (XVECEXP (PATTERN (prev
), 0, 0));
1547 if (REGNO (this.reg
) == R0_REG
&& flag_pic
&& ! TARGET_SH2
)
1548 jump
= "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1549 output_asm_insn (jump
, &this.lab
);
1550 if (dbr_sequence_length ())
1551 print_slot (final_sequence
);
1553 output_asm_insn ("nop", 0);
1557 /* Output the delay slot insn first if any. */
1558 if (dbr_sequence_length ())
1559 print_slot (final_sequence
);
1561 this.reg
= gen_rtx_REG (SImode
, 13);
1562 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1563 Fortunately, MACL is fixed and call-clobbered, and we never
1564 need its value across jumps, so save r13 in it instead of in
1567 output_asm_insn ("lds r13, macl", 0);
1569 output_asm_insn ("mov.l r13,@-r15", 0);
1570 output_asm_insn (jump
, &this.lab
);
1572 output_asm_insn ("sts macl, r13", 0);
1574 output_asm_insn ("mov.l @r15+,r13", 0);
1576 if (far
&& flag_pic
&& TARGET_SH2
)
1578 braf_base_lab
= gen_label_rtx ();
1579 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
1580 CODE_LABEL_NUMBER (braf_base_lab
));
1583 output_asm_insn (".align 2", 0);
1584 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L", CODE_LABEL_NUMBER (this.lab
));
1586 if (far
&& flag_pic
)
1589 this.lab
= braf_base_lab
;
1590 output_asm_insn (".long %O2-%O0", &this.lab
);
1593 output_asm_insn (far
? ".long %O2" : ".word %O2-%O0", &this.lab
);
1597 /* Local label counter, used for constants in the pool and inside
1598 pattern branches. */
1600 static int lf
= 100;
1602 /* Output code for ordinary branches. */
1605 output_branch (int logic
, rtx insn
, rtx
*operands
)
1607 switch (get_attr_length (insn
))
1610 /* This can happen if filling the delay slot has caused a forward
1611 branch to exceed its range (we could reverse it, but only
1612 when we know we won't overextend other branches; this should
1613 best be handled by relaxation).
1614 It can also happen when other condbranches hoist delay slot insn
1615 from their destination, thus leading to code size increase.
1616 But the branch will still be in the range -4092..+4098 bytes. */
1621 /* The call to print_slot will clobber the operands. */
1622 rtx op0
= operands
[0];
1624 /* If the instruction in the delay slot is annulled (true), then
1625 there is no delay slot where we can put it now. The only safe
1626 place for it is after the label. final will do that by default. */
1629 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence
, 0, 0))
1630 && get_attr_length (XVECEXP (final_sequence
, 0, 1)))
1632 asm_fprintf (asm_out_file
, "\tb%s%ss\t%LLF%d\n", logic
? "f" : "t",
1633 ASSEMBLER_DIALECT
? "/" : ".", label
);
1634 print_slot (final_sequence
);
1637 asm_fprintf (asm_out_file
, "\tb%s\t%LLF%d\n", logic
? "f" : "t", label
);
1639 output_asm_insn ("bra\t%l0", &op0
);
1640 fprintf (asm_out_file
, "\tnop\n");
1641 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "LF", label
);
1645 /* When relaxing, handle this like a short branch. The linker
1646 will fix it up if it still doesn't fit after relaxation. */
1648 return logic
? "bt%.\t%l0" : "bf%.\t%l0";
1650 /* These are for SH2e, in which we have to account for the
1651 extra nop because of the hardware bug in annulled branches. */
1657 gcc_assert (!final_sequence
1658 || !(INSN_ANNULLED_BRANCH_P
1659 (XVECEXP (final_sequence
, 0, 0))));
1660 asm_fprintf (asm_out_file
, "b%s%ss\t%LLF%d\n",
1662 ASSEMBLER_DIALECT
? "/" : ".", label
);
1663 fprintf (asm_out_file
, "\tnop\n");
1664 output_asm_insn ("bra\t%l0", operands
);
1665 fprintf (asm_out_file
, "\tnop\n");
1666 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "LF", label
);
1670 /* When relaxing, fall through. */
1675 sprintf (buffer
, "b%s%ss\t%%l0",
1677 ASSEMBLER_DIALECT
? "/" : ".");
1678 output_asm_insn (buffer
, &operands
[0]);
1683 /* There should be no longer branches now - that would
1684 indicate that something has destroyed the branches set
1685 up in machine_dependent_reorg. */
1691 output_branchy_insn (enum rtx_code code
, const char *template,
1692 rtx insn
, rtx
*operands
)
1694 rtx next_insn
= NEXT_INSN (insn
);
1696 if (next_insn
&& GET_CODE (next_insn
) == JUMP_INSN
&& condjump_p (next_insn
))
1698 rtx src
= SET_SRC (PATTERN (next_insn
));
1699 if (GET_CODE (src
) == IF_THEN_ELSE
&& GET_CODE (XEXP (src
, 0)) != code
)
1701 /* Following branch not taken */
1702 operands
[9] = gen_label_rtx ();
1703 emit_label_after (operands
[9], next_insn
);
1704 INSN_ADDRESSES_NEW (operands
[9],
1705 INSN_ADDRESSES (INSN_UID (next_insn
))
1706 + get_attr_length (next_insn
));
1711 int offset
= (branch_dest (next_insn
)
1712 - INSN_ADDRESSES (INSN_UID (next_insn
)) + 4);
1713 if (offset
>= -252 && offset
<= 258)
1715 if (GET_CODE (src
) == IF_THEN_ELSE
)
1717 src
= XEXP (src
, 1);
1723 operands
[9] = gen_label_rtx ();
1724 emit_label_after (operands
[9], insn
);
1725 INSN_ADDRESSES_NEW (operands
[9],
1726 INSN_ADDRESSES (INSN_UID (insn
))
1727 + get_attr_length (insn
));
1732 output_ieee_ccmpeq (rtx insn
, rtx
*operands
)
1734 return output_branchy_insn (NE
, "bt\t%l9\n\tfcmp/eq\t%1,%0",
1738 /* Output the start of the assembler file. */
1741 sh_file_start (void)
1743 default_file_start ();
1746 /* Declare the .directive section before it is used. */
1747 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file
);
1748 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file
);
1752 /* We need to show the text section with the proper
1753 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1754 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1755 will complain. We can teach GAS specifically about the
1756 default attributes for our choice of text section, but
1757 then we would have to change GAS again if/when we change
1758 the text section name. */
1759 fprintf (asm_out_file
, "%s\n", TEXT_SECTION_ASM_OP
);
1761 /* Switch to the data section so that the coffsem symbol
1762 isn't in the text section. */
1765 if (TARGET_LITTLE_ENDIAN
)
1766 fputs ("\t.little\n", asm_out_file
);
1770 if (TARGET_SHCOMPACT
)
1771 fputs ("\t.mode\tSHcompact\n", asm_out_file
);
1772 else if (TARGET_SHMEDIA
)
1773 fprintf (asm_out_file
, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1774 TARGET_SHMEDIA64
? 64 : 32);
1778 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1781 unspec_caller_rtx_p (rtx pat
)
1783 switch (GET_CODE (pat
))
1786 return unspec_caller_rtx_p (XEXP (pat
, 0));
1789 if (unspec_caller_rtx_p (XEXP (pat
, 0)))
1791 return unspec_caller_rtx_p (XEXP (pat
, 1));
1793 if (XINT (pat
, 1) == UNSPEC_CALLER
)
1802 /* Indicate that INSN cannot be duplicated. This is true for an insn
1803 that generates a unique label. */
1806 sh_cannot_copy_insn_p (rtx insn
)
1810 if (!reload_completed
|| !flag_pic
)
1813 if (GET_CODE (insn
) != INSN
)
1815 if (asm_noperands (insn
) >= 0)
1818 pat
= PATTERN (insn
);
1819 if (GET_CODE (pat
) != SET
)
1821 pat
= SET_SRC (pat
);
1823 if (unspec_caller_rtx_p (pat
))
1829 /* Actual number of instructions used to make an arithmetic right shift
   by N. The table is indexed directly by the shift count and holds 32
   entries, one for each count 0..31. */
1830 static const char ashiftrt_insns
[] =
1831 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1833 /* Number of instructions needed for a shift by N, indexed by the shift
   count (32 entries, counts 0..31). Left shift and logical right shift
   are the same. shift_insns[N] is also the number of leading entries
   of shift_amounts[N] that gen_shifty_op walks when it emits the shift
   sequence. */
1834 static const char shift_insns
[] =
1835 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1837 /* Individual shift amounts needed to get the above length sequences.
1838 One bit right shifts clobber the T bit, so when possible, put one bit
1839 shifts in the middle of the sequence, so the ends are eligible for
1840 branch delay slots.
   Row N lists the component shifts for a total shift by N; the used
   entries of each row add up to N (e.g. row 14 is {8, -2, 8}), and
   shift_insns[N] says how many entries of the row are used.
   NOTE(review): a negative entry presumably denotes a shift by that
   many bits in the opposite direction -- confirm against gen_ashift. */
1841 static const short shift_amounts
[32][5] = {
1842 {0}, {1}, {2}, {2, 1},
1843 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1844 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1845 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1846 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1847 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1848 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1849 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1851 /* Likewise, but for shift amounts < 16, up to three highmost bits
1852 might be clobbered. This is typically used when combined with some
1853 kind of sign or zero extension.
   Indexed by the shift count (32 entries, counts 0..31).
   ext_shift_insns[N] is the number of entries of ext_shift_amounts[N]
   that gen_shifty_hi_op emits. */
1855 static const char ext_shift_insns
[] =
1856 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
/* Component shift amounts matching ext_shift_insns: row N lists the
   individual shifts for a total shift by N, and its used entries add up
   to N (e.g. row 6 is {8, -2}, row 13 is {16, -2, -1}). Rows 16..31
   are the same as in shift_amounts.
   NOTE(review): a negative entry presumably denotes a shift in the
   opposite direction, which is what can clobber the highmost bits --
   confirm against gen_ashift. */
1858 static const short ext_shift_amounts
[32][4] = {
1859 {0}, {1}, {2}, {2, 1},
1860 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1861 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1862 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1863 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1864 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1865 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1866 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1868 /* Assuming we have a value that has been sign-extended by at least one bit,
1869 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1870 to shift it by N without data loss, and quicker than by other means?
   The test ((n) | 8) == 15 is nonzero exactly for N == 7 and N == 15,
   whose ext_shift_amounts rows are {8, -1} and {16, -1}. */
1871 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
1873 /* This is used in length attributes in sh.md to help compute the length
1874 of arbitrary constant shift instructions. */
1877 shift_insns_rtx (rtx insn
)
1879 rtx set_src
= SET_SRC (XVECEXP (PATTERN (insn
), 0, 0));
1880 int shift_count
= INTVAL (XEXP (set_src
, 1));
1881 enum rtx_code shift_code
= GET_CODE (set_src
);
1886 return ashiftrt_insns
[shift_count
];
1889 return shift_insns
[shift_count
];
1895 /* Return the cost of a shift. */
1905 if (GET_MODE_SIZE (GET_MODE (x
)) > UNITS_PER_WORD
)
1907 if (GET_MODE (x
) == DImode
1908 && GET_CODE (XEXP (x
, 1)) == CONST_INT
1909 && INTVAL (XEXP (x
, 1)) == 1)
1912 /* Everything else is invalid, because there is no pattern for it. */
1915 /* If shift by a non constant, then this will be expensive. */
1916 if (GET_CODE (XEXP (x
, 1)) != CONST_INT
)
1917 return SH_DYNAMIC_SHIFT_COST
;
1919 value
= INTVAL (XEXP (x
, 1));
1921 /* Otherwise, return the true cost in instructions. */
1922 if (GET_CODE (x
) == ASHIFTRT
)
1924 int cost
= ashiftrt_insns
[value
];
1925 /* If SH3, then we put the constant in a reg and use shad. */
1926 if (cost
> 1 + SH_DYNAMIC_SHIFT_COST
)
1927 cost
= 1 + SH_DYNAMIC_SHIFT_COST
;
1931 return shift_insns
[value
];
1934 /* Return the cost of an AND operation. */
1941 /* Anding with a register is a single cycle and instruction. */
1942 if (GET_CODE (XEXP (x
, 1)) != CONST_INT
)
1945 i
= INTVAL (XEXP (x
, 1));
1949 if ((GET_CODE (XEXP (x
, 1)) == CONST_INT
1950 && CONST_OK_FOR_I16 (INTVAL (XEXP (x
, 1))))
1951 || EXTRA_CONSTRAINT_C16 (XEXP (x
, 1)))
1957 /* These constants are single cycle extu.[bw] instructions. */
1958 if (i
== 0xff || i
== 0xffff)
1960 /* Constants that can be used in an and immediate instruction in a single
1961 cycle, but this requires r0, so make it a little more expensive. */
1962 if (CONST_OK_FOR_K08 (i
))
1964 /* Constants that can be loaded with a mov immediate and an and.
1965 This case is probably unnecessary. */
1966 if (CONST_OK_FOR_I08 (i
))
1968 /* Any other constant requires a 2 cycle pc-relative load plus an and.
1969 This case is probably unnecessary. */
1973 /* Return the cost of an addition or a subtraction. */
1978 /* Adding a register is a single cycle insn. */
1979 if (GET_CODE (XEXP (x
, 1)) == REG
1980 || GET_CODE (XEXP (x
, 1)) == SUBREG
)
1983 /* Likewise for small constants. */
1984 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
1985 && CONST_OK_FOR_ADD (INTVAL (XEXP (x
, 1))))
1989 switch (GET_CODE (XEXP (x
, 1)))
1994 return TARGET_SHMEDIA64
? 5 : 3;
1997 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x
, 1))))
1999 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x
, 1)) >> 16))
2001 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x
, 1)) >> 16) >> 16))
2009 /* Any other constant requires a 2 cycle pc-relative load plus an
2014 /* Return the cost of a multiply. */
2016 multcosts (rtx x ATTRIBUTE_UNUSED
)
2018 if (sh_multcost
>= 0)
2021 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2022 accept constants. Ideally, we would use a cost of one or two and
2023 add the cost of the operand, but disregard the latter when inside loops
2024 and loop invariant code motion is still to follow.
2025 Using a multiply first and splitting it later if it's a loss
2026 doesn't work because of different sign / zero extension semantics
2027 of multiplies vs. shifts. */
2028 return TARGET_SMALLCODE
? 2 : 3;
2032 /* We have a mul insn, so we can never take more than the mul and the
2033 read of the mac reg, but count more because of the latency and extra
2035 if (TARGET_SMALLCODE
)
2040 /* If we're aiming at small code, then just count the number of
2041 insns in a multiply call sequence. */
2042 if (TARGET_SMALLCODE
)
2045 /* Otherwise count all the insns in the routine we'd be calling too. */
2049 /* Compute a (partial) cost for rtx X. Return true if the complete
2050 cost has been computed, and false if subexpressions should be
2051 scanned. In either case, *TOTAL contains the cost result. */
2054 sh_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
2061 if (INTVAL (x
) == 0)
2063 else if (outer_code
== AND
&& and_operand ((x
), DImode
))
2065 else if ((outer_code
== IOR
|| outer_code
== XOR
2066 || outer_code
== PLUS
)
2067 && CONST_OK_FOR_I10 (INTVAL (x
)))
2069 else if (CONST_OK_FOR_I16 (INTVAL (x
)))
2070 *total
= COSTS_N_INSNS (outer_code
!= SET
);
2071 else if (CONST_OK_FOR_I16 (INTVAL (x
) >> 16))
2072 *total
= COSTS_N_INSNS ((outer_code
!= SET
) + 1);
2073 else if (CONST_OK_FOR_I16 ((INTVAL (x
) >> 16) >> 16))
2074 *total
= COSTS_N_INSNS (3);
2076 *total
= COSTS_N_INSNS (4);
2079 if (CONST_OK_FOR_I08 (INTVAL (x
)))
2081 else if ((outer_code
== AND
|| outer_code
== IOR
|| outer_code
== XOR
)
2082 && CONST_OK_FOR_K08 (INTVAL (x
)))
2091 if (TARGET_SHMEDIA64
)
2092 *total
= COSTS_N_INSNS (4);
2093 else if (TARGET_SHMEDIA32
)
2094 *total
= COSTS_N_INSNS (2);
2101 *total
= COSTS_N_INSNS (4);
2106 if (x
== CONST0_RTX (GET_MODE (x
)))
2108 else if (sh_1el_vec (x
, VOIDmode
))
2109 *total
= outer_code
!= SET
;
2110 if (sh_rep_vec (x
, VOIDmode
))
2111 *total
= ((GET_MODE_UNIT_SIZE (GET_MODE (x
)) + 3) / 4
2112 + (outer_code
!= SET
));
2113 *total
= COSTS_N_INSNS (3) + (outer_code
!= SET
);
2118 *total
= COSTS_N_INSNS (addsubcosts (x
));
2122 *total
= COSTS_N_INSNS (andcosts (x
));
2126 *total
= COSTS_N_INSNS (multcosts (x
));
2132 *total
= COSTS_N_INSNS (shiftcosts (x
));
2139 *total
= COSTS_N_INSNS (20);
2143 if (sh_1el_vec (x
, VOIDmode
))
2144 *total
= outer_code
!= SET
;
2145 if (sh_rep_vec (x
, VOIDmode
))
2146 *total
= ((GET_MODE_UNIT_SIZE (GET_MODE (x
)) + 3) / 4
2147 + (outer_code
!= SET
));
2148 *total
= COSTS_N_INSNS (3) + (outer_code
!= SET
);
2161 /* Compute the cost of an address. For the SH, all valid addresses are
2162 the same cost. Use a slightly higher cost for reg + reg addressing,
2163 since it increases pressure on r0. */
2166 sh_address_cost (rtx X
)
2168 return (GET_CODE (X
) == PLUS
2169 && ! CONSTANT_P (XEXP (X
, 1))
2170 && ! TARGET_SHMEDIA
? 1 : 0);
2173 /* Code to expand a shift. */
2176 gen_ashift (int type
, int n
, rtx reg
)
2178 /* Negative values here come from the shift_amounts array. */
2191 emit_insn (gen_ashrsi3_k (reg
, reg
, GEN_INT (n
)));
2195 emit_insn (gen_lshrsi3_m (reg
, reg
, GEN_INT (n
)));
2197 emit_insn (gen_lshrsi3_k (reg
, reg
, GEN_INT (n
)));
2200 emit_insn (gen_ashlsi3_std (reg
, reg
, GEN_INT (n
)));
2205 /* Same for HImode */
2208 gen_ashift_hi (int type
, int n
, rtx reg
)
2210 /* Negative values here come from the shift_amounts array. */
2224 /* We don't have HImode right shift operations because using the
2225 ordinary 32 bit shift instructions for that doesn't generate proper
2226 zero/sign extension.
2227 gen_ashift_hi is only called in contexts where we know that the
2228 sign extension works out correctly. */
2231 if (GET_CODE (reg
) == SUBREG
)
2233 offset
= SUBREG_BYTE (reg
);
2234 reg
= SUBREG_REG (reg
);
2236 gen_ashift (type
, n
, gen_rtx_SUBREG (SImode
, reg
, offset
));
2240 emit_insn (gen_ashlhi3_k (reg
, reg
, GEN_INT (n
)));
2245 /* Output RTL to split a constant shift into its component SH constant
2246 shift instructions. */
2249 gen_shifty_op (int code
, rtx
*operands
)
2251 int value
= INTVAL (operands
[2]);
2254 /* Truncate the shift count in case it is out of bounds. */
2255 value
= value
& 0x1f;
2259 if (code
== LSHIFTRT
)
2261 emit_insn (gen_rotlsi3_1 (operands
[0], operands
[0]));
2262 emit_insn (gen_movt (operands
[0]));
2265 else if (code
== ASHIFT
)
2267 /* There is a two instruction sequence for 31 bit left shifts,
2268 but it requires r0. */
2269 if (GET_CODE (operands
[0]) == REG
&& REGNO (operands
[0]) == 0)
2271 emit_insn (gen_andsi3 (operands
[0], operands
[0], const1_rtx
));
2272 emit_insn (gen_rotlsi3_31 (operands
[0], operands
[0]));
2277 else if (value
== 0)
2279 /* This can happen even when optimizing, if there were subregs before
2280 reload. Don't output a nop here, as this is never optimized away;
2281 use a no-op move instead. */
2282 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], operands
[0]));
2286 max
= shift_insns
[value
];
2287 for (i
= 0; i
< max
; i
++)
2288 gen_ashift (code
, shift_amounts
[value
][i
], operands
[0]);
2291 /* Same as above, but optimized for values where the topmost bits don't
2295 gen_shifty_hi_op (int code
, rtx
*operands
)
2297 int value
= INTVAL (operands
[2]);
2299 void (*gen_fun
) (int, int, rtx
);
2301 /* This operation is used by and_shl for SImode values with a few
2302 high bits known to be cleared. */
2306 emit_insn (gen_nop ());
2310 gen_fun
= GET_MODE (operands
[0]) == HImode
? gen_ashift_hi
: gen_ashift
;
2313 max
= ext_shift_insns
[value
];
2314 for (i
= 0; i
< max
; i
++)
2315 gen_fun (code
, ext_shift_amounts
[value
][i
], operands
[0]);
2318 /* When shifting right, emit the shifts in reverse order, so that
2319 solitary negative values come first. */
2320 for (i
= ext_shift_insns
[value
] - 1; i
>= 0; i
--)
2321 gen_fun (code
, ext_shift_amounts
[value
][i
], operands
[0]);
2324 /* Output RTL for an arithmetic right shift. */
2326 /* ??? Rewrite to use super-optimizer sequences. */
2329 expand_ashiftrt (rtx
*operands
)
2337 if (GET_CODE (operands
[2]) != CONST_INT
)
2339 rtx count
= copy_to_mode_reg (SImode
, operands
[2]);
2340 emit_insn (gen_negsi2 (count
, count
));
2341 emit_insn (gen_ashrsi3_d (operands
[0], operands
[1], count
));
2344 else if (ashiftrt_insns
[INTVAL (operands
[2]) & 31]
2345 > 1 + SH_DYNAMIC_SHIFT_COST
)
2348 = force_reg (SImode
, GEN_INT (- (INTVAL (operands
[2]) & 31)));
2349 emit_insn (gen_ashrsi3_d (operands
[0], operands
[1], count
));
2353 if (GET_CODE (operands
[2]) != CONST_INT
)
2356 value
= INTVAL (operands
[2]) & 31;
2360 /* If we are called from abs expansion, arrange things so that
2361 we can use a single MT instruction that doesn't clobber the source,
2362 if LICM can hoist out the load of the constant zero. */
2363 if (currently_expanding_to_rtl
)
2365 emit_insn (gen_cmpgtsi_t (force_reg (SImode
, CONST0_RTX (SImode
)),
2367 emit_insn (gen_mov_neg_si_t (operands
[0]));
2370 emit_insn (gen_ashrsi2_31 (operands
[0], operands
[1]));
2373 else if (value
>= 16 && value
<= 19)
2375 wrk
= gen_reg_rtx (SImode
);
2376 emit_insn (gen_ashrsi2_16 (wrk
, operands
[1]));
2379 gen_ashift (ASHIFTRT
, 1, wrk
);
2380 emit_move_insn (operands
[0], wrk
);
2383 /* Expand a short sequence inline, longer call a magic routine. */
2384 else if (value
<= 5)
2386 wrk
= gen_reg_rtx (SImode
);
2387 emit_move_insn (wrk
, operands
[1]);
2389 gen_ashift (ASHIFTRT
, 1, wrk
);
2390 emit_move_insn (operands
[0], wrk
);
2394 wrk
= gen_reg_rtx (Pmode
);
2396 /* Load the value into an arg reg and call a helper. */
2397 emit_move_insn (gen_rtx_REG (SImode
, 4), operands
[1]);
2398 sprintf (func
, "__ashiftrt_r4_%d", value
);
2399 function_symbol (wrk
, func
, SFUNC_STATIC
);
2400 emit_insn (gen_ashrsi3_n (GEN_INT (value
), wrk
));
2401 emit_move_insn (operands
[0], gen_rtx_REG (SImode
, 4));
2406 sh_dynamicalize_shift_p (rtx count
)
2408 return shift_insns
[INTVAL (count
)] > 1 + SH_DYNAMIC_SHIFT_COST
;
2411 /* Try to find a good way to implement the combiner pattern
2412 [(set (match_operand:SI 0 "register_operand" "r")
2413 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2414 (match_operand:SI 2 "const_int_operand" "n"))
2415 (match_operand:SI 3 "const_int_operand" "n"))) .
2416 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2417 return 0 for simple right / left or left/right shift combination.
2418 return 1 for a combination of shifts with zero_extend.
2419 return 2 for a combination of shifts with an AND that needs r0.
2420 return 3 for a combination of shifts with an AND that needs an extra
2421 scratch register, when the three highmost bits of the AND mask are clear.
2422 return 4 for a combination of shifts with an AND that needs an extra
2423 scratch register, when any of the three highmost bits of the AND mask is set.
2425 If ATTRP is set, store an initial right shift width in ATTRP[0],
2426 and the instruction length in ATTRP[1]. These values are not valid when returning 0.
2428 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2429 shift_amounts for the last shift value that is to be used before the zero extend. */
2432 shl_and_kind (rtx left_rtx
, rtx mask_rtx
, int *attrp
)
2434 unsigned HOST_WIDE_INT mask
, lsb
, mask2
, lsb2
;
2435 int left
= INTVAL (left_rtx
), right
;
2437 int cost
, best_cost
= 10000;
2438 int best_right
= 0, best_len
= 0;
2442 if (left
< 0 || left
> 31)
2444 if (GET_CODE (mask_rtx
) == CONST_INT
)
2445 mask
= (unsigned HOST_WIDE_INT
) INTVAL (mask_rtx
) >> left
;
2447 mask
= (unsigned HOST_WIDE_INT
) GET_MODE_MASK (SImode
) >> left
;
2448 /* Can this be expressed as a right shift / left shift pair? */
2449 lsb
= ((mask
^ (mask
- 1)) >> 1) + 1;
2450 right
= exact_log2 (lsb
);
2451 mask2
= ~(mask
+ lsb
- 1);
2452 lsb2
= ((mask2
^ (mask2
- 1)) >> 1) + 1;
2453 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2455 best_cost
= shift_insns
[right
] + shift_insns
[right
+ left
];
2456 /* mask has no trailing zeroes <==> ! right */
2457 else if (! right
&& mask2
== ~(lsb2
- 1))
2459 int late_right
= exact_log2 (lsb2
);
2460 best_cost
= shift_insns
[left
+ late_right
] + shift_insns
[late_right
];
2462 /* Try to use zero extend. */
2463 if (mask2
== ~(lsb2
- 1))
2467 for (width
= 8; width
<= 16; width
+= 8)
2469 /* Can we zero-extend right away? */
2470 if (lsb2
== (unsigned HOST_WIDE_INT
) 1 << width
)
2473 = 1 + ext_shift_insns
[right
] + ext_shift_insns
[left
+ right
];
2474 if (cost
< best_cost
)
2485 /* ??? Could try to put zero extend into initial right shift,
2486 or even shift a bit left before the right shift. */
2487 /* Determine value of first part of left shift, to get to the
2488 zero extend cut-off point. */
2489 first
= width
- exact_log2 (lsb2
) + right
;
2490 if (first
>= 0 && right
+ left
- first
>= 0)
2492 cost
= ext_shift_insns
[right
] + ext_shift_insns
[first
] + 1
2493 + ext_shift_insns
[right
+ left
- first
];
2494 if (cost
< best_cost
)
2506 /* Try to use r0 AND pattern */
2507 for (i
= 0; i
<= 2; i
++)
2511 if (! CONST_OK_FOR_K08 (mask
>> i
))
2513 cost
= (i
!= 0) + 2 + ext_shift_insns
[left
+ i
];
2514 if (cost
< best_cost
)
2519 best_len
= cost
- 1;
2522 /* Try to use a scratch register to hold the AND operand. */
2523 can_ext
= ((mask
<< left
) & ((unsigned HOST_WIDE_INT
) 3 << 30)) == 0;
2524 for (i
= 0; i
<= 2; i
++)
2528 cost
= (i
!= 0) + (CONST_OK_FOR_I08 (mask
>> i
) ? 2 : 3)
2529 + (can_ext
? ext_shift_insns
: shift_insns
)[left
+ i
];
2530 if (cost
< best_cost
)
2535 best_len
= cost
- 1 - ! CONST_OK_FOR_I08 (mask
>> i
);
2541 attrp
[0] = best_right
;
2542 attrp
[1] = best_len
;
2547 /* This is used in length attributes of the unnamed instructions
2548 corresponding to shl_and_kind return values of 1 and 2. */
2550 shl_and_length (rtx insn
)
2552 rtx set_src
, left_rtx
, mask_rtx
;
2555 set_src
= SET_SRC (XVECEXP (PATTERN (insn
), 0, 0));
2556 left_rtx
= XEXP (XEXP (set_src
, 0), 1);
2557 mask_rtx
= XEXP (set_src
, 1);
2558 shl_and_kind (left_rtx
, mask_rtx
, attributes
);
2559 return attributes
[1];
2562 /* This is used in length attribute of the and_shl_scratch instruction. */
2565 shl_and_scr_length (rtx insn
)
2567 rtx set_src
= SET_SRC (XVECEXP (PATTERN (insn
), 0, 0));
2568 int len
= shift_insns
[INTVAL (XEXP (set_src
, 1))];
2569 rtx op
= XEXP (set_src
, 0);
2570 len
+= shift_insns
[INTVAL (XEXP (op
, 1))] + 1;
2571 op
= XEXP (XEXP (op
, 0), 0);
2572 return len
+ shift_insns
[INTVAL (XEXP (op
, 1))];
2575 /* Generate rtl for instructions for which shl_and_kind advised a particular
2576 method of generating them, i.e. returned zero. */
2579 gen_shl_and (rtx dest
, rtx left_rtx
, rtx mask_rtx
, rtx source
)
2582 unsigned HOST_WIDE_INT mask
;
2583 int kind
= shl_and_kind (left_rtx
, mask_rtx
, attributes
);
2584 int right
, total_shift
;
2585 void (*shift_gen_fun
) (int, rtx
*) = gen_shifty_hi_op
;
2587 right
= attributes
[0];
2588 total_shift
= INTVAL (left_rtx
) + right
;
2589 mask
= (unsigned HOST_WIDE_INT
) INTVAL (mask_rtx
) >> total_shift
;
2596 int first
= attributes
[2];
2601 emit_insn ((mask
<< right
) <= 0xff
2602 ? gen_zero_extendqisi2 (dest
,
2603 gen_lowpart (QImode
, source
))
2604 : gen_zero_extendhisi2 (dest
,
2605 gen_lowpart (HImode
, source
)));
2609 emit_insn (gen_movsi (dest
, source
));
2613 operands
[2] = GEN_INT (right
);
2614 gen_shifty_hi_op (LSHIFTRT
, operands
);
2618 operands
[2] = GEN_INT (first
);
2619 gen_shifty_hi_op (ASHIFT
, operands
);
2620 total_shift
-= first
;
2624 emit_insn (mask
<= 0xff
2625 ? gen_zero_extendqisi2 (dest
, gen_lowpart (QImode
, dest
))
2626 : gen_zero_extendhisi2 (dest
, gen_lowpart (HImode
, dest
)));
2627 if (total_shift
> 0)
2629 operands
[2] = GEN_INT (total_shift
);
2630 gen_shifty_hi_op (ASHIFT
, operands
);
2635 shift_gen_fun
= gen_shifty_op
;
2637 /* If the topmost bit that matters is set, set the topmost bits
2638 that don't matter. This way, we might be able to get a shorter
2640 if (mask
& ((HOST_WIDE_INT
) 1 << (31 - total_shift
)))
2641 mask
|= (HOST_WIDE_INT
) ~0 << (31 - total_shift
);
2643 /* Don't expand fine-grained when combining, because that will
2644 make the pattern fail. */
2645 if (currently_expanding_to_rtl
2646 || reload_in_progress
|| reload_completed
)
2650 /* Cases 3 and 4 should be handled by this split
2651 only while combining */
2652 gcc_assert (kind
<= 2);
2655 emit_insn (gen_lshrsi3 (dest
, source
, GEN_INT (right
)));
2658 emit_insn (gen_andsi3 (dest
, source
, GEN_INT (mask
)));
2663 operands
[2] = GEN_INT (total_shift
);
2664 shift_gen_fun (ASHIFT
, operands
);
2671 if (kind
!= 4 && total_shift
< 16)
2673 neg
= -ext_shift_amounts
[total_shift
][1];
2675 neg
-= ext_shift_amounts
[total_shift
][2];
2679 emit_insn (gen_and_shl_scratch (dest
, source
,
2682 GEN_INT (total_shift
+ neg
),
2684 emit_insn (gen_movsi (dest
, dest
));
2691 /* Try to find a good way to implement the combiner pattern
2692 [(set (match_operand:SI 0 "register_operand" "=r")
2693 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2694 (match_operand:SI 2 "const_int_operand" "n")
2695 (match_operand:SI 3 "const_int_operand" "n")
2697 (clobber (reg:SI T_REG))]
2698 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2699 return 0 for simple left / right shift combination.
2700 return 1 for left shift / 8 bit sign extend / left shift.
2701 return 2 for left shift / 16 bit sign extend / left shift.
2702 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2703 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2704 return 5 for left shift / 16 bit sign extend / right shift
2705 return 6 for < 8 bit sign extend / left shift.
2706 return 7 for < 8 bit sign extend / left shift / single right shift.
2707 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2710 shl_sext_kind (rtx left_rtx
, rtx size_rtx
, int *costp
)
2712 int left
, size
, insize
, ext
;
2713 int cost
= 0, best_cost
;
2716 left
= INTVAL (left_rtx
);
2717 size
= INTVAL (size_rtx
);
2718 insize
= size
- left
;
2719 gcc_assert (insize
> 0);
2720 /* Default to left / right shift. */
2722 best_cost
= shift_insns
[32 - insize
] + ashiftrt_insns
[32 - size
];
2725 /* 16 bit shift / sign extend / 16 bit shift */
2726 cost
= shift_insns
[16 - insize
] + 1 + ashiftrt_insns
[16 - size
];
2727 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2728 below, by alternative 3 or something even better. */
2729 if (cost
< best_cost
)
2735 /* Try a plain sign extend between two shifts. */
2736 for (ext
= 16; ext
>= insize
; ext
-= 8)
2740 cost
= ext_shift_insns
[ext
- insize
] + 1 + shift_insns
[size
- ext
];
2741 if (cost
< best_cost
)
2743 kind
= ext
/ (unsigned) 8;
2747 /* Check if we can do a sloppy shift with a final signed shift
2748 restoring the sign. */
2749 if (EXT_SHIFT_SIGNED (size
- ext
))
2750 cost
= ext_shift_insns
[ext
- insize
] + ext_shift_insns
[size
- ext
] + 1;
2751 /* If not, maybe it's still cheaper to do the second shift sloppy,
2752 and do a final sign extend? */
2753 else if (size
<= 16)
2754 cost
= ext_shift_insns
[ext
- insize
] + 1
2755 + ext_shift_insns
[size
> ext
? size
- ext
: ext
- size
] + 1;
2758 if (cost
< best_cost
)
2760 kind
= ext
/ (unsigned) 8 + 2;
2764 /* Check if we can sign extend in r0 */
2767 cost
= 3 + shift_insns
[left
];
2768 if (cost
< best_cost
)
2773 /* Try the same with a final signed shift. */
2776 cost
= 3 + ext_shift_insns
[left
+ 1] + 1;
2777 if (cost
< best_cost
)
2786 /* Try to use a dynamic shift. */
2787 cost
= shift_insns
[32 - insize
] + 1 + SH_DYNAMIC_SHIFT_COST
;
2788 if (cost
< best_cost
)
2799 /* Function to be used in the length attribute of the instructions
2800 implementing this pattern. */
2803 shl_sext_length (rtx insn
)
2805 rtx set_src
, left_rtx
, size_rtx
;
2808 set_src
= SET_SRC (XVECEXP (PATTERN (insn
), 0, 0));
2809 left_rtx
= XEXP (XEXP (set_src
, 0), 1);
2810 size_rtx
= XEXP (set_src
, 1);
2811 shl_sext_kind (left_rtx
, size_rtx
, &cost
);
2815 /* Generate rtl for this pattern */
2818 gen_shl_sext (rtx dest
, rtx left_rtx
, rtx size_rtx
, rtx source
)
2821 int left
, size
, insize
, cost
;
2824 kind
= shl_sext_kind (left_rtx
, size_rtx
, &cost
);
2825 left
= INTVAL (left_rtx
);
2826 size
= INTVAL (size_rtx
);
2827 insize
= size
- left
;
2835 int ext
= kind
& 1 ? 8 : 16;
2836 int shift2
= size
- ext
;
2838 /* Don't expand fine-grained when combining, because that will
2839 make the pattern fail. */
2840 if (! currently_expanding_to_rtl
2841 && ! reload_in_progress
&& ! reload_completed
)
2843 emit_insn (gen_shl_sext_ext (dest
, source
, left_rtx
, size_rtx
));
2844 emit_insn (gen_movsi (dest
, source
));
2848 emit_insn (gen_movsi (dest
, source
));
2852 operands
[2] = GEN_INT (ext
- insize
);
2853 gen_shifty_hi_op (ASHIFT
, operands
);
2856 ? gen_extendqisi2 (dest
, gen_lowpart (QImode
, dest
))
2857 : gen_extendhisi2 (dest
, gen_lowpart (HImode
, dest
)));
2862 operands
[2] = GEN_INT (shift2
);
2863 gen_shifty_op (ASHIFT
, operands
);
2870 if (EXT_SHIFT_SIGNED (shift2
))
2872 operands
[2] = GEN_INT (shift2
+ 1);
2873 gen_shifty_op (ASHIFT
, operands
);
2874 operands
[2] = const1_rtx
;
2875 gen_shifty_op (ASHIFTRT
, operands
);
2878 operands
[2] = GEN_INT (shift2
);
2879 gen_shifty_hi_op (ASHIFT
, operands
);
2883 operands
[2] = GEN_INT (-shift2
);
2884 gen_shifty_hi_op (LSHIFTRT
, operands
);
2886 emit_insn (size
<= 8
2887 ? gen_extendqisi2 (dest
, gen_lowpart (QImode
, dest
))
2888 : gen_extendhisi2 (dest
, gen_lowpart (HImode
, dest
)));
2895 if (! currently_expanding_to_rtl
2896 && ! reload_in_progress
&& ! reload_completed
)
2897 emit_insn (gen_shl_sext_ext (dest
, source
, left_rtx
, size_rtx
));
2901 operands
[2] = GEN_INT (16 - insize
);
2902 gen_shifty_hi_op (ASHIFT
, operands
);
2903 emit_insn (gen_extendhisi2 (dest
, gen_lowpart (HImode
, dest
)));
2905 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2907 gen_ashift (ASHIFTRT
, 1, dest
);
2912 /* Don't expand fine-grained when combining, because that will
2913 make the pattern fail. */
2914 if (! currently_expanding_to_rtl
2915 && ! reload_in_progress
&& ! reload_completed
)
2917 emit_insn (gen_shl_sext_ext (dest
, source
, left_rtx
, size_rtx
));
2918 emit_insn (gen_movsi (dest
, source
));
2921 emit_insn (gen_andsi3 (dest
, source
, GEN_INT ((1 << insize
) - 1)));
2922 emit_insn (gen_xorsi3 (dest
, dest
, GEN_INT (1 << (insize
- 1))));
2923 emit_insn (gen_addsi3 (dest
, dest
, GEN_INT (-1 << (insize
- 1))));
2925 operands
[2] = kind
== 7 ? GEN_INT (left
+ 1) : left_rtx
;
2926 gen_shifty_op (ASHIFT
, operands
);
2928 emit_insn (gen_ashrsi3_k (dest
, dest
, const1_rtx
));
2936 /* Prefix a symbol_ref name with "datalabel". */
2939 gen_datalabel_ref (rtx sym
)
2943 if (GET_CODE (sym
) == LABEL_REF
)
2944 return gen_rtx_CONST (GET_MODE (sym
),
2945 gen_rtx_UNSPEC (GET_MODE (sym
),
2949 gcc_assert (GET_CODE (sym
) == SYMBOL_REF
);
2951 str
= XSTR (sym
, 0);
2952 /* Share all SYMBOL_REF strings with the same value - that is important
2954 str
= IDENTIFIER_POINTER (get_identifier (str
));
2955 XSTR (sym
, 0) = str
;
2961 /* The SH cannot load a large constant into a register, constants have to
2962 come from a pc relative load. The reference of a pc relative load
2963 instruction must be less than 1k in front of the instruction. This
2964 means that we often have to dump a constant inside a function, and
2965 generate code to branch around it.
2967 It is important to minimize this, since the branches will slow things
2968 down and make things bigger.
2970 Worst case code looks like:
2988 We fix this by performing a scan before scheduling, which notices which
2989 instructions need to have their operands fetched from the constant table
2990 and builds the table.
2994 scan, find an instruction which needs a pcrel move. Look forward, find the
2995 last barrier which is within MAX_COUNT bytes of the requirement.
2996 If there isn't one, make one. Process all the instructions between
2997 the find and the barrier.
2999 In the above example, we can tell that L3 is within 1k of L1, so
3000 the first move can be shrunk from the 3 insn+constant sequence into
3001 just 1 insn, and the constant moved to L3 to make:
3012 Then the second move becomes the target for the shortening process. */
3016 rtx value
; /* Value in table. */
3017 rtx label
; /* Label of value. */
3018 rtx wend
; /* End of window. */
3019 enum machine_mode mode
; /* Mode of value. */
3021 /* True if this constant is accessed as part of a post-increment
3022 sequence. Note that HImode constants are never accessed in this way. */
3023 bool part_of_sequence_p
;
3026 /* The maximum number of constants that can fit into one pool, since
3027 constants in the range 0..510 are at least 2 bytes long, and in the
3028 range from there to 1018 at least 4 bytes. */
3030 #define MAX_POOL_SIZE 372
3031 static pool_node pool_vector
[MAX_POOL_SIZE
];
3032 static int pool_size
;
3033 static rtx pool_window_label
;
3034 static int pool_window_last
;
3036 /* ??? If we need a constant in HImode which is the truncated value of a
3037 constant we need in SImode, we could combine the two entries thus saving
3038 two bytes. Is this common enough to be worth the effort of implementing it? */
3041 /* ??? This stuff should be done at the same time that we shorten branches.
3042 As it is now, we must assume that all branches are the maximum size, and
3043 this causes us to almost always output constant pools sooner than necessary. */
3046 /* Add a constant to the pool and return its label. */
3049 add_constant (rtx x
, enum machine_mode mode
, rtx last_value
)
3052 rtx lab
, new, ref
, newref
;
3054 /* First see if we've already got it. */
3055 for (i
= 0; i
< pool_size
; i
++)
3057 if (x
->code
== pool_vector
[i
].value
->code
3058 && mode
== pool_vector
[i
].mode
)
3060 if (x
->code
== CODE_LABEL
)
3062 if (XINT (x
, 3) != XINT (pool_vector
[i
].value
, 3))
3065 if (rtx_equal_p (x
, pool_vector
[i
].value
))
3070 || ! rtx_equal_p (last_value
, pool_vector
[i
-1].value
))
3072 new = gen_label_rtx ();
3073 LABEL_REFS (new) = pool_vector
[i
].label
;
3074 pool_vector
[i
].label
= lab
= new;
3076 if (lab
&& pool_window_label
)
3078 newref
= gen_rtx_LABEL_REF (VOIDmode
, pool_window_label
);
3079 ref
= pool_vector
[pool_window_last
].wend
;
3080 LABEL_NEXTREF (newref
) = ref
;
3081 pool_vector
[pool_window_last
].wend
= newref
;
3084 pool_window_label
= new;
3085 pool_window_last
= i
;
3091 /* Need a new one. */
3092 pool_vector
[pool_size
].value
= x
;
3093 if (last_value
&& rtx_equal_p (last_value
, pool_vector
[pool_size
- 1].value
))
3096 pool_vector
[pool_size
- 1].part_of_sequence_p
= true;
3099 lab
= gen_label_rtx ();
3100 pool_vector
[pool_size
].mode
= mode
;
3101 pool_vector
[pool_size
].label
= lab
;
3102 pool_vector
[pool_size
].wend
= NULL_RTX
;
3103 pool_vector
[pool_size
].part_of_sequence_p
= (lab
== 0);
3104 if (lab
&& pool_window_label
)
3106 newref
= gen_rtx_LABEL_REF (VOIDmode
, pool_window_label
);
3107 ref
= pool_vector
[pool_window_last
].wend
;
3108 LABEL_NEXTREF (newref
) = ref
;
3109 pool_vector
[pool_window_last
].wend
= newref
;
3112 pool_window_label
= lab
;
3113 pool_window_last
= pool_size
;
3118 /* Output the literal table. START, if nonzero, is the first instruction
3119 this table is needed for, and also indicates that there is at least one
3120 casesi_worker_2 instruction; We have to emit the operand3 labels from
3121 these insns at a 4-byte aligned position. BARRIER is the barrier
3122 after which we are to place the table. */
3125 dump_table (rtx start
, rtx barrier
)
3133 /* Do two passes, first time dump out the HI sized constants. */
3135 for (i
= 0; i
< pool_size
; i
++)
3137 pool_node
*p
= &pool_vector
[i
];
3139 if (p
->mode
== HImode
)
3143 scan
= emit_insn_after (gen_align_2 (), scan
);
3146 for (lab
= p
->label
; lab
; lab
= LABEL_REFS (lab
))
3147 scan
= emit_label_after (lab
, scan
);
3148 scan
= emit_insn_after (gen_consttable_2 (p
->value
, const0_rtx
),
3150 for (ref
= p
->wend
; ref
; ref
= LABEL_NEXTREF (ref
))
3152 lab
= XEXP (ref
, 0);
3153 scan
= emit_insn_after (gen_consttable_window_end (lab
), scan
);
3156 else if (p
->mode
== DFmode
)
3164 scan
= emit_insn_after (gen_align_4 (), scan
);
3166 for (; start
!= barrier
; start
= NEXT_INSN (start
))
3167 if (GET_CODE (start
) == INSN
3168 && recog_memoized (start
) == CODE_FOR_casesi_worker_2
)
3170 rtx src
= SET_SRC (XVECEXP (PATTERN (start
), 0, 0));
3171 rtx lab
= XEXP (XVECEXP (src
, 0, 3), 0);
3173 scan
= emit_label_after (lab
, scan
);
3176 if (TARGET_FMOVD
&& TARGET_ALIGN_DOUBLE
&& have_df
)
3178 rtx align_insn
= NULL_RTX
;
3180 scan
= emit_label_after (gen_label_rtx (), scan
);
3181 scan
= emit_insn_after (gen_align_log (GEN_INT (3)), scan
);
3184 for (i
= 0; i
< pool_size
; i
++)
3186 pool_node
*p
= &pool_vector
[i
];
3194 if (align_insn
&& !p
->part_of_sequence_p
)
3196 for (lab
= p
->label
; lab
; lab
= LABEL_REFS (lab
))
3197 emit_label_before (lab
, align_insn
);
3198 emit_insn_before (gen_consttable_4 (p
->value
, const0_rtx
),
3200 for (ref
= p
->wend
; ref
; ref
= LABEL_NEXTREF (ref
))
3202 lab
= XEXP (ref
, 0);
3203 emit_insn_before (gen_consttable_window_end (lab
),
3206 delete_insn (align_insn
);
3207 align_insn
= NULL_RTX
;
3212 for (lab
= p
->label
; lab
; lab
= LABEL_REFS (lab
))
3213 scan
= emit_label_after (lab
, scan
);
3214 scan
= emit_insn_after (gen_consttable_4 (p
->value
,
3216 need_align
= ! need_align
;
3222 scan
= emit_insn_after (gen_align_log (GEN_INT (3)), scan
);
3227 for (lab
= p
->label
; lab
; lab
= LABEL_REFS (lab
))
3228 scan
= emit_label_after (lab
, scan
);
3229 scan
= emit_insn_after (gen_consttable_8 (p
->value
, const0_rtx
),
3236 if (p
->mode
!= HImode
)
3238 for (ref
= p
->wend
; ref
; ref
= LABEL_NEXTREF (ref
))
3240 lab
= XEXP (ref
, 0);
3241 scan
= emit_insn_after (gen_consttable_window_end (lab
),
3250 for (i
= 0; i
< pool_size
; i
++)
3252 pool_node
*p
= &pool_vector
[i
];
3263 scan
= emit_label_after (gen_label_rtx (), scan
);
3264 scan
= emit_insn_after (gen_align_4 (), scan
);
3266 for (lab
= p
->label
; lab
; lab
= LABEL_REFS (lab
))
3267 scan
= emit_label_after (lab
, scan
);
3268 scan
= emit_insn_after (gen_consttable_4 (p
->value
, const0_rtx
),
3276 scan
= emit_label_after (gen_label_rtx (), scan
);
3277 scan
= emit_insn_after (gen_align_4 (), scan
);
3279 for (lab
= p
->label
; lab
; lab
= LABEL_REFS (lab
))
3280 scan
= emit_label_after (lab
, scan
);
3281 scan
= emit_insn_after (gen_consttable_8 (p
->value
, const0_rtx
),
3288 if (p
->mode
!= HImode
)
3290 for (ref
= p
->wend
; ref
; ref
= LABEL_NEXTREF (ref
))
3292 lab
= XEXP (ref
, 0);
3293 scan
= emit_insn_after (gen_consttable_window_end (lab
), scan
);
3298 scan
= emit_insn_after (gen_consttable_end (), scan
);
3299 scan
= emit_barrier_after (scan
);
3301 pool_window_label
= NULL_RTX
;
3302 pool_window_last
= 0;
3305 /* Return nonzero if constant would be an ok source for a
3306 mov.w instead of a mov.l. */
3311 return (GET_CODE (src
) == CONST_INT
3312 && INTVAL (src
) >= -32768
3313 && INTVAL (src
) <= 32767);
3316 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3318 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
3319 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3320 need to fix it if the input value is CONST_OK_FOR_I08. */
3323 broken_move (rtx insn
)
3325 if (GET_CODE (insn
) == INSN
)
3327 rtx pat
= PATTERN (insn
);
3328 if (GET_CODE (pat
) == PARALLEL
)
3329 pat
= XVECEXP (pat
, 0, 0);
3330 if (GET_CODE (pat
) == SET
3331 /* We can load any 8 bit value if we don't care what the high
3332 order bits end up as. */
3333 && GET_MODE (SET_DEST (pat
)) != QImode
3334 && (CONSTANT_P (SET_SRC (pat
))
3335 /* Match mova_const. */
3336 || (GET_CODE (SET_SRC (pat
)) == UNSPEC
3337 && XINT (SET_SRC (pat
), 1) == UNSPEC_MOVA
3338 && GET_CODE (XVECEXP (SET_SRC (pat
), 0, 0)) == CONST
))
3340 && GET_CODE (SET_SRC (pat
)) == CONST_DOUBLE
3341 && (fp_zero_operand (SET_SRC (pat
))
3342 || fp_one_operand (SET_SRC (pat
)))
3343 /* ??? If this is a -m4 or -m4-single compilation, in general
3344 we don't know the current setting of fpscr, so disable fldi.
3345 There is an exception if this was a register-register move
3346 before reload - and hence it was ascertained that we have
3347 single precision setting - and in a post-reload optimization
3348 we changed this to do a constant load. In that case
3349 we don't have an r0 clobber, hence we must use fldi. */
3350 && (! TARGET_SH4
|| TARGET_FMOVD
3351 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn
), 0, 2), 0))
3353 && GET_CODE (SET_DEST (pat
)) == REG
3354 && FP_REGISTER_P (REGNO (SET_DEST (pat
))))
3356 && GET_MODE (SET_DEST (pat
)) == SImode
3357 && GET_CODE (SET_SRC (pat
)) == CONST_INT
3358 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat
))))
3359 && (GET_CODE (SET_SRC (pat
)) != CONST_INT
3360 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat
)))))
3370 return (GET_CODE (insn
) == INSN
3371 && GET_CODE (PATTERN (insn
)) == SET
3372 && GET_CODE (SET_SRC (PATTERN (insn
))) == UNSPEC
3373 && XINT (SET_SRC (PATTERN (insn
)), 1) == UNSPEC_MOVA
3374 /* Don't match mova_const. */
3375 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn
)), 0, 0)) == LABEL_REF
);
3378 /* Fix up a mova from a switch that went out of range. */
3380 fixup_mova (rtx mova
)
3384 SET_SRC (PATTERN (mova
)) = XVECEXP (SET_SRC (PATTERN (mova
)), 0, 0);
3385 INSN_CODE (mova
) = -1;
3390 rtx lab
= gen_label_rtx ();
3391 rtx wpat
, wpat0
, wpat1
, wsrc
, diff
;
3395 worker
= NEXT_INSN (worker
);
3397 && GET_CODE (worker
) != CODE_LABEL
3398 && GET_CODE (worker
) != JUMP_INSN
);
3399 } while (GET_CODE (worker
) == NOTE
3400 || recog_memoized (worker
) != CODE_FOR_casesi_worker_1
);
3401 wpat
= PATTERN (worker
);
3402 wpat0
= XVECEXP (wpat
, 0, 0);
3403 wpat1
= XVECEXP (wpat
, 0, 1);
3404 wsrc
= SET_SRC (wpat0
);
3405 PATTERN (worker
) = (gen_casesi_worker_2
3406 (SET_DEST (wpat0
), XVECEXP (wsrc
, 0, 1),
3407 XEXP (XVECEXP (wsrc
, 0, 2), 0), lab
,
3409 INSN_CODE (worker
) = -1;
3410 diff
= gen_rtx_MINUS (Pmode
, XVECEXP (SET_SRC (PATTERN (mova
)), 0, 0),
3411 gen_rtx_LABEL_REF (Pmode
, lab
));
3412 diff
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, diff
), UNSPEC_PIC
);
3413 SET_SRC (PATTERN (mova
)) = gen_rtx_CONST (Pmode
, diff
);
3414 INSN_CODE (mova
) = -1;
3418 /* Find the last barrier from insn FROM which is close enough to hold the
3419 constant pool. If we can't find one, then create one near the end of the range. */
3423 find_barrier (int num_mova
, rtx mova
, rtx from
)
3432 int leading_mova
= num_mova
;
3433 rtx barrier_before_mova
= 0, found_barrier
= 0, good_barrier
= 0;
3437 /* For HImode: range is 510, add 4 because pc counts from address of
3438 second instruction after this one, subtract 2 for the jump instruction
3439 that we may need to emit before the table, subtract 2 for the instruction
3440 that fills the jump delay slot (in very rare cases, reorg will take an
3441 instruction from after the constant pool or will leave the delay slot
3442 empty). This gives 510.
3443 For SImode: range is 1020, add 4 because pc counts from address of
3444 second instruction after this one, subtract 2 in case pc is 2 byte
3445 aligned, subtract 2 for the jump instruction that we may need to emit
3446 before the table, subtract 2 for the instruction that fills the jump
3447 delay slot. This gives 1018. */
3449 /* The branch will always be shortened now that the reference address for
3450 forward branches is the successor address, thus we need no longer make
3451 adjustments to the [sh]i_limit for -O0. */
3456 while (from
&& count_si
< si_limit
&& count_hi
< hi_limit
)
3458 int inc
= get_attr_length (from
);
3461 if (GET_CODE (from
) == CODE_LABEL
)
3464 new_align
= 1 << label_to_alignment (from
);
3465 else if (GET_CODE (prev_nonnote_insn (from
)) == BARRIER
)
3466 new_align
= 1 << barrier_align (from
);
3472 if (GET_CODE (from
) == BARRIER
)
3475 found_barrier
= from
;
3477 /* If we are at the end of the function, or in front of an alignment
3478 instruction, we need not insert an extra alignment. We prefer
3479 this kind of barrier. */
3480 if (barrier_align (from
) > 2)
3481 good_barrier
= from
;
3484 if (broken_move (from
))
3487 enum machine_mode mode
;
3489 pat
= PATTERN (from
);
3490 if (GET_CODE (pat
) == PARALLEL
)
3491 pat
= XVECEXP (pat
, 0, 0);
3492 src
= SET_SRC (pat
);
3493 dst
= SET_DEST (pat
);
3494 mode
= GET_MODE (dst
);
3496 /* We must explicitly check the mode, because sometimes the
3497 front end will generate code to load unsigned constants into
3498 HImode targets without properly sign extending them. */
3500 || (mode
== SImode
&& hi_const (src
) && REGNO (dst
) != FPUL_REG
))
3503 /* We put the short constants before the long constants, so
3504 we must count the length of short constants in the range
3505 for the long constants. */
3506 /* ??? This isn't optimal, but is easy to do. */
3511 /* We dump DF/DI constants before SF/SI ones, because
3512 the limit is the same, but the alignment requirements
3513 are higher. We may waste up to 4 additional bytes
3514 for alignment, and the DF/DI constant may have
3515 another SF/SI constant placed before it. */
3516 if (TARGET_SHCOMPACT
3518 && (mode
== DFmode
|| mode
== DImode
))
3523 while (si_align
> 2 && found_si
+ si_align
- 2 > count_si
)
3525 if (found_si
> count_si
)
3526 count_si
= found_si
;
3527 found_si
+= GET_MODE_SIZE (mode
);
3529 si_limit
-= GET_MODE_SIZE (mode
);
3539 barrier_before_mova
= good_barrier
? good_barrier
: found_barrier
;
3541 if (found_si
> count_si
)
3542 count_si
= found_si
;
3544 else if (GET_CODE (from
) == JUMP_INSN
3545 && (GET_CODE (PATTERN (from
)) == ADDR_VEC
3546 || GET_CODE (PATTERN (from
)) == ADDR_DIFF_VEC
))
3550 if (barrier_align (next_real_insn (from
)) == align_jumps_log
)
3552 /* We have just passed the barrier in front of the
3553 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3554 the ADDR_DIFF_VEC is accessed as data, just like our pool
3555 constants, this is a good opportunity to accommodate what
3556 we have gathered so far.
3557 If we waited any longer, we could end up at a barrier in
3558 front of code, which gives worse cache usage for separated
3559 instruction / data caches. */
3560 good_barrier
= found_barrier
;
3565 rtx body
= PATTERN (from
);
3566 inc
= XVECLEN (body
, 1) * GET_MODE_SIZE (GET_MODE (body
));
3569 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3570 else if (GET_CODE (from
) == JUMP_INSN
3572 && ! TARGET_SMALLCODE
)
3578 if (new_align
> si_align
)
3580 si_limit
-= (count_si
- 1) & (new_align
- si_align
);
3581 si_align
= new_align
;
3583 count_si
= (count_si
+ new_align
- 1) & -new_align
;
3588 if (new_align
> hi_align
)
3590 hi_limit
-= (count_hi
- 1) & (new_align
- hi_align
);
3591 hi_align
= new_align
;
3593 count_hi
= (count_hi
+ new_align
- 1) & -new_align
;
3595 from
= NEXT_INSN (from
);
3602 /* Try as we might, the leading mova is out of range. Change
3603 it into a load (which will become a pcload) and retry. */
3605 return find_barrier (0, 0, mova
);
3609 /* Insert the constant pool table before the mova instruction,
3610 to prevent the mova label reference from going out of range. */
3612 good_barrier
= found_barrier
= barrier_before_mova
;
3618 if (good_barrier
&& next_real_insn (found_barrier
))
3619 found_barrier
= good_barrier
;
3623 /* We didn't find a barrier in time to dump our stuff,
3624 so we'll make one. */
3625 rtx label
= gen_label_rtx ();
3627 /* If we exceeded the range, then we must back up over the last
3628 instruction we looked at. Otherwise, we just need to undo the
3629 NEXT_INSN at the end of the loop. */
3630 if (count_hi
> hi_limit
|| count_si
> si_limit
)
3631 from
= PREV_INSN (PREV_INSN (from
));
3633 from
= PREV_INSN (from
);
3635 /* Walk back to be just before any jump or label.
3636 Putting it before a label reduces the number of times the branch
3637 around the constant pool table will be hit. Putting it before
3638 a jump makes it more likely that the bra delay slot will be
3640 while (GET_CODE (from
) == JUMP_INSN
|| GET_CODE (from
) == NOTE
3641 || GET_CODE (from
) == CODE_LABEL
)
3642 from
= PREV_INSN (from
);
3644 from
= emit_jump_insn_after (gen_jump (label
), from
);
3645 JUMP_LABEL (from
) = label
;
3646 LABEL_NUSES (label
) = 1;
3647 found_barrier
= emit_barrier_after (from
);
3648 emit_label_after (label
, found_barrier
);
3651 return found_barrier
;
3654 /* If the instruction INSN is implemented by a special function, and we can
3655 positively find the register that is used to call the sfunc, and this
3656 register is not used anywhere else in this instruction - except as the
3657 destination of a set, return this register; else, return 0. */
3659 sfunc_uses_reg (rtx insn
)
3662 rtx pattern
, part
, reg_part
, reg
;
3664 if (GET_CODE (insn
) != INSN
)
3666 pattern
= PATTERN (insn
);
3667 if (GET_CODE (pattern
) != PARALLEL
|| get_attr_type (insn
) != TYPE_SFUNC
)
3670 for (reg_part
= 0, i
= XVECLEN (pattern
, 0) - 1; i
>= 1; i
--)
3672 part
= XVECEXP (pattern
, 0, i
);
3673 if (GET_CODE (part
) == USE
&& GET_MODE (XEXP (part
, 0)) == SImode
)
3678 reg
= XEXP (reg_part
, 0);
3679 for (i
= XVECLEN (pattern
, 0) - 1; i
>= 0; i
--)
3681 part
= XVECEXP (pattern
, 0, i
);
3682 if (part
== reg_part
|| GET_CODE (part
) == CLOBBER
)
3684 if (reg_mentioned_p (reg
, ((GET_CODE (part
) == SET
3685 && GET_CODE (SET_DEST (part
)) == REG
)
3686 ? SET_SRC (part
) : part
)))
3692 /* See if the only way in which INSN uses REG is by calling it, or by
3693 setting it while calling it. Set *SET to a SET rtx if the register is set by INSN. */
3697 noncall_uses_reg (rtx reg
, rtx insn
, rtx
*set
)
3703 reg2
= sfunc_uses_reg (insn
);
3704 if (reg2
&& REGNO (reg2
) == REGNO (reg
))
3706 pattern
= single_set (insn
);
3708 && GET_CODE (SET_DEST (pattern
)) == REG
3709 && REGNO (reg
) == REGNO (SET_DEST (pattern
)))
3713 if (GET_CODE (insn
) != CALL_INSN
)
3715 /* We don't use rtx_equal_p because we don't care if the mode is
3717 pattern
= single_set (insn
);
3719 && GET_CODE (SET_DEST (pattern
)) == REG
3720 && REGNO (reg
) == REGNO (SET_DEST (pattern
)))
3726 par
= PATTERN (insn
);
3727 if (GET_CODE (par
) == PARALLEL
)
3728 for (i
= XVECLEN (par
, 0) - 1; i
>= 0; i
--)
3730 part
= XVECEXP (par
, 0, i
);
3731 if (GET_CODE (part
) != SET
&& reg_mentioned_p (reg
, part
))
3734 return reg_mentioned_p (reg
, SET_SRC (pattern
));
3740 pattern
= PATTERN (insn
);
3742 if (GET_CODE (pattern
) == PARALLEL
)
3746 for (i
= XVECLEN (pattern
, 0) - 1; i
>= 1; i
--)
3747 if (reg_mentioned_p (reg
, XVECEXP (pattern
, 0, i
)))
3749 pattern
= XVECEXP (pattern
, 0, 0);
3752 if (GET_CODE (pattern
) == SET
)
3754 if (reg_mentioned_p (reg
, SET_DEST (pattern
)))
3756 /* We don't use rtx_equal_p, because we don't care if the
3757 mode is different. */
3758 if (GET_CODE (SET_DEST (pattern
)) != REG
3759 || REGNO (reg
) != REGNO (SET_DEST (pattern
)))
3765 pattern
= SET_SRC (pattern
);
3768 if (GET_CODE (pattern
) != CALL
3769 || GET_CODE (XEXP (pattern
, 0)) != MEM
3770 || ! rtx_equal_p (reg
, XEXP (XEXP (pattern
, 0), 0)))
3776 /* Given X, a pattern of an insn or a part of it, return a mask of used
3777 general registers. Bits 0..15 mean that the respective registers
3778 are used as inputs in the instruction. Bits 16..31 mean that the
3779 registers 0..15, respectively, are used as outputs, or are clobbered.
3780 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3782 regs_used (rtx x
, int is_dest
)
3790 code
= GET_CODE (x
);
/* One mask bit per hard register that a value of X's mode occupies,
   shifted up by X's register number plus IS_DEST.  */
3795 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x
))) - 1)
3796 << (REGNO (x
) + is_dest
));
/* Y is the expression wrapped by the SUBREG X.  */
3800 rtx y
= SUBREG_REG (x
);
3802 if (GET_CODE (y
) != REG
)
/* Same mask width as for a plain register; the shift amount is built from
   a subreg_regno_offset term plus IS_DEST.  */
3805 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x
))) - 1)
3807 subreg_regno_offset (REGNO (y
),
3810 GET_MODE (x
)) + is_dest
));
/* For a SET, the source is scanned as an input (IS_DEST 0) and the
   destination as an output (IS_DEST 16); the two masks are or-ed.  */
3814 return regs_used (SET_SRC (x
), 0) | regs_used (SET_DEST (x
), 16);
3816 /* If there was a return value, it must have been indicated with USE. */
/* Generic case: walk the operands of X according to its rtx format and
   accumulate into USED the masks of every vector element and every 'e'
   operand, passing IS_DEST through unchanged.  */
3831 fmt
= GET_RTX_FORMAT (code
);
3833 for (i
= GET_RTX_LENGTH (code
) - 1; i
>= 0; i
--)
3838 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
3839 used
|= regs_used (XVECEXP (x
, i
, j
), is_dest
);
3841 else if (fmt
[i
] == 'e')
3842 used
|= regs_used (XEXP (x
, i
), is_dest
);
3847 /* Create an instruction that prevents redirection of a conditional branch
3848 to the destination of the JUMP with address ADDR.
3849 If the branch needs to be implemented as an indirect jump, try to find
3850 a scratch register for it.
3851 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3852 If any preceding insn that doesn't fit into a delay slot is good enough,
3853 pass 1. Pass 2 if a definite blocking insn is needed.
3854 -1 is used internally to avoid deep recursion.
3855 If a blocking instruction is made or recognized, return it. */
3858 gen_block_redirect (rtx jump
, int addr
, int need_block
)
3861 rtx prev
= prev_nonnote_insn (jump
);
3864 /* First, check if we already have an instruction that satisfies our need. */
3865 if (prev
&& GET_CODE (prev
) == INSN
&& ! INSN_DELETED_P (prev
))
3867 if (INSN_CODE (prev
) == CODE_FOR_indirect_jump_scratch
)
3869 if (GET_CODE (PATTERN (prev
)) == USE
3870 || GET_CODE (PATTERN (prev
)) == CLOBBER
3871 || get_attr_in_delay_slot (prev
) == IN_DELAY_SLOT_YES
)
3873 else if ((need_block
&= ~1) < 0)
3875 else if (recog_memoized (prev
) == CODE_FOR_block_branch_redirect
)
3878 if (GET_CODE (PATTERN (jump
)) == RETURN
)
3882 /* Reorg even does nasty things with return insns that cause branches
3883 to go out of range - see find_end_label and callers. */
3884 return emit_insn_before (gen_block_branch_redirect (const0_rtx
) , jump
);
3886 /* We can't use JUMP_LABEL here because it might be undefined
3887 when not optimizing. */
3888 dest
= XEXP (SET_SRC (PATTERN (jump
)), 0);
3889 /* If the branch is out of range, try to find a scratch register for it. */
3891 && (INSN_ADDRESSES (INSN_UID (dest
)) - addr
+ (unsigned) 4092
3895 /* Don't look for the stack pointer as a scratch register,
3896 it would cause trouble if an interrupt occurred. */
3897 unsigned try = 0x7fff, used
;
3898 int jump_left
= flag_expensive_optimizations
+ 1;
3900 /* It is likely that the most recent eligible instruction is wanted for
3901 the delay slot. Therefore, find out which registers it uses, and
3902 try to avoid using them. */
3904 for (scan
= jump
; (scan
= PREV_INSN (scan
)); )
3908 if (INSN_DELETED_P (scan
))
3910 code
= GET_CODE (scan
);
3911 if (code
== CODE_LABEL
|| code
== JUMP_INSN
)
3914 && GET_CODE (PATTERN (scan
)) != USE
3915 && GET_CODE (PATTERN (scan
)) != CLOBBER
3916 && get_attr_in_delay_slot (scan
) == IN_DELAY_SLOT_YES
)
3918 try &= ~regs_used (PATTERN (scan
), 0);
3922 for (used
= dead
= 0, scan
= JUMP_LABEL (jump
);
3923 (scan
= NEXT_INSN (scan
)); )
3927 if (INSN_DELETED_P (scan
))
3929 code
= GET_CODE (scan
);
3932 used
|= regs_used (PATTERN (scan
), 0);
3933 if (code
== CALL_INSN
)
3934 used
|= regs_used (CALL_INSN_FUNCTION_USAGE (scan
), 0);
3935 dead
|= (used
>> 16) & ~used
;
3941 if (code
== JUMP_INSN
)
3943 if (jump_left
-- && simplejump_p (scan
))
3944 scan
= JUMP_LABEL (scan
);
3950 /* Mask out the stack pointer again, in case it was
3951 the only 'free' register we have found. */
3954 /* If the immediate destination is still in range, check for possible
3955 threading with a jump beyond the delay slot insn.
3956 Don't check if we are called recursively; the jump has been or will be
3957 checked in a different invocation then. */
3959 else if (optimize
&& need_block
>= 0)
3961 rtx next
= next_active_insn (next_active_insn (dest
));
3962 if (next
&& GET_CODE (next
) == JUMP_INSN
3963 && GET_CODE (PATTERN (next
)) == SET
3964 && recog_memoized (next
) == CODE_FOR_jump_compact
)
3966 dest
= JUMP_LABEL (next
);
3968 && (INSN_ADDRESSES (INSN_UID (dest
)) - addr
+ (unsigned) 4092
3970 gen_block_redirect (next
, INSN_ADDRESSES (INSN_UID (next
)), -1);
3976 rtx reg
= gen_rtx_REG (SImode
, exact_log2 (dead
& -dead
));
3978 /* It would be nice if we could convert the jump into an indirect
3979 jump / far branch right now, and thus exposing all constituent
3980 instructions to further optimization. However, reorg uses
3981 simplejump_p to determine if there is an unconditional jump where
3982 it should try to schedule instructions from the target of the
3983 branch; simplejump_p fails for indirect jumps even if they have
3985 rtx insn
= emit_insn_before (gen_indirect_jump_scratch
3986 (reg
, GEN_INT (INSN_UID (JUMP_LABEL (jump
))))
3988 /* ??? We would like this to have the scope of the jump, but that
3989 scope will change when a delay slot insn of an inner scope is added.
3990 Hence, after delay slot scheduling, we'll have to expect
3991 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3994 INSN_LOCATOR (insn
) = INSN_LOCATOR (jump
);
3995 INSN_CODE (insn
) = CODE_FOR_indirect_jump_scratch
;
3998 else if (need_block
)
3999 /* We can't use JUMP_LABEL here because it might be undefined
4000 when not optimizing. */
4001 return emit_insn_before (gen_block_branch_redirect
4002 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump
)), 0))))
4007 #define CONDJUMP_MIN -252
4008 #define CONDJUMP_MAX 262
4011 /* A label (to be placed) in front of the jump
4012 that jumps to our ultimate destination. */
4014 /* Where we are going to insert it if we cannot move the jump any farther,
4015 or the jump itself if we have picked up an existing jump. */
4017 /* The ultimate destination. */
4019 struct far_branch
*prev
;
4020 /* If the branch has already been created, its address;
4021 else the address of its first prospective user. */
4025 static void gen_far_branch (struct far_branch
*);
4026 enum mdep_reorg_phase_e mdep_reorg_phase
;
/* Emit the far branch described by BP.  All new insns are emitted after
   BP->insert_place (INSN below): a fresh LABEL, an unconditional jump to
   BP->far_label or a return, a barrier after that jump, and
   BP->near_label.  Because the label and jump emissions all use INSN as
   their anchor, the resulting order is expected to be INSN,
   BP->near_label, the new jump, the barrier, LABEL.  INSN is then inverted
   with invert_jump so that it targets LABEL, and the new jump is passed to
   gen_block_redirect.  BP->address is advanced by 2 in that call.  */
4028 gen_far_branch (struct far_branch
*bp
)
4030 rtx insn
= bp
->insert_place
;
4032 rtx label
= gen_label_rtx ();
4035 emit_label_after (label
, insn
);
/* The new jump goes to the far label and takes a reference on it ...  */
4038 jump
= emit_jump_insn_after (gen_jump (bp
->far_label
), insn
);
4039 LABEL_NUSES (bp
->far_label
)++;
/* ... or it is a return insn.  */
4042 jump
= emit_jump_insn_after (gen_return (), insn
);
4043 /* Emit a barrier so that reorg knows that any following instructions
4044 are not reachable via a fall-through path.
4045 But don't do this when not optimizing, since we wouldn't suppress the
4046 alignment for the barrier then, and could end up with out-of-range
4047 pc-relative loads. */
4049 emit_barrier_after (jump
);
4050 emit_label_after (bp
->near_label
, insn
);
4051 JUMP_LABEL (jump
) = bp
->far_label
;
/* Invert the branch at INSN and point it at LABEL.  */
4052 ok
= invert_jump (insn
, label
, 1);
4055 /* If we are branching around a jump (rather than a return), prevent
4056 reorg from using an insn from the jump target as the delay slot insn -
4057 when reorg did this, it pessimized code (we would rather hide the delay
4058 slot) and it could cause branches to go out of range. */
4061 (gen_stuff_delay_slot
4062 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump
)), 0))),
4063 GEN_INT (recog_memoized (insn
) == CODE_FOR_branch_false
)),
4065 /* Prevent reorg from undoing our splits. */
4066 gen_block_redirect (jump
, bp
->address
+= 2, 2);
4069 /* Fix up ADDR_DIFF_VECs.  Starting at FIRST, look at every JUMP_INSN
   whose pattern is an ADDR_DIFF_VEC: search backwards from the vector's
   label for the matching casesi_jump_2, emit the braf reference label
   right after that insn, and make the vector relative to that label.  */
4071 fixup_addr_diff_vecs (rtx first
)
4075 for (insn
= first
; insn
; insn
= NEXT_INSN (insn
))
4077 rtx vec_lab
, pat
, prev
, prevpat
, x
, braf_label
;
4079 if (GET_CODE (insn
) != JUMP_INSN
4080 || GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
/* VEC_LAB is the label the ADDR_DIFF_VEC currently refers to.  */
4082 pat
= PATTERN (insn
);
4083 vec_lab
= XEXP (XEXP (pat
, 0), 0);
4085 /* Search for the matching casesi_jump_2: the candidate tests below look
   for a JUMP_INSN whose pattern is a two-element PARALLEL with a USE as
   element 1, and for a LABEL_REF to VEC_LAB.  */
4086 for (prev
= vec_lab
; ; prev
= PREV_INSN (prev
))
4088 if (GET_CODE (prev
) != JUMP_INSN
)
4090 prevpat
= PATTERN (prev
);
4091 if (GET_CODE (prevpat
) != PARALLEL
|| XVECLEN (prevpat
, 0) != 2)
4093 x
= XVECEXP (prevpat
, 0, 1);
4094 if (GET_CODE (x
) != USE
)
4097 if (GET_CODE (x
) == LABEL_REF
&& XEXP (x
, 0) == vec_lab
)
4100 /* FIXME: This is a bug in the optimizer, but it seems harmless
4101 to just avoid panicking. */
4105 /* Emit the reference label of the braf where it belongs, right after
4106 the casesi_jump_2 (i.e. braf). */
4107 braf_label
= XEXP (XEXP (SET_SRC (XVECEXP (prevpat
, 0, 0)), 1), 0);
4108 emit_label_after (braf_label
, prev
);
4110 /* Fix up the ADDR_DIFF_VEC to be relative
4111 to the reference address of the braf. */
4112 XEXP (XEXP (pat
, 0), 0) = braf_label
;
4116 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4117 a barrier. Return the base 2 logarithm of the desired alignment. */
4119 barrier_align (rtx barrier_or_label
)
4121 rtx next
= next_real_insn (barrier_or_label
), pat
, prev
;
4122 int slot
, credit
, jump_to_next
= 0;
4127 pat
= PATTERN (next
);
4129 if (GET_CODE (pat
) == ADDR_DIFF_VEC
)
4132 if (GET_CODE (pat
) == UNSPEC_VOLATILE
&& XINT (pat
, 1) == UNSPECV_ALIGN
)
4133 /* This is a barrier in front of a constant table. */
4136 prev
= prev_real_insn (barrier_or_label
);
4137 if (GET_CODE (PATTERN (prev
)) == ADDR_DIFF_VEC
)
4139 pat
= PATTERN (prev
);
4140 /* If this is a very small table, we want to keep the alignment after
4141 the table to the minimum for proper code alignment. */
4142 return ((TARGET_SMALLCODE
4143 || ((unsigned) XVECLEN (pat
, 1) * GET_MODE_SIZE (GET_MODE (pat
))
4144 <= (unsigned) 1 << (CACHE_LOG
- 2)))
4145 ? 1 << TARGET_SHMEDIA
: align_jumps_log
);
4148 if (TARGET_SMALLCODE
)
4151 if (! TARGET_SH2
|| ! optimize
)
4152 return align_jumps_log
;
4154 /* When fixing up pcloads, a constant table might be inserted just before
4155 the basic block that ends with the barrier. Thus, we can't trust the
4156 instruction lengths before that. */
4157 if (mdep_reorg_phase
> SH_FIXUP_PCLOAD
)
4159 /* Check if there is an immediately preceding branch to the insn beyond
4160 the barrier. We must weigh the cost of discarding useful information
4161 from the current cache line when executing this branch and there is
4162 an alignment, against that of fetching unneeded insns in front of the
4163 branch target when there is no alignment. */
4165 /* There are two delay_slot cases to consider. One is the simple case
4166 where the preceding branch is to the insn beyond the barrier (simple
4167 delay slot filling), and the other is where the preceding branch has
4168 a delay slot that is a duplicate of the insn after the barrier
4169 (fill_eager_delay_slots) and the branch is to the insn after the insn
4170 after the barrier. */
4172 /* PREV is presumed to be the JUMP_INSN for the barrier under
4173 investigation. Skip to the insn before it. */
4174 prev
= prev_real_insn (prev
);
4176 for (slot
= 2, credit
= (1 << (CACHE_LOG
- 2)) + 2;
4177 credit
>= 0 && prev
&& GET_CODE (prev
) == INSN
;
4178 prev
= prev_real_insn (prev
))
4181 if (GET_CODE (PATTERN (prev
)) == USE
4182 || GET_CODE (PATTERN (prev
)) == CLOBBER
)
4184 if (GET_CODE (PATTERN (prev
)) == SEQUENCE
)
4186 prev
= XVECEXP (PATTERN (prev
), 0, 1);
4187 if (INSN_UID (prev
) == INSN_UID (next
))
4189 /* Delay slot was filled with insn at jump target. */
4196 get_attr_in_delay_slot (prev
) == IN_DELAY_SLOT_YES
)
4198 credit
-= get_attr_length (prev
);
4201 && GET_CODE (prev
) == JUMP_INSN
4202 && JUMP_LABEL (prev
))
4206 || next_real_insn (JUMP_LABEL (prev
)) == next
4207 /* If relax_delay_slots() decides NEXT was redundant
4208 with some previous instruction, it will have
4209 redirected PREV's jump to the following insn. */
4210 || JUMP_LABEL (prev
) == next_nonnote_insn (next
)
4211 /* There is no upper bound on redundant instructions
4212 that might have been skipped, but we must not put an
4213 alignment where none had been before. */
4214 || (x
= (NEXT_INSN (NEXT_INSN (PREV_INSN (prev
)))),
4216 && (INSN_CODE (x
) == CODE_FOR_block_branch_redirect
4217 || INSN_CODE (x
) == CODE_FOR_indirect_jump_scratch
4218 || INSN_CODE (x
) == CODE_FOR_stuff_delay_slot
))))
4220 rtx pat
= PATTERN (prev
);
4221 if (GET_CODE (pat
) == PARALLEL
)
4222 pat
= XVECEXP (pat
, 0, 0);
4223 if (credit
- slot
>= (GET_CODE (SET_SRC (pat
)) == PC
? 2 : 0))
4229 return align_jumps_log
;
4232 /* If we are inside a phony loop, almost any kind of label can turn up as the
4233 first one in the loop. Aligning a braf label causes incorrect switch
4234 destination addresses; we can detect braf labels because they are
4235 followed by a BARRIER.
4236 Applying loop alignment to small constant or switch tables is a waste
4237 of space, so we suppress this too. */
4239 sh_loop_align (rtx label
)
/* Step NEXT forward with next_nonnote_insn for as long as it lands on a
   CODE_LABEL, so that the tests below look at the first non-label insn.  */
4244 next
= next_nonnote_insn (next
);
4245 while (next
&& GET_CODE (next
) == CODE_LABEL
);
/* The tests include: NEXT is an ADDR_DIFF_VEC (switch table) or is
   recognized as consttable_2 (constant table entry).  */
4249 || GET_CODE (PATTERN (next
)) == ADDR_DIFF_VEC
4250 || recog_memoized (next
) == CODE_FOR_consttable_2
)
/* Fall back to align_loops_log.  */
4253 return align_loops_log
;
4256 /* Do a final pass over the function, just before delayed branch
4262 rtx first
, insn
, mova
= NULL_RTX
;
4264 rtx r0_rtx
= gen_rtx_REG (Pmode
, 0);
4265 rtx r0_inc_rtx
= gen_rtx_POST_INC (Pmode
, r0_rtx
);
4267 first
= get_insns ();
4269 /* We must split call insns before introducing `mova's. If we're
4270 optimizing, they'll have already been split. Otherwise, make
4271 sure we don't split them too late. */
4273 split_all_insns_noflow ();
4278 /* If relaxing, generate pseudo-ops to associate function calls with
4279 the symbols they call. It does no harm to not generate these
4280 pseudo-ops. However, when we can generate them, it enables the
4281 linker to potentially relax the jsr to a bsr, and eliminate the
4282 register load and, possibly, the constant pool entry. */
4284 mdep_reorg_phase
= SH_INSERT_USES_LABELS
;
4287 /* Remove all REG_LABEL notes. We want to use them for our own
4288 purposes. This works because none of the remaining passes
4289 need to look at them.
4291 ??? But it may break in the future. We should use a machine
4292 dependent REG_NOTE, or some other approach entirely. */
4293 for (insn
= first
; insn
; insn
= NEXT_INSN (insn
))
4299 while ((note
= find_reg_note (insn
, REG_LABEL
, NULL_RTX
)) != 0)
4300 remove_note (insn
, note
);
4304 for (insn
= first
; insn
; insn
= NEXT_INSN (insn
))
4306 rtx pattern
, reg
, link
, set
, scan
, dies
, label
;
4307 int rescan
= 0, foundinsn
= 0;
4309 if (GET_CODE (insn
) == CALL_INSN
)
4311 pattern
= PATTERN (insn
);
4313 if (GET_CODE (pattern
) == PARALLEL
)
4314 pattern
= XVECEXP (pattern
, 0, 0);
4315 if (GET_CODE (pattern
) == SET
)
4316 pattern
= SET_SRC (pattern
);
4318 if (GET_CODE (pattern
) != CALL
4319 || GET_CODE (XEXP (pattern
, 0)) != MEM
)
4322 reg
= XEXP (XEXP (pattern
, 0), 0);
4326 reg
= sfunc_uses_reg (insn
);
4331 if (GET_CODE (reg
) != REG
)
4334 /* This is a function call via REG. If the only uses of REG
4335 between the time that it is set and the time that it dies
4336 are in function calls, then we can associate all the
4337 function calls with the setting of REG. */
4339 for (link
= LOG_LINKS (insn
); link
; link
= XEXP (link
, 1))
4341 if (REG_NOTE_KIND (link
) != 0)
4343 set
= single_set (XEXP (link
, 0));
4344 if (set
&& rtx_equal_p (reg
, SET_DEST (set
)))
4346 link
= XEXP (link
, 0);
4353 /* ??? Sometimes global register allocation will have
4354 deleted the insn pointed to by LOG_LINKS. Try
4355 scanning backward to find where the register is set. */
4356 for (scan
= PREV_INSN (insn
);
4357 scan
&& GET_CODE (scan
) != CODE_LABEL
;
4358 scan
= PREV_INSN (scan
))
4360 if (! INSN_P (scan
))
4363 if (! reg_mentioned_p (reg
, scan
))
4366 if (noncall_uses_reg (reg
, scan
, &set
))
4380 /* The register is set at LINK. */
4382 /* We can only optimize the function call if the register is
4383 being set to a symbol. In theory, we could sometimes
4384 optimize calls to a constant location, but the assembler
4385 and linker do not support that at present. */
4386 if (GET_CODE (SET_SRC (set
)) != SYMBOL_REF
4387 && GET_CODE (SET_SRC (set
)) != LABEL_REF
)
4390 /* Scan forward from LINK to the place where REG dies, and
4391 make sure that the only insns which use REG are
4392 themselves function calls. */
4394 /* ??? This doesn't work for call targets that were allocated
4395 by reload, since there may not be a REG_DEAD note for the
4399 for (scan
= NEXT_INSN (link
); scan
; scan
= NEXT_INSN (scan
))
4403 /* Don't try to trace forward past a CODE_LABEL if we haven't
4404 seen INSN yet. Ordinarily, we will only find the setting insn
4405 in LOG_LINKS if it is in the same basic block. However,
4406 cross-jumping can insert code labels in between the load and
4407 the call, and can result in situations where a single call
4408 insn may have two targets depending on where we came from. */
4410 if (GET_CODE (scan
) == CODE_LABEL
&& ! foundinsn
)
4413 if (! INSN_P (scan
))
4416 /* Don't try to trace forward past a JUMP. To optimize
4417 safely, we would have to check that all the
4418 instructions at the jump destination did not use REG. */
4420 if (GET_CODE (scan
) == JUMP_INSN
)
4423 if (! reg_mentioned_p (reg
, scan
))
4426 if (noncall_uses_reg (reg
, scan
, &scanset
))
4433 && (GET_CODE (scan
) == CALL_INSN
|| sfunc_uses_reg (scan
)))
4435 /* There is a function call to this register other
4436 than the one we are checking. If we optimize
4437 this call, we need to rescan again below. */
4441 /* ??? We shouldn't have to worry about SCANSET here.
4442 We should just be able to check for a REG_DEAD note
4443 on a function call. However, the REG_DEAD notes are
4444 apparently not dependable around libcalls; c-torture
4445 execute/920501-2 is a test case. If SCANSET is set,
4446 then this insn sets the register, so it must have
4447 died earlier. Unfortunately, this will only handle
4448 the cases in which the register is, in fact, set in a
4451 /* ??? We shouldn't have to use FOUNDINSN here.
4452 However, the LOG_LINKS fields are apparently not
4453 entirely reliable around libcalls;
4454 newlib/libm/math/e_pow.c is a test case. Sometimes
4455 an insn will appear in LOG_LINKS even though it is
4456 not the most recent insn which sets the register. */
4460 || find_reg_note (scan
, REG_DEAD
, reg
)))
4469 /* Either there was a branch, or some insn used REG
4470 other than as a function call address. */
4474 /* Create a code label, and put it in a REG_LABEL note on
4475 the insn which sets the register, and on each call insn
4476 which uses the register. In final_prescan_insn we look
4477 for the REG_LABEL notes, and output the appropriate label
4480 label
= gen_label_rtx ();
4481 REG_NOTES (link
) = gen_rtx_INSN_LIST (REG_LABEL
, label
,
4483 REG_NOTES (insn
) = gen_rtx_INSN_LIST (REG_LABEL
, label
,
4492 scan
= NEXT_INSN (scan
);
4494 && ((GET_CODE (scan
) == CALL_INSN
4495 && reg_mentioned_p (reg
, scan
))
4496 || ((reg2
= sfunc_uses_reg (scan
))
4497 && REGNO (reg2
) == REGNO (reg
))))
4499 = gen_rtx_INSN_LIST (REG_LABEL
, label
, REG_NOTES (scan
));
4501 while (scan
!= dies
);
4507 fixup_addr_diff_vecs (first
);
4511 mdep_reorg_phase
= SH_SHORTEN_BRANCHES0
;
4512 shorten_branches (first
);
4514 /* Scan the function looking for move instructions which have to be
4515 changed to pc-relative loads and insert the literal tables. */
4517 mdep_reorg_phase
= SH_FIXUP_PCLOAD
;
4518 for (insn
= first
, num_mova
= 0; insn
; insn
= NEXT_INSN (insn
))
4522 /* ??? basic block reordering can move a switch table dispatch
4523 below the switch table. Check if that has happened.
4524 We only have the addresses available when optimizing; but then,
4525 this check shouldn't be needed when not optimizing. */
4526 rtx label_ref
= XVECEXP (SET_SRC (PATTERN (insn
)), 0, 0);
4528 && (INSN_ADDRESSES (INSN_UID (insn
))
4529 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref
, 0)))))
4531 /* Change the mova into a load.
4532 broken_move will then return true for it. */
4535 else if (! num_mova
++)
4538 else if (GET_CODE (insn
) == JUMP_INSN
4539 && GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
4547 /* Some code might have been inserted between the mova and
4548 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4549 for (scan
= mova
, total
= 0; scan
!= insn
; scan
= NEXT_INSN (scan
))
4550 total
+= get_attr_length (scan
);
4552 /* range of mova is 1020, add 4 because pc counts from address of
4553 second instruction after this one, subtract 2 in case pc is 2
4554 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4555 cancels out with alignment effects of the mova itself. */
4558 /* Change the mova into a load, and restart scanning
4559 there. broken_move will then return true for mova. */
4564 if (broken_move (insn
)
4565 || (GET_CODE (insn
) == INSN
4566 && recog_memoized (insn
) == CODE_FOR_casesi_worker_2
))
4569 /* Scan ahead looking for a barrier to stick the constant table
4571 rtx barrier
= find_barrier (num_mova
, mova
, insn
);
4572 rtx last_float_move
= NULL_RTX
, last_float
= 0, *last_float_addr
= NULL
;
4573 int need_aligned_label
= 0;
4575 if (num_mova
&& ! mova_p (mova
))
4577 /* find_barrier had to change the first mova into a
4578 pcload; thus, we have to start with this new pcload. */
4582 /* Now find all the moves between the points and modify them. */
4583 for (scan
= insn
; scan
!= barrier
; scan
= NEXT_INSN (scan
))
4585 if (GET_CODE (scan
) == CODE_LABEL
)
4587 if (GET_CODE (scan
) == INSN
4588 && recog_memoized (scan
) == CODE_FOR_casesi_worker_2
)
4589 need_aligned_label
= 1;
4590 if (broken_move (scan
))
4592 rtx
*patp
= &PATTERN (scan
), pat
= *patp
;
4596 enum machine_mode mode
;
4598 if (GET_CODE (pat
) == PARALLEL
)
4599 patp
= &XVECEXP (pat
, 0, 0), pat
= *patp
;
4600 src
= SET_SRC (pat
);
4601 dst
= SET_DEST (pat
);
4602 mode
= GET_MODE (dst
);
4604 if (mode
== SImode
&& hi_const (src
)
4605 && REGNO (dst
) != FPUL_REG
)
4610 while (GET_CODE (dst
) == SUBREG
)
4612 offset
+= subreg_regno_offset (REGNO (SUBREG_REG (dst
)),
4613 GET_MODE (SUBREG_REG (dst
)),
4616 dst
= SUBREG_REG (dst
);
4618 dst
= gen_rtx_REG (HImode
, REGNO (dst
) + offset
);
4620 if (GET_CODE (dst
) == REG
&& FP_ANY_REGISTER_P (REGNO (dst
)))
4622 /* This must be an insn that clobbers r0. */
4623 rtx
*clobberp
= &XVECEXP (PATTERN (scan
), 0,
4624 XVECLEN (PATTERN (scan
), 0)
4626 rtx clobber
= *clobberp
;
4628 gcc_assert (GET_CODE (clobber
) == CLOBBER
4629 && rtx_equal_p (XEXP (clobber
, 0), r0_rtx
));
4632 && reg_set_between_p (r0_rtx
, last_float_move
, scan
))
4636 && GET_MODE_SIZE (mode
) != 4
4637 && GET_MODE_SIZE (GET_MODE (last_float
)) == 4)
4639 lab
= add_constant (src
, mode
, last_float
);
4641 emit_insn_before (gen_mova (lab
), scan
);
4644 /* There will be a REG_UNUSED note for r0 on
4645 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4646 otherwise reorg:mark_target_live_regs will not
4647 consider r0 to be used, and we end up with a delay
4648 slot insn in front of SCAN that clobbers r0. */
4650 = find_regno_note (last_float_move
, REG_UNUSED
, 0);
4652 /* If we are not optimizing, then there may not be
4655 PUT_MODE (note
, REG_INC
);
4657 *last_float_addr
= r0_inc_rtx
;
4659 last_float_move
= scan
;
4661 newsrc
= gen_const_mem (mode
,
4662 (((TARGET_SH4
&& ! TARGET_FMOVD
)
4663 || REGNO (dst
) == FPUL_REG
)
4666 last_float_addr
= &XEXP (newsrc
, 0);
4668 /* Remove the clobber of r0. */
4669 *clobberp
= gen_rtx_CLOBBER (GET_MODE (clobber
),
4670 gen_rtx_SCRATCH (Pmode
));
4672 /* This is a mova needing a label. Create it. */
4673 else if (GET_CODE (src
) == UNSPEC
4674 && XINT (src
, 1) == UNSPEC_MOVA
4675 && GET_CODE (XVECEXP (src
, 0, 0)) == CONST
)
4677 lab
= add_constant (XVECEXP (src
, 0, 0), mode
, 0);
4678 newsrc
= gen_rtx_LABEL_REF (VOIDmode
, lab
);
4679 newsrc
= gen_rtx_UNSPEC (SImode
,
4680 gen_rtvec (1, newsrc
),
4685 lab
= add_constant (src
, mode
, 0);
4686 newsrc
= gen_rtx_LABEL_REF (VOIDmode
, lab
);
4687 newsrc
= gen_const_mem (mode
, newsrc
);
4689 *patp
= gen_rtx_SET (VOIDmode
, dst
, newsrc
);
4690 INSN_CODE (scan
) = -1;
4693 dump_table (need_aligned_label
? insn
: 0, barrier
);
4698 mdep_reorg_phase
= SH_SHORTEN_BRANCHES1
;
4699 INSN_ADDRESSES_FREE ();
4700 split_branches (first
);
4702 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4703 also has an effect on the register that holds the address of the sfunc.
4704 Insert an extra dummy insn in front of each sfunc that pretends to
4705 use this register. */
4706 if (flag_delayed_branch
)
4708 for (insn
= first
; insn
; insn
= NEXT_INSN (insn
))
4710 rtx reg
= sfunc_uses_reg (insn
);
4714 emit_insn_before (gen_use_sfunc_addr (reg
), insn
);
4718 /* fpscr is not actually a user variable, but we pretend it is for the
4719 sake of the previous optimization passes, since we want it handled like
4720 one. However, we don't have any debugging information for it, so turn
4721 it into a non-user variable now. */
4723 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4725 mdep_reorg_phase
= SH_AFTER_MDEP_REORG
;
/* Find the uid of the insn that a branch to LABEL actually reaches.
   The search starts at the next real insn after LABEL and steps forward
   with NEXT_INSN while the uid found is not below MAX_UID.  A destination
   that is a JUMP_INSN whose pattern is RETURN is tested for separately at
   the end.  */
4729 get_dest_uid (rtx label
, int max_uid
)
4731 rtx dest
= next_real_insn (label
);
4734 /* This can happen for an undefined label. */
4736 dest_uid
= INSN_UID (dest
);
4737 /* If this is a newly created branch redirection blocking instruction,
4738 we cannot index the branch_uid or insn_addresses arrays with its
4739 uid. But then, we won't need to, because the actual destination is
4740 the following branch. */
4741 while (dest_uid
>= max_uid
)
4743 dest
= NEXT_INSN (dest
);
4744 dest_uid
= INSN_UID (dest
);
4746 if (GET_CODE (dest
) == JUMP_INSN
&& GET_CODE (PATTERN (dest
)) == RETURN
)
4751 /* Split condbranches that are out of range. Also add clobbers for
4752 scratch registers that are needed in far jumps.
4753 We do this before delay slot scheduling, so that it can take our
4754 newly created instructions into account. It also allows us to
4755 find branches with common targets more easily. */
4758 split_branches (rtx first
)
4761 struct far_branch
**uid_branch
, *far_branch_list
= 0;
4762 int max_uid
= get_max_uid ();
4765 /* Find out which branches are out of range. */
4766 shorten_branches (first
);
4768 uid_branch
= (struct far_branch
**) alloca (max_uid
* sizeof *uid_branch
);
4769 memset ((char *) uid_branch
, 0, max_uid
* sizeof *uid_branch
);
4771 for (insn
= first
; insn
; insn
= NEXT_INSN (insn
))
4772 if (! INSN_P (insn
))
4774 else if (INSN_DELETED_P (insn
))
4776 /* Shorten_branches would split this instruction again,
4777 so transform it into a note. */
4778 PUT_CODE (insn
, NOTE
);
4779 NOTE_LINE_NUMBER (insn
) = NOTE_INSN_DELETED
;
4780 NOTE_SOURCE_FILE (insn
) = 0;
4782 else if (GET_CODE (insn
) == JUMP_INSN
4783 /* Don't mess with ADDR_DIFF_VEC */
4784 && (GET_CODE (PATTERN (insn
)) == SET
4785 || GET_CODE (PATTERN (insn
)) == RETURN
))
4787 enum attr_type type
= get_attr_type (insn
);
4788 if (type
== TYPE_CBRANCH
)
4792 if (get_attr_length (insn
) > 4)
4794 rtx src
= SET_SRC (PATTERN (insn
));
4795 rtx olabel
= XEXP (XEXP (src
, 1), 0);
4796 int addr
= INSN_ADDRESSES (INSN_UID (insn
));
4798 int dest_uid
= get_dest_uid (olabel
, max_uid
);
4799 struct far_branch
*bp
= uid_branch
[dest_uid
];
4801 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4802 the label if the LABEL_NUSES count drops to zero. There is
4803 always a jump_optimize pass that sets these values, but it
4804 proceeds to delete unreferenced code, and then if not
4805 optimizing, to un-delete the deleted instructions, thus
4806 leaving labels with use counts that are too low. */
4809 JUMP_LABEL (insn
) = olabel
;
4810 LABEL_NUSES (olabel
)++;
4814 bp
= (struct far_branch
*) alloca (sizeof *bp
);
4815 uid_branch
[dest_uid
] = bp
;
4816 bp
->prev
= far_branch_list
;
4817 far_branch_list
= bp
;
4819 = XEXP (XEXP (SET_SRC (PATTERN (insn
)), 1), 0);
4820 LABEL_NUSES (bp
->far_label
)++;
4824 label
= bp
->near_label
;
4825 if (! label
&& bp
->address
- addr
>= CONDJUMP_MIN
)
4827 rtx block
= bp
->insert_place
;
4829 if (GET_CODE (PATTERN (block
)) == RETURN
)
4830 block
= PREV_INSN (block
);
4832 block
= gen_block_redirect (block
,
4834 label
= emit_label_after (gen_label_rtx (),
4836 bp
->near_label
= label
;
4838 else if (label
&& ! NEXT_INSN (label
))
4840 if (addr
+ 2 - bp
->address
<= CONDJUMP_MAX
)
4841 bp
->insert_place
= insn
;
4843 gen_far_branch (bp
);
4847 || (NEXT_INSN (label
) && bp
->address
- addr
< CONDJUMP_MIN
))
4849 bp
->near_label
= label
= gen_label_rtx ();
4850 bp
->insert_place
= insn
;
4853 ok
= redirect_jump (insn
, label
, 1);
4858 /* get_attr_length (insn) == 2 */
4859 /* Check if we have a pattern where reorg wants to redirect
4860 the branch to a label from an unconditional branch that
4862 /* We can't use JUMP_LABEL here because it might be undefined
4863 when not optimizing. */
4864 /* A syntax error might cause beyond to be NULL_RTX. */
4866 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn
)), 1),
4870 && (GET_CODE (beyond
) == JUMP_INSN
4871 || ((beyond
= next_active_insn (beyond
))
4872 && GET_CODE (beyond
) == JUMP_INSN
))
4873 && GET_CODE (PATTERN (beyond
)) == SET
4874 && recog_memoized (beyond
) == CODE_FOR_jump_compact
4876 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond
)), 0)))
4877 - INSN_ADDRESSES (INSN_UID (insn
)) + (unsigned) 252)
4879 gen_block_redirect (beyond
,
4880 INSN_ADDRESSES (INSN_UID (beyond
)), 1);
4883 next
= next_active_insn (insn
);
4885 if ((GET_CODE (next
) == JUMP_INSN
4886 || ((next
= next_active_insn (next
))
4887 && GET_CODE (next
) == JUMP_INSN
))
4888 && GET_CODE (PATTERN (next
)) == SET
4889 && recog_memoized (next
) == CODE_FOR_jump_compact
4891 (INSN_UID (XEXP (SET_SRC (PATTERN (next
)), 0)))
4892 - INSN_ADDRESSES (INSN_UID (insn
)) + (unsigned) 252)
4894 gen_block_redirect (next
, INSN_ADDRESSES (INSN_UID (next
)), 1);
4896 else if (type
== TYPE_JUMP
|| type
== TYPE_RETURN
)
4898 int addr
= INSN_ADDRESSES (INSN_UID (insn
));
4901 struct far_branch
*bp
;
4903 if (type
== TYPE_JUMP
)
4905 far_label
= XEXP (SET_SRC (PATTERN (insn
)), 0);
4906 dest_uid
= get_dest_uid (far_label
, max_uid
);
4909 /* Parse errors can lead to labels outside
4911 if (! NEXT_INSN (far_label
))
4916 JUMP_LABEL (insn
) = far_label
;
4917 LABEL_NUSES (far_label
)++;
4919 redirect_jump (insn
, NULL_RTX
, 1);
4923 bp
= uid_branch
[dest_uid
];
4926 bp
= (struct far_branch
*) alloca (sizeof *bp
);
4927 uid_branch
[dest_uid
] = bp
;
4928 bp
->prev
= far_branch_list
;
4929 far_branch_list
= bp
;
4931 bp
->far_label
= far_label
;
4933 LABEL_NUSES (far_label
)++;
4935 else if (bp
->near_label
&& ! NEXT_INSN (bp
->near_label
))
4936 if (addr
- bp
->address
<= CONDJUMP_MAX
)
4937 emit_label_after (bp
->near_label
, PREV_INSN (insn
));
4940 gen_far_branch (bp
);
4946 bp
->insert_place
= insn
;
4948 emit_insn_before (gen_block_branch_redirect (const0_rtx
), insn
);
4950 gen_block_redirect (insn
, addr
, bp
->near_label
? 2 : 0);
4953 /* Generate all pending far branches,
4954 and free our references to the far labels. */
4955 while (far_branch_list
)
4957 if (far_branch_list
->near_label
4958 && ! NEXT_INSN (far_branch_list
->near_label
))
4959 gen_far_branch (far_branch_list
);
4961 && far_branch_list
->far_label
4962 && ! --LABEL_NUSES (far_branch_list
->far_label
))
4963 delete_insn (far_branch_list
->far_label
);
4964 far_branch_list
= far_branch_list
->prev
;
4967 /* Instruction length information is no longer valid due to the new
4968 instructions that have been generated. */
4969 init_insn_lengths ();
4972 /* Dump out instruction addresses, which is useful for debugging the
4973 constant pool table stuff.
4975 If relaxing, output the label and pseudo-ops used to link together
4976 calls and the instruction which set the registers. */
4978 /* ??? The addresses printed by this routine for insns are nonsense for
4979 insns which are inside of a sequence where none of the inner insns have
4980 variable length. This is because the second pass of shorten_branches
4981 does not bother to update them. */
/* Per-insn hook.  OPVEC and NOPERANDS are marked unused.  When
   TARGET_DUMPISIZE is set, a "! at <address>" line built from
   INSN_ADDRESSES (INSN_UID (insn)) is written to asm_out_file.
   NOTE(review): several lines of this function (guards, case labels,
   braces) are not visible in this view.  */
4984 final_prescan_insn (rtx insn
, rtx
*opvec ATTRIBUTE_UNUSED
,
4985 int noperands ATTRIBUTE_UNUSED
)
4987 if (TARGET_DUMPISIZE
)
4988 fprintf (asm_out_file
, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn
)));
/* Look up a REG_LABEL note on INSN; the label it carries is what gets
   printed below.  */
4994 note
= find_reg_note (insn
, REG_LABEL
, NULL_RTX
);
/* Dispatch on the insn pattern; for a PARALLEL, its first element is
   used instead of the PARALLEL itself.  */
4999 pattern
= PATTERN (insn
);
5000 if (GET_CODE (pattern
) == PARALLEL
)
5001 pattern
= XVECEXP (pattern
, 0, 0);
5002 switch (GET_CODE (pattern
))
/* A pattern whose SET_SRC is not a CALL, on an insn that is not of
   type sfunc, gets the note's label emitted as an internal "L" label.
   NOTE(review): the case label selecting this arm is not visible
   here; presumably SET -- confirm.  */
5005 if (GET_CODE (SET_SRC (pattern
)) != CALL
5006 && get_attr_type (insn
) != TYPE_SFUNC
)
5008 targetm
.asm_out
.internal_label
5009 (asm_out_file
, "L", CODE_LABEL_NUMBER (XEXP (note
, 0)));
5012 /* else FALLTHROUGH */
/* Otherwise a ".uses" directive naming the note's label is printed.  */
5014 asm_fprintf (asm_out_file
, "\t.uses %LL%d\n",
5015 CODE_LABEL_NUMBER (XEXP (note
, 0)));
5025 /* Dump out any constants accumulated in the final pass. These will
5029 output_jump_label_table (void)
5035 fprintf (asm_out_file
, "\t.align 2\n");
5036 for (i
= 0; i
< pool_size
; i
++)
5038 pool_node
*p
= &pool_vector
[i
];
5040 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
5041 CODE_LABEL_NUMBER (p
->label
));
5042 output_asm_insn (".long %O0", &p
->value
);
5050 /* A full frame looks like:
5054 [ if current_function_anonymous_args
5067 local-0 <- fp points here. */
5069 /* Number of bytes pushed for anonymous args, used to pass information
5070 between expand_prologue and expand_epilogue. */
5072 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5073 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5074 for an epilogue and a negative value means that it's for a sibcall
5075 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5076 all the registers that are about to be restored, and hence dead. */
5079 output_stack_adjust (int size
, rtx reg
, int epilogue_p
,
5080 HARD_REG_SET
*live_regs_mask
)
5082 rtx (*emit_fn
) (rtx
) = epilogue_p
? &emit_insn
: &frame_insn
;
5085 HOST_WIDE_INT align
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5087 /* This test is bogus, as output_stack_adjust is used to re-align the
5090 gcc_assert (!(size
% align
));
5093 if (CONST_OK_FOR_ADD (size
))
5094 emit_fn (GEN_ADD3 (reg
, reg
, GEN_INT (size
)));
5095 /* Try to do it with two partial adjustments; however, we must make
5096 sure that the stack is properly aligned at all times, in case
5097 an interrupt occurs between the two partial adjustments. */
5098 else if (CONST_OK_FOR_ADD (size
/ 2 & -align
)
5099 && CONST_OK_FOR_ADD (size
- (size
/ 2 & -align
)))
5101 emit_fn (GEN_ADD3 (reg
, reg
, GEN_INT (size
/ 2 & -align
)));
5102 emit_fn (GEN_ADD3 (reg
, reg
, GEN_INT (size
- (size
/ 2 & -align
))));
5108 int temp
= epilogue_p
? 7 : (TARGET_SH5
? 0 : 1);
5111 /* If TEMP is invalid, we could temporarily save a general
5112 register to MACL. However, there is currently no need
5113 to handle this case, so just die when we see it. */
5115 || current_function_interrupt
5116 || ! call_really_used_regs
[temp
] || fixed_regs
[temp
])
5118 if (temp
< 0 && ! current_function_interrupt
5119 && (TARGET_SHMEDIA
|| epilogue_p
>= 0))
5122 COPY_HARD_REG_SET (temps
, call_used_reg_set
);
5123 AND_COMPL_HARD_REG_SET (temps
, call_fixed_reg_set
);
5127 if (current_function_return_rtx
)
5129 enum machine_mode mode
;
5130 mode
= GET_MODE (current_function_return_rtx
);
5131 if (BASE_RETURN_VALUE_REG (mode
) == FIRST_RET_REG
)
5132 nreg
= HARD_REGNO_NREGS (FIRST_RET_REG
, mode
);
5134 for (i
= 0; i
< nreg
; i
++)
5135 CLEAR_HARD_REG_BIT (temps
, FIRST_RET_REG
+ i
);
5136 if (current_function_calls_eh_return
)
5138 CLEAR_HARD_REG_BIT (temps
, EH_RETURN_STACKADJ_REGNO
);
5139 for (i
= 0; i
<= 3; i
++)
5140 CLEAR_HARD_REG_BIT (temps
, EH_RETURN_DATA_REGNO (i
));
5143 if (TARGET_SHMEDIA
&& epilogue_p
< 0)
5144 for (i
= FIRST_TARGET_REG
; i
<= LAST_TARGET_REG
; i
++)
5145 CLEAR_HARD_REG_BIT (temps
, i
);
5146 if (epilogue_p
<= 0)
5148 for (i
= FIRST_PARM_REG
;
5149 i
< FIRST_PARM_REG
+ NPARM_REGS (SImode
); i
++)
5150 CLEAR_HARD_REG_BIT (temps
, i
);
5151 if (cfun
->static_chain_decl
!= NULL
)
5152 CLEAR_HARD_REG_BIT (temps
, STATIC_CHAIN_REGNUM
);
5154 temp
= scavenge_reg (&temps
);
5156 if (temp
< 0 && live_regs_mask
)
5160 COPY_HARD_REG_SET (temps
, *live_regs_mask
);
5161 CLEAR_HARD_REG_BIT (temps
, REGNO (reg
));
5162 temp
= scavenge_reg (&temps
);
5166 rtx adj_reg
, tmp_reg
, mem
;
5168 /* If we reached here, the most likely case is the (sibcall)
5169 epilogue for non SHmedia. Put a special push/pop sequence
5170 for such case as the last resort. This looks lengthy but
5171 would not be a problem because it seems to be very
5174 gcc_assert (!TARGET_SHMEDIA
&& epilogue_p
);
5177 /* ??? There is still the slight possibility that r4 or
5178 r5 have been reserved as fixed registers or assigned
5179 as global registers, and they change during an
5180 interrupt. There are possible ways to handle this:
5182 - If we are adjusting the frame pointer (r14), we can do
5183 with a single temp register and an ordinary push / pop
5185 - Grab any call-used or call-saved registers (i.e. not
5186 fixed or globals) for the temps we need. We might
5187 also grab r14 if we are adjusting the stack pointer.
5188 If we can't find enough available registers, issue
5189 a diagnostic and die - the user must have reserved
5190 way too many registers.
5191 But since all this is rather unlikely to happen and
5192 would require extra testing, we just die if r4 / r5
5193 are not available. */
5194 gcc_assert (!fixed_regs
[4] && !fixed_regs
[5]
5195 && !global_regs
[4] && !global_regs
[5]);
5197 adj_reg
= gen_rtx_REG (GET_MODE (reg
), 4);
5198 tmp_reg
= gen_rtx_REG (GET_MODE (reg
), 5);
5199 emit_move_insn (gen_tmp_stack_mem (Pmode
, reg
), adj_reg
);
5200 emit_insn (GEN_MOV (adj_reg
, GEN_INT (size
)));
5201 emit_insn (GEN_ADD3 (adj_reg
, adj_reg
, reg
));
5202 mem
= gen_tmp_stack_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
, adj_reg
));
5203 emit_move_insn (mem
, tmp_reg
);
5204 emit_move_insn (tmp_reg
, gen_tmp_stack_mem (Pmode
, reg
));
5205 mem
= gen_tmp_stack_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
, adj_reg
));
5206 emit_move_insn (mem
, tmp_reg
);
5207 emit_move_insn (reg
, adj_reg
);
5208 mem
= gen_tmp_stack_mem (Pmode
, gen_rtx_POST_INC (Pmode
, reg
));
5209 emit_move_insn (adj_reg
, mem
);
5210 mem
= gen_tmp_stack_mem (Pmode
, gen_rtx_POST_INC (Pmode
, reg
));
5211 emit_move_insn (tmp_reg
, mem
);
5212 /* Tell flow the insns that pop r4/r5 aren't dead. */
5213 emit_insn (gen_rtx_USE (VOIDmode
, tmp_reg
));
5214 emit_insn (gen_rtx_USE (VOIDmode
, adj_reg
));
5217 const_reg
= gen_rtx_REG (GET_MODE (reg
), temp
);
5219 /* If SIZE is negative, subtract the positive value.
5220 This sometimes allows a constant pool entry to be shared
5221 between prologue and epilogue code. */
5224 emit_insn (GEN_MOV (const_reg
, GEN_INT (-size
)));
5225 insn
= emit_fn (GEN_SUB3 (reg
, reg
, const_reg
));
5229 emit_insn (GEN_MOV (const_reg
, GEN_INT (size
)));
5230 insn
= emit_fn (GEN_ADD3 (reg
, reg
, const_reg
));
5234 = (gen_rtx_EXPR_LIST
5235 (REG_FRAME_RELATED_EXPR
,
5236 gen_rtx_SET (VOIDmode
, reg
,
5237 gen_rtx_PLUS (SImode
, reg
, GEN_INT (size
))),
5247 RTX_FRAME_RELATED_P (x
) = 1;
5251 /* Output RTL to push register RN onto the stack. */
5258 x
= gen_push_fpul ();
5259 else if (rn
== FPSCR_REG
)
5260 x
= gen_push_fpscr ();
5261 else if ((TARGET_SH4
|| TARGET_SH2A_DOUBLE
) && TARGET_FMOVD
&& ! TARGET_FPU_SINGLE
5262 && FP_OR_XD_REGISTER_P (rn
))
5264 if (FP_REGISTER_P (rn
) && (rn
- FIRST_FP_REG
) & 1)
5266 x
= gen_push_4 (gen_rtx_REG (DFmode
, rn
));
5268 else if (TARGET_SH2E
&& FP_REGISTER_P (rn
))
5269 x
= gen_push_e (gen_rtx_REG (SFmode
, rn
));
5271 x
= gen_push (gen_rtx_REG (SImode
, rn
));
5275 = gen_rtx_EXPR_LIST (REG_INC
,
5276 gen_rtx_REG (SImode
, STACK_POINTER_REGNUM
), 0);
5280 /* Output RTL to pop register RN from the stack. */
5287 x
= gen_pop_fpul ();
5288 else if (rn
== FPSCR_REG
)
5289 x
= gen_pop_fpscr ();
5290 else if ((TARGET_SH4
|| TARGET_SH2A_DOUBLE
) && TARGET_FMOVD
&& ! TARGET_FPU_SINGLE
5291 && FP_OR_XD_REGISTER_P (rn
))
5293 if (FP_REGISTER_P (rn
) && (rn
- FIRST_FP_REG
) & 1)
5295 x
= gen_pop_4 (gen_rtx_REG (DFmode
, rn
));
5297 else if (TARGET_SH2E
&& FP_REGISTER_P (rn
))
5298 x
= gen_pop_e (gen_rtx_REG (SFmode
, rn
));
5300 x
= gen_pop (gen_rtx_REG (SImode
, rn
));
5304 = gen_rtx_EXPR_LIST (REG_INC
,
5305 gen_rtx_REG (SImode
, STACK_POINTER_REGNUM
), 0);
5308 /* Generate code to push the regs specified in the mask. */
/* Tests every hard register number below FIRST_PSEUDO_REGISTER against
   *MASK, and tests PR_REG on its own after the loop.  INTERRUPT_HANDLER
   only matters at FIRST_FP_REG, where it can trigger the FPSCR setup
   below.  NOTE(review): the push calls and the local declarations are
   not visible in this view of the function.  */
5311 push_regs (HARD_REG_SET
*mask
, int interrupt_handler
)
5316 /* Push PR last; this gives better latencies after the prologue, and
5317 candidates for the return delay slot when there are no general
5318 registers pushed. */
5319 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
5321 /* If this is an interrupt handler, and the SZ bit varies,
5322 and we have to push any floating point register, we need
5323 to switch to the correct precision first. */
5324 if (i
== FIRST_FP_REG
&& interrupt_handler
&& TARGET_FMOVD
5325 && hard_regs_intersect_p (mask
, &reg_class_contents
[DF_REGS
]))
5327 HARD_REG_SET unsaved
;
/* UNSAVED is the complement of *MASK; it is handed to
   fpscr_set_from_mem together with the normal FP mode.  */
5330 COMPL_HARD_REG_SET (unsaved
, *mask
);
5331 fpscr_set_from_mem (NORMAL_MODE (FP_MODE
), unsaved
);
/* FPSCR_REG is passed over here once skip_fpscr is set.
   NOTE(review): the first operand of this condition is not visible.  */
5335 && (i
!= FPSCR_REG
|| ! skip_fpscr
)
5336 && TEST_HARD_REG_BIT (*mask
, i
))
5339 if (TEST_HARD_REG_BIT (*mask
, PR_REG
))
5343 /* Calculate how much extra space is needed to save all callee-saved
5345 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
/* Accumulates, in STACK_SPACE, the natural-mode size of each target
   register that qualifies in the scan below.  LIVE_REGS_MASK is only
   read.  NOTE(review): the declaration of REG and the final return are
   not visible in this view.  */
5348 shmedia_target_regs_stack_space (HARD_REG_SET
*live_regs_mask
)
5351 int stack_space
= 0;
5352 int interrupt_handler
= sh_cfun_interrupt_handler_p ();
/* Scan target registers from LAST_TARGET_REG down to FIRST_TARGET_REG.
   A register qualifies when it is not already in *LIVE_REGS_MASK and
   either is not call_really_used or this is an interrupt handler.  */
5354 for (reg
= LAST_TARGET_REG
; reg
>= FIRST_TARGET_REG
; reg
--)
5355 if ((! call_really_used_regs
[reg
] || interrupt_handler
)
5356 && ! TEST_HARD_REG_BIT (*live_regs_mask
, reg
))
5357 /* Leave space to save this target register on the stack,
5358 in case target register allocation wants to use it. */
5359 stack_space
+= GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg
));
5363 /* Decide whether we should reserve space for callee-save target registers,
5364 in case target register allocation wants to use them. REGS_SAVED is
5365 the space, in bytes, that is already required for register saves.
5366 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5369 shmedia_reserve_space_for_target_registers_p (int regs_saved
,
5370 HARD_REG_SET
*live_regs_mask
)
/* Answer yes when the extra space computed by
   shmedia_target_regs_stack_space does not exceed REGS_SAVED bytes.
   NOTE(review): lines ahead of this return are not visible in this
   view; an earlier exit may exist -- confirm.  */
5374 return shmedia_target_regs_stack_space (live_regs_mask
) <= regs_saved
;
5377 /* Decide how much space to reserve for callee-save target registers
5378 in case target register allocation wants to use them.
5379 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5382 shmedia_target_regs_stack_adjust (HARD_REG_SET
*live_regs_mask
)
/* When shmedia_space_reserved_for_target_registers is set, the
   adjustment is whatever shmedia_target_regs_stack_space computes for
   LIVE_REGS_MASK.  NOTE(review): the result for the other case is not
   visible in this view.  */
5384 if (shmedia_space_reserved_for_target_registers
)
5385 return shmedia_target_regs_stack_space (live_regs_mask
);
5390 /* Work out the registers which need to be saved, both as a mask and a
5391 count of saved words. Return the count.
5393 If doing a pragma interrupt function, then push all regs used by the
5394 function, and if we call another function (we can tell by looking at PR),
5395 make sure that all the regs it clobbers are safe too. */
/* Clears *LIVE_REGS_MASK, then scans hard registers downwards from
   FIRST_PSEUDO_REGISTER - 1; each selected register has its bit set in
   the mask and its natural-mode size added to COUNT.  The
   "trapa_handler" and "nosave_low_regs" attributes are read from the
   current function's DECL_ATTRIBUTES; INTERRUPT_HANDLER means
   sh_cfun_interrupt_handler_p () holds and the function is not a trapa
   handler.  Side effect: MASK_FPU_SINGLE may be cleared in
   target_flags.  NOTE(review): a number of lines (declarations, braces,
   parts of the selection condition) are not visible in this view.  */
5398 calc_live_regs (HARD_REG_SET
*live_regs_mask
)
5403 bool interrupt_or_trapa_handler
, trapa_handler
, interrupt_handler
;
5404 bool nosave_low_regs
;
5405 int pr_live
, has_call
;
5407 attrs
= DECL_ATTRIBUTES (current_function_decl
);
5408 interrupt_or_trapa_handler
= sh_cfun_interrupt_handler_p ();
5409 trapa_handler
= lookup_attribute ("trapa_handler", attrs
) != NULL_TREE
;
5410 interrupt_handler
= interrupt_or_trapa_handler
&& ! trapa_handler
;
5411 nosave_low_regs
= lookup_attribute ("nosave_low_regs", attrs
) != NULL_TREE
;
5413 CLEAR_HARD_REG_SET (*live_regs_mask
);
5414 if ((TARGET_SH4
|| TARGET_SH2A_DOUBLE
) && TARGET_FMOVD
&& interrupt_handler
5415 && regs_ever_live
[FPSCR_REG
])
5416 target_flags
&= ~MASK_FPU_SINGLE
;
5417 /* If we can save a lot of saves by switching to double mode, do that. */
5418 else if ((TARGET_SH4
|| TARGET_SH2A_DOUBLE
) && TARGET_FMOVD
&& TARGET_FPU_SINGLE
)
5419 for (count
= 0, reg
= FIRST_FP_REG
; reg
<= LAST_FP_REG
; reg
+= 2)
5420 if (regs_ever_live
[reg
] && regs_ever_live
[reg
+1]
5421 && (! call_really_used_regs
[reg
]
5422 || interrupt_handler
)
5425 target_flags
&= ~MASK_FPU_SINGLE
;
5428 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5429 knows how to use it. That means the pseudo originally allocated for
5430 the initial value can become the PR_MEDIA_REG hard register, as seen for
5431 execute/20010122-1.c:test9. */
5433 /* ??? this function is called from initial_elimination_offset, hence we
5434 can't use the result of sh_media_register_for_return here. */
5435 pr_live
= sh_pr_n_sets ();
5438 rtx pr_initial
= has_hard_reg_initial_val (Pmode
, PR_REG
);
5439 pr_live
= (pr_initial
5440 ? (GET_CODE (pr_initial
) != REG
5441 || REGNO (pr_initial
) != (PR_REG
))
5442 : regs_ever_live
[PR_REG
]);
5443 /* For Shcompact, if not optimizing, we end up with a memory reference
5444 using the return address pointer for __builtin_return_address even
5445 though there is no actual need to put the PR register on the stack. */
5446 pr_live
|= regs_ever_live
[RETURN_ADDRESS_POINTER_REGNUM
];
5448 /* Force PR to be live if the prologue has to call the SHmedia
5449 argument decoder or register saver. */
5450 if (TARGET_SHCOMPACT
5451 && ((current_function_args_info
.call_cookie
5452 & ~ CALL_COOKIE_RET_TRAMP (1))
5453 || current_function_has_nonlocal_label
))
/* On SHmedia any non-leaf function counts as making a call; elsewhere
   PR liveness stands in for that.  */
5455 has_call
= TARGET_SHMEDIA
? ! leaf_function_p () : pr_live
;
5456 for (count
= 0, reg
= FIRST_PSEUDO_REGISTER
; reg
-- != 0; )
5458 if (reg
== (TARGET_SHMEDIA
? PR_MEDIA_REG
: PR_REG
)
5461 ? (/* Need to save all the regs ever live. */
5462 (regs_ever_live
[reg
]
5463 || (call_really_used_regs
[reg
]
5464 && (! fixed_regs
[reg
] || reg
== MACH_REG
|| reg
== MACL_REG
5465 || reg
== PIC_OFFSET_TABLE_REGNUM
)
5467 || (TARGET_SHMEDIA
&& has_call
5468 && REGISTER_NATURAL_MODE (reg
) == SImode
5469 && (GENERAL_REGISTER_P (reg
) || TARGET_REGISTER_P (reg
))))
5470 && reg
!= STACK_POINTER_REGNUM
&& reg
!= ARG_POINTER_REGNUM
5471 && reg
!= RETURN_ADDRESS_POINTER_REGNUM
5472 && reg
!= T_REG
&& reg
!= GBR_REG
5473 /* Push fpscr only on targets which have FPU */
5474 && (reg
!= FPSCR_REG
|| TARGET_FPU_ANY
))
5475 : (/* Only push those regs which are used and need to be saved. */
5478 && current_function_args_info
.call_cookie
5479 && reg
== PIC_OFFSET_TABLE_REGNUM
)
5480 || (regs_ever_live
[reg
]
5481 && (!call_really_used_regs
[reg
]
5482 || (trapa_handler
&& reg
== FPSCR_REG
&& TARGET_FPU_ANY
)))
5483 || (current_function_calls_eh_return
5484 && (reg
== EH_RETURN_DATA_REGNO (0)
5485 || reg
== EH_RETURN_DATA_REGNO (1)
5486 || reg
== EH_RETURN_DATA_REGNO (2)
5487 || reg
== EH_RETURN_DATA_REGNO (3)))
5488 || ((reg
== MACL_REG
|| reg
== MACH_REG
)
5489 && regs_ever_live
[reg
]
5490 && sh_cfun_attr_renesas_p ())
5493 SET_HARD_REG_BIT (*live_regs_mask
, reg
);
5494 count
+= GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg
));
5496 if ((TARGET_SH4
|| TARGET_SH2A_DOUBLE
|| TARGET_SH5
) && TARGET_FMOVD
5497 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg
)) == MODE_FLOAT
)
5499 if (FP_REGISTER_P (reg
))
5501 if (! TARGET_FPU_SINGLE
&& ! regs_ever_live
[reg
^ 1])
5503 SET_HARD_REG_BIT (*live_regs_mask
, (reg
^ 1));
5504 count
+= GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg
^ 1));
5507 else if (XD_REGISTER_P (reg
))
5509 /* Must switch to double mode to access these registers. */
5510 target_flags
&= ~MASK_FPU_SINGLE
;
/* NOTE(review): the statement guarded by this test is not visible in
   this view; presumably it ends the downward scan at r8 so that the
   lower registers are not saved -- confirm.  */
5514 if (nosave_low_regs
&& reg
== R8_REG
)
5517 /* If we have a target register optimization pass after prologue / epilogue
5518 threading, we need to assume all target registers will be live even if
5520 if (flag_branch_target_load_optimize2
5521 && TARGET_SAVE_ALL_TARGET_REGS
5522 && shmedia_space_reserved_for_target_registers
)
5523 for (reg
= LAST_TARGET_REG
; reg
>= FIRST_TARGET_REG
; reg
--)
5524 if ((! call_really_used_regs
[reg
] || interrupt_handler
)
5525 && ! TEST_HARD_REG_BIT (*live_regs_mask
, reg
))
5527 SET_HARD_REG_BIT (*live_regs_mask
, reg
);
5528 count
+= GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg
));
5530 /* If this is an interrupt handler, we don't have any call-clobbered
5531 registers we can conveniently use for target register save/restore.
5532 Make sure we save at least one general purpose register when we need
5533 to save target registers. */
5534 if (interrupt_handler
5535 && hard_regs_intersect_p (live_regs_mask
,
5536 &reg_class_contents
[TARGET_REGS
])
5537 && ! hard_regs_intersect_p (live_regs_mask
,
5538 &reg_class_contents
[GENERAL_REGS
]))
5540 SET_HARD_REG_BIT (*live_regs_mask
, R0_REG
);
5541 count
+= GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG
));
5547 /* Code to generate prologue and epilogue sequences */
5549 /* PUSHED is the number of bytes that are being pushed on the
5550 stack for register saves. Return the frame size, padded
5551 appropriately so that the stack stays properly aligned. */
5552 static HOST_WIDE_INT
5553 rounded_frame_size (int pushed
)
/* SIZE is the current frame size as reported by get_frame_size ().  */
5555 HOST_WIDE_INT size
= get_frame_size ();
/* ALIGN is the stack boundary converted from bits to bytes.  */
5556 HOST_WIDE_INT align
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
/* Round SIZE + PUSHED up to a multiple of ALIGN, then take PUSHED back
   out, so that the returned size plus PUSHED is a multiple of ALIGN.
   The "& -align" mask assumes ALIGN is a power of two -- TODO confirm.  */
5558 return ((size
+ pushed
+ align
- 1) & -align
) - pushed
;
5561 /* Choose a call-clobbered target-branch register that remains
5562 unchanged along the whole function. We set it up as the return
5563 value in the prologue. */
5565 sh_media_register_for_return (void)
/* Three screening tests come first: the function is not a leaf, it
   carries the "interrupt_handler" attribute, or
   sh_cfun_interrupt_handler_p () holds.  NOTE(review): the statements
   these tests guard are not visible in this view.  */
5570 if (! current_function_is_leaf
)
5572 if (lookup_attribute ("interrupt_handler",
5573 DECL_ATTRIBUTES (current_function_decl
)))
5575 if (sh_cfun_interrupt_handler_p ())
/* TR0_USED is nonzero when generating PIC code and the PIC register is
   ever live; it is added to FIRST_TARGET_REG so the scan then starts
   one register later.  */
5578 tr0_used
= flag_pic
&& regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
];
/* A candidate is a target register that is call_really_used and never
   live in this function.  NOTE(review): what is done with a match is
   not visible in this view.  */
5580 for (regno
= FIRST_TARGET_REG
+ tr0_used
; regno
<= LAST_TARGET_REG
; regno
++)
5581 if (call_really_used_regs
[regno
] && ! regs_ever_live
[regno
])
5587 /* The maximum registers we need to save are:
5588 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5589 - 32 floating point registers (for each pair, we save none,
5590 one single precision value, or a double precision value).
5591 - 8 target registers
5592 - add 1 entry for a delimiter. */
5593 #define MAX_SAVED_REGS (62+32+8)
5595 typedef struct save_entry_s
5604 /* There will be a delimiter entry with VOIDmode both at the start and the
5605 end of a filled in schedule. The end delimiter has the offset of the
5606 save with the smallest (i.e. most negative) offset. */
5607 typedef struct save_schedule_s
5609 save_entry entries
[MAX_SAVED_REGS
+ 2];
5610 int temps
[MAX_TEMPS
+1];
5613 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5614 use reverse order. Returns the last entry written to (not counting
5615 the delimiter). OFFSET_BASE is a number to be added to all offset
5619 sh5_schedule_saves (HARD_REG_SET
*live_regs_mask
, save_schedule
*schedule
,
5623 save_entry
*entry
= schedule
->entries
;
5627 if (! current_function_interrupt
)
5628 for (i
= FIRST_GENERAL_REG
; tmpx
< MAX_TEMPS
&& i
<= LAST_GENERAL_REG
; i
++)
5629 if (call_really_used_regs
[i
] && ! fixed_regs
[i
] && i
!= PR_MEDIA_REG
5630 && ! FUNCTION_ARG_REGNO_P (i
)
5631 && i
!= FIRST_RET_REG
5632 && ! (cfun
->static_chain_decl
!= NULL
&& i
== STATIC_CHAIN_REGNUM
)
5633 && ! (current_function_calls_eh_return
5634 && (i
== EH_RETURN_STACKADJ_REGNO
5635 || ((unsigned) i
>= EH_RETURN_DATA_REGNO (0)
5636 && (unsigned) i
<= EH_RETURN_DATA_REGNO (3)))))
5637 schedule
->temps
[tmpx
++] = i
;
5639 entry
->mode
= VOIDmode
;
5640 entry
->offset
= offset_base
;
5642 /* We loop twice: first, we save 8-byte aligned registers in the
5643 higher addresses, that are known to be aligned. Then, we
5644 proceed to saving 32-bit registers that don't need 8-byte
5646 If this is an interrupt function, all registers that need saving
5647 need to be saved in full. Moreover, we need to postpone saving
5648 target registers till we have saved some general purpose registers
5649 we can then use as scratch registers. */
5650 offset
= offset_base
;
5651 for (align
= 1; align
>= 0; align
--)
5653 for (i
= FIRST_PSEUDO_REGISTER
- 1; i
>= 0; i
--)
5654 if (TEST_HARD_REG_BIT (*live_regs_mask
, i
))
5656 enum machine_mode mode
= REGISTER_NATURAL_MODE (i
);
5659 if (current_function_interrupt
)
5661 if (TARGET_REGISTER_P (i
))
5663 if (GENERAL_REGISTER_P (i
))
5666 if (mode
== SFmode
&& (i
% 2) == 1
5667 && ! TARGET_FPU_SINGLE
&& FP_REGISTER_P (i
)
5668 && (TEST_HARD_REG_BIT (*live_regs_mask
, (i
^ 1))))
5675 /* If we're doing the aligned pass and this is not aligned,
5676 or we're doing the unaligned pass and this is aligned,
5678 if ((GET_MODE_SIZE (mode
) % (STACK_BOUNDARY
/ BITS_PER_UNIT
) == 0)
5682 if (current_function_interrupt
5683 && GENERAL_REGISTER_P (i
)
5684 && tmpx
< MAX_TEMPS
)
5685 schedule
->temps
[tmpx
++] = i
;
5687 offset
-= GET_MODE_SIZE (mode
);
5690 entry
->offset
= offset
;
5693 if (align
&& current_function_interrupt
)
5694 for (i
= LAST_TARGET_REG
; i
>= FIRST_TARGET_REG
; i
--)
5695 if (TEST_HARD_REG_BIT (*live_regs_mask
, i
))
5697 offset
-= GET_MODE_SIZE (DImode
);
5699 entry
->mode
= DImode
;
5700 entry
->offset
= offset
;
5705 entry
->mode
= VOIDmode
;
5706 entry
->offset
= offset
;
5707 schedule
->temps
[tmpx
] = -1;
5712 sh_expand_prologue (void)
5714 HARD_REG_SET live_regs_mask
;
5717 int save_flags
= target_flags
;
5720 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl
));
5722 current_function_interrupt
= sh_cfun_interrupt_handler_p ();
5724 /* We have pretend args if we had an object sent partially in registers
5725 and partially on the stack, e.g. a large structure. */
5726 pretend_args
= current_function_pretend_args_size
;
5727 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl
)
5728 && (NPARM_REGS(SImode
)
5729 > current_function_args_info
.arg_count
[(int) SH_ARG_INT
]))
5731 output_stack_adjust (-pretend_args
5732 - current_function_args_info
.stack_regs
* 8,
5733 stack_pointer_rtx
, 0, NULL
);
5735 if (TARGET_SHCOMPACT
&& flag_pic
&& current_function_args_info
.call_cookie
)
5736 /* We're going to use the PIC register to load the address of the
5737 incoming-argument decoder and/or of the return trampoline from
5738 the GOT, so make sure the PIC register is preserved and
5740 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5742 if (TARGET_SHCOMPACT
5743 && (current_function_args_info
.call_cookie
& ~ CALL_COOKIE_RET_TRAMP(1)))
5747 /* First, make all registers with incoming arguments that will
5748 be pushed onto the stack live, so that register renaming
5749 doesn't overwrite them. */
5750 for (reg
= 0; reg
< NPARM_REGS (SImode
); reg
++)
5751 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info
.call_cookie
)
5752 >= NPARM_REGS (SImode
) - reg
)
5753 for (; reg
< NPARM_REGS (SImode
); reg
++)
5754 emit_insn (gen_shcompact_preserve_incoming_args
5755 (gen_rtx_REG (SImode
, FIRST_PARM_REG
+ reg
)));
5756 else if (CALL_COOKIE_INT_REG_GET
5757 (current_function_args_info
.call_cookie
, reg
) == 1)
5758 emit_insn (gen_shcompact_preserve_incoming_args
5759 (gen_rtx_REG (SImode
, FIRST_PARM_REG
+ reg
)));
5761 emit_move_insn (gen_rtx_REG (Pmode
, MACL_REG
),
5763 emit_move_insn (gen_rtx_REG (SImode
, R0_REG
),
5764 GEN_INT (current_function_args_info
.call_cookie
));
5765 emit_move_insn (gen_rtx_REG (SImode
, MACH_REG
),
5766 gen_rtx_REG (SImode
, R0_REG
));
5768 else if (TARGET_SHMEDIA
)
5770 int tr
= sh_media_register_for_return ();
5774 rtx insn
= emit_move_insn (gen_rtx_REG (DImode
, tr
),
5775 gen_rtx_REG (DImode
, PR_MEDIA_REG
));
5777 /* ??? We should suppress saving pr when we don't need it, but this
5778 is tricky because of builtin_return_address. */
5780 /* If this function only exits with sibcalls, this copy
5781 will be flagged as dead. */
5782 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
,
5788 /* Emit the code for SETUP_VARARGS. */
5789 if (current_function_stdarg
)
5791 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl
))
5793 /* Push arg regs as if they'd been provided by caller in stack. */
5794 for (i
= 0; i
< NPARM_REGS(SImode
); i
++)
5796 int rn
= NPARM_REGS(SImode
) + FIRST_PARM_REG
- i
- 1;
5799 if (i
>= (NPARM_REGS(SImode
)
5800 - current_function_args_info
.arg_count
[(int) SH_ARG_INT
]
5804 RTX_FRAME_RELATED_P (insn
) = 0;
5809 /* If we're supposed to switch stacks at function entry, do so now. */
5812 /* The argument specifies a variable holding the address of the
5813 stack the interrupt function should switch to/from at entry/exit. */
5815 = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (sp_switch_attr
)));
5816 rtx sp_switch
= gen_rtx_SYMBOL_REF (Pmode
, s
);
5818 emit_insn (gen_sp_switch_1 (sp_switch
));
5821 d
= calc_live_regs (&live_regs_mask
);
5822 /* ??? Maybe we could save some switching if we can move a mode switch
5823 that already happens to be at the function start into the prologue. */
5824 if (target_flags
!= save_flags
&& ! current_function_interrupt
)
5825 emit_insn (gen_toggle_sz ());
5829 int offset_base
, offset
;
5831 int offset_in_r0
= -1;
5833 int tregs_space
= shmedia_target_regs_stack_adjust (&live_regs_mask
);
5834 int total_size
, save_size
;
5835 save_schedule schedule
;
5839 if (call_really_used_regs
[R0_REG
] && ! fixed_regs
[R0_REG
]
5840 && ! current_function_interrupt
)
5841 r0
= gen_rtx_REG (Pmode
, R0_REG
);
5843 /* D is the actual number of bytes that we need for saving registers,
5844 however, in initial_elimination_offset we have committed to using
5845 an additional TREGS_SPACE amount of bytes - in order to keep both
5846 addresses to arguments supplied by the caller and local variables
5847 valid, we must keep this gap. Place it between the incoming
5848 arguments and the actually saved registers in a bid to optimize
5849 locality of reference. */
5850 total_size
= d
+ tregs_space
;
5851 total_size
+= rounded_frame_size (total_size
);
5852 save_size
= total_size
- rounded_frame_size (d
);
5853 if (save_size
% (STACK_BOUNDARY
/ BITS_PER_UNIT
))
5854 d_rounding
= ((STACK_BOUNDARY
/ BITS_PER_UNIT
)
5855 - save_size
% (STACK_BOUNDARY
/ BITS_PER_UNIT
));
5857 /* If adjusting the stack in a single step costs nothing extra, do so.
5858 I.e. either if a single addi is enough, or we need a movi anyway,
5859 and we don't exceed the maximum offset range (the test for the
5860 latter is conservative for simplicity). */
5862 && (CONST_OK_FOR_I10 (-total_size
)
5863 || (! CONST_OK_FOR_I10 (-(save_size
+ d_rounding
))
5864 && total_size
<= 2044)))
5865 d_rounding
= total_size
- save_size
;
5867 offset_base
= d
+ d_rounding
;
5869 output_stack_adjust (-(save_size
+ d_rounding
), stack_pointer_rtx
,
5872 sh5_schedule_saves (&live_regs_mask
, &schedule
, offset_base
);
5873 tmp_pnt
= schedule
.temps
;
5874 for (entry
= &schedule
.entries
[1]; entry
->mode
!= VOIDmode
; entry
++)
5876 enum machine_mode mode
= entry
->mode
;
5877 unsigned int reg
= entry
->reg
;
5878 rtx reg_rtx
, mem_rtx
, pre_dec
= NULL_RTX
;
5881 offset
= entry
->offset
;
5883 reg_rtx
= gen_rtx_REG (mode
, reg
);
5885 mem_rtx
= gen_frame_mem (mode
,
5886 gen_rtx_PLUS (Pmode
,
5890 GO_IF_LEGITIMATE_ADDRESS (mode
, XEXP (mem_rtx
, 0), try_pre_dec
);
5897 if (HAVE_PRE_DECREMENT
5898 && (offset_in_r0
- offset
== GET_MODE_SIZE (mode
)
5899 || mem_rtx
== NULL_RTX
5900 || reg
== PR_REG
|| SPECIAL_REGISTER_P (reg
)))
5902 pre_dec
= gen_frame_mem (mode
, gen_rtx_PRE_DEC (Pmode
, r0
));
5904 GO_IF_LEGITIMATE_ADDRESS (mode
, XEXP (pre_dec
, 0),
5913 offset
+= GET_MODE_SIZE (mode
);
5917 if (mem_rtx
!= NULL_RTX
)
5920 if (offset_in_r0
== -1)
5922 emit_move_insn (r0
, GEN_INT (offset
));
5923 offset_in_r0
= offset
;
5925 else if (offset
!= offset_in_r0
)
5930 GEN_INT (offset
- offset_in_r0
)));
5931 offset_in_r0
+= offset
- offset_in_r0
;
5934 if (pre_dec
!= NULL_RTX
)
5940 (Pmode
, r0
, stack_pointer_rtx
));
5944 offset
-= GET_MODE_SIZE (mode
);
5945 offset_in_r0
-= GET_MODE_SIZE (mode
);
5950 mem_rtx
= gen_frame_mem (mode
, r0
);
5952 mem_rtx
= gen_frame_mem (mode
,
5953 gen_rtx_PLUS (Pmode
,
5957 /* We must not use an r0-based address for target-branch
5958 registers or for special registers without pre-dec
5959 memory addresses, since we store their values in r0
5961 gcc_assert (!TARGET_REGISTER_P (reg
)
5962 && ((reg
!= PR_REG
&& !SPECIAL_REGISTER_P (reg
))
5963 || mem_rtx
== pre_dec
));
5966 orig_reg_rtx
= reg_rtx
;
5967 if (TARGET_REGISTER_P (reg
)
5968 || ((reg
== PR_REG
|| SPECIAL_REGISTER_P (reg
))
5969 && mem_rtx
!= pre_dec
))
5971 rtx tmp_reg
= gen_rtx_REG (GET_MODE (reg_rtx
), *tmp_pnt
);
5973 emit_move_insn (tmp_reg
, reg_rtx
);
5975 if (REGNO (tmp_reg
) == R0_REG
)
5979 gcc_assert (!refers_to_regno_p
5980 (R0_REG
, R0_REG
+1, mem_rtx
, (rtx
*) 0));
5983 if (*++tmp_pnt
<= 0)
5984 tmp_pnt
= schedule
.temps
;
5991 /* Mark as interesting for dwarf cfi generator */
5992 insn
= emit_move_insn (mem_rtx
, reg_rtx
);
5993 RTX_FRAME_RELATED_P (insn
) = 1;
5994 /* If we use an intermediate register for the save, we can't
5995 describe this exactly in cfi as a copy of the to-be-saved
5996 register into the temporary register and then the temporary
5997 register on the stack, because the temporary register can
5998 have a different natural size than the to-be-saved register.
5999 Thus, we gloss over the intermediate copy and pretend we do
6000 a direct save from the to-be-saved register. */
6001 if (REGNO (reg_rtx
) != reg
)
6005 set
= gen_rtx_SET (VOIDmode
, mem_rtx
, orig_reg_rtx
);
6006 note_rtx
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, set
,
6008 REG_NOTES (insn
) = note_rtx
;
6011 if (TARGET_SHCOMPACT
&& (offset_in_r0
!= -1))
6013 rtx reg_rtx
= gen_rtx_REG (mode
, reg
);
6015 rtx mem_rtx
= gen_frame_mem (mode
,
6016 gen_rtx_PLUS (Pmode
,
6020 set
= gen_rtx_SET (VOIDmode
, mem_rtx
, reg_rtx
);
6021 note_rtx
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, set
,
6023 REG_NOTES (insn
) = note_rtx
;
6028 gcc_assert (entry
->offset
== d_rounding
);
6031 push_regs (&live_regs_mask
, current_function_interrupt
);
6033 if (flag_pic
&& regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
])
6035 rtx insn
= get_last_insn ();
6036 rtx last
= emit_insn (gen_GOTaddr2picreg ());
6038 /* Mark these insns as possibly dead. Sometimes, flow2 may
6039 delete all uses of the PIC register. In this case, let it
6040 delete the initialization too. */
6043 insn
= NEXT_INSN (insn
);
6045 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
,
6049 while (insn
!= last
);
6052 if (SHMEDIA_REGS_STACK_ADJUST ())
6054 /* This must NOT go through the PLT, otherwise mach and macl
6055 may be clobbered. */
6056 function_symbol (gen_rtx_REG (Pmode
, R0_REG
),
6058 ? "__GCC_push_shmedia_regs"
6059 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT
);
6060 emit_insn (gen_shmedia_save_restore_regs_compact
6061 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6064 if (target_flags
!= save_flags
&& ! current_function_interrupt
)
6066 rtx insn
= emit_insn (gen_toggle_sz ());
6068 /* If we're lucky, a mode switch in the function body will
6069 overwrite fpscr, turning this insn dead. Tell flow this
6070 insn is ok to delete. */
6071 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
,
6076 target_flags
= save_flags
;
6078 output_stack_adjust (-rounded_frame_size (d
) + d_rounding
,
6079 stack_pointer_rtx
, 0, NULL
);
6081 if (frame_pointer_needed
)
6082 frame_insn (GEN_MOV (hard_frame_pointer_rtx
, stack_pointer_rtx
));
6084 if (TARGET_SHCOMPACT
6085 && (current_function_args_info
.call_cookie
& ~ CALL_COOKIE_RET_TRAMP(1)))
6087 /* This must NOT go through the PLT, otherwise mach and macl
6088 may be clobbered. */
6089 function_symbol (gen_rtx_REG (Pmode
, R0_REG
),
6090 "__GCC_shcompact_incoming_args", SFUNC_GOT
);
6091 emit_insn (gen_shcompact_incoming_args ());
/* Expand the epilogue of the current function.
   SIBCALL_P only selects the value E (-1 when set, 1 otherwise) that is
   handed to output_stack_adjust.
   Steps visible below: compute the live register set and the frame
   sizes, release the local frame (through hard_frame_pointer_rtx when a
   frame pointer is needed), reload the saved registers, put target_flags
   back to SAVE_FLAGS, drop the remaining save area and pretend args, add
   EH_RETURN_STACKADJ_RTX when the function calls eh_return, and emit
   gen_sp_switch_2 for functions carrying the "sp_switch" attribute.
   NOTE(review): this listing carries embedded line numbers and a number
   of original lines (braces, some declarations and conditions, a few
   comment terminators) are not visible; the notes here describe only
   what can be seen.
   NOTE(review): `®_class_contents' further down looks like a mis-decoded
   `&reg_class_contents' -- confirm against the upstream file.  */
6096 sh_expand_epilogue (bool sibcall_p
)
6098 HARD_REG_SET live_regs_mask
;
6102 int save_flags
= target_flags
;
6103 int frame_size
, save_size
;
6104 int fpscr_deferred
= 0;
/* E is passed unchanged to every output_stack_adjust call below.  */
6105 int e
= sibcall_p
? -1 : 1;
6107 d
= calc_live_regs (&live_regs_mask
);
6110 frame_size
= rounded_frame_size (d
);
6114 int tregs_space
= shmedia_target_regs_stack_adjust (&live_regs_mask
);
6116 if (d
% (STACK_BOUNDARY
/ BITS_PER_UNIT
))
6117 d_rounding
= ((STACK_BOUNDARY
/ BITS_PER_UNIT
)
6118 - d
% (STACK_BOUNDARY
/ BITS_PER_UNIT
));
6120 total_size
= d
+ tregs_space
;
6121 total_size
+= rounded_frame_size (total_size
);
6122 save_size
= total_size
- frame_size
;
6124 /* If adjusting the stack in a single step costs nothing extra, do so.
6125 I.e. either if a single addi is enough, or we need a movi anyway,
6126 and we don't exceed the maximum offset range (the test for the
6127 latter is conservative for simplicity). */
6129 && ! frame_pointer_needed
6130 && (CONST_OK_FOR_I10 (total_size
)
6131 || (! CONST_OK_FOR_I10 (save_size
+ d_rounding
)
6132 && total_size
<= 2044)))
6133 d_rounding
= frame_size
;
6135 frame_size
-= d_rounding
;
6138 if (frame_pointer_needed
)
6140 /* We must avoid scheduling the epilogue with previous basic blocks
6141 when exception handling is enabled. See PR/18032. */
6142 if (flag_exceptions
)
6143 emit_insn (gen_blockage ());
6144 output_stack_adjust (frame_size
, hard_frame_pointer_rtx
, e
,
6147 /* We must avoid moving the stack pointer adjustment past code
6148 which reads from the local frame, else an interrupt could
6149 occur after the SP adjustment and clobber data in the local
6151 emit_insn (gen_blockage ());
6152 emit_insn (GEN_MOV (stack_pointer_rtx
, hard_frame_pointer_rtx
));
6154 else if (frame_size
)
6156 /* We must avoid moving the stack pointer adjustment past code
6157 which reads from the local frame, else an interrupt could
6158 occur after the SP adjustment and clobber data in the local
6160 emit_insn (gen_blockage ());
6161 output_stack_adjust (frame_size
, stack_pointer_rtx
, e
, &live_regs_mask
);
6164 if (SHMEDIA_REGS_STACK_ADJUST ())
6166 function_symbol (gen_rtx_REG (Pmode
, R0_REG
),
6168 ? "__GCC_pop_shmedia_regs"
6169 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT
);
6170 /* This must NOT go through the PLT, otherwise mach and macl
6171 may be clobbered. */
6172 emit_insn (gen_shmedia_save_restore_regs_compact
6173 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6176 /* Pop all the registers. */
6178 if (target_flags
!= save_flags
&& ! current_function_interrupt
)
6179 emit_insn (gen_toggle_sz ());
6182 int offset_base
, offset
;
6183 int offset_in_r0
= -1;
6185 rtx r0
= gen_rtx_REG (Pmode
, R0_REG
);
6186 save_schedule schedule
;
6190 entry
= sh5_schedule_saves (&live_regs_mask
, &schedule
, d_rounding
);
6191 offset_base
= -entry
[1].offset
+ d_rounding
;
6192 tmp_pnt
= schedule
.temps
;
6193 for (; entry
->mode
!= VOIDmode
; entry
--)
6195 enum machine_mode mode
= entry
->mode
;
6196 int reg
= entry
->reg
;
6197 rtx reg_rtx
, mem_rtx
, post_inc
= NULL_RTX
, insn
;
6199 offset
= offset_base
+ entry
->offset
;
6200 reg_rtx
= gen_rtx_REG (mode
, reg
);
6202 mem_rtx
= gen_frame_mem (mode
,
6203 gen_rtx_PLUS (Pmode
,
6207 GO_IF_LEGITIMATE_ADDRESS (mode
, XEXP (mem_rtx
, 0), try_post_inc
);
6213 if (HAVE_POST_INCREMENT
6214 && (offset
== offset_in_r0
6215 || (offset
+ GET_MODE_SIZE (mode
) != d
+ d_rounding
6216 && mem_rtx
== NULL_RTX
)
6217 || reg
== PR_REG
|| SPECIAL_REGISTER_P (reg
)))
6219 post_inc
= gen_frame_mem (mode
, gen_rtx_POST_INC (Pmode
, r0
));
6221 GO_IF_LEGITIMATE_ADDRESS (mode
, XEXP (post_inc
, 0),
6224 post_inc
= NULL_RTX
;
6233 if (mem_rtx
!= NULL_RTX
)
6236 if (offset_in_r0
== -1)
6238 emit_move_insn (r0
, GEN_INT (offset
));
6239 offset_in_r0
= offset
;
6241 else if (offset
!= offset_in_r0
)
6246 GEN_INT (offset
- offset_in_r0
)));
6247 offset_in_r0
+= offset
- offset_in_r0
;
6250 if (post_inc
!= NULL_RTX
)
6256 (Pmode
, r0
, stack_pointer_rtx
));
6262 offset_in_r0
+= GET_MODE_SIZE (mode
);
6265 mem_rtx
= gen_frame_mem (mode
, r0
);
6267 mem_rtx
= gen_frame_mem (mode
,
6268 gen_rtx_PLUS (Pmode
,
6272 gcc_assert ((reg
!= PR_REG
&& !SPECIAL_REGISTER_P (reg
))
6273 || mem_rtx
== post_inc
);
6276 if ((reg
== PR_REG
|| SPECIAL_REGISTER_P (reg
))
6277 && mem_rtx
!= post_inc
)
6279 insn
= emit_move_insn (r0
, mem_rtx
);
6282 else if (TARGET_REGISTER_P (reg
))
6284 rtx tmp_reg
= gen_rtx_REG (mode
, *tmp_pnt
);
6286 /* Give the scheduler a bit of freedom by using up to
6287 MAX_TEMPS registers in a round-robin fashion. */
6288 insn
= emit_move_insn (tmp_reg
, mem_rtx
);
6291 tmp_pnt
= schedule
.temps
;
6294 insn
= emit_move_insn (reg_rtx
, mem_rtx
);
6295 if (reg
== PR_MEDIA_REG
&& sh_media_register_for_return () >= 0)
6296 /* This is dead, unless we return with a sibcall. */
6297 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
,
6302 gcc_assert (entry
->offset
+ offset_base
== d
+ d_rounding
);
6304 else /* ! TARGET_SH5 */
6307 if (TEST_HARD_REG_BIT (live_regs_mask
, PR_REG
))
6309 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
6311 int j
= (FIRST_PSEUDO_REGISTER
- 1) - i
;
6313 if (j
== FPSCR_REG
&& current_function_interrupt
&& TARGET_FMOVD
6314 && hard_regs_intersect_p (&live_regs_mask
,
6315 ®_class_contents
[DF_REGS
]))
6317 else if (j
!= PR_REG
&& TEST_HARD_REG_BIT (live_regs_mask
, j
))
6319 if (j
== FIRST_FP_REG
&& fpscr_deferred
)
6324 if (target_flags
!= save_flags
&& ! current_function_interrupt
)
6325 emit_insn (gen_toggle_sz ());
6326 target_flags
= save_flags
;
/* Final adjustment of the stack pointer: pretend args, the register
   save area, the rounding slack and 8 bytes per stack_regs entry.  */
6328 output_stack_adjust (current_function_pretend_args_size
6329 + save_size
+ d_rounding
6330 + current_function_args_info
.stack_regs
* 8,
6331 stack_pointer_rtx
, e
, NULL
);
6333 if (current_function_calls_eh_return
)
6334 emit_insn (GEN_ADD3 (stack_pointer_rtx
, stack_pointer_rtx
,
6335 EH_RETURN_STACKADJ_RTX
));
6337 /* Switch back to the normal stack if necessary. */
6338 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl
)))
6339 emit_insn (gen_sp_switch_2 ());
6341 /* Tell flow the insn that pops PR isn't dead. */
6342 /* PR_REG will never be live in SHmedia mode, and we don't need to
6343 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6344 by the return pattern. */
6345 if (TEST_HARD_REG_BIT (live_regs_mask
, PR_REG
))
6346 emit_insn (gen_rtx_USE (VOIDmode
, gen_rtx_REG (SImode
, PR_REG
)));
/* Cached answer for sh_need_epilogue: 0 means "not computed yet",
   -1 means the trial expansion produced no insns, 1 means it did.  */
6349 static int sh_need_epilogue_known
= 0;
/* Return nonzero if the current function needs an epilogue.
   On the first call (cache still 0) the epilogue is expanded with
   sh_expand_epilogue (0) and the resulting insn list from get_insns ()
   is tested against NULL; later calls reuse the cached value.
   NOTE(review): the lines that bracket the trial expansion are not
   visible in this listing.  */
6352 sh_need_epilogue (void)
6354 if (! sh_need_epilogue_known
)
6359 sh_expand_epilogue (0);
6360 epilogue
= get_insns ();
6362 sh_need_epilogue_known
= (epilogue
== NULL
? -1 : 1);
6364 return sh_need_epilogue_known
> 0;
6367 /* Emit code to change the current function's return address to RA.
6368 TMP is available as a scratch register, if needed. */
/* Two cases are visible below.  When the PR register (PR_MEDIA_REG on
   SHmedia, PR_REG otherwise) is not in the live register mask, RA is
   moved straight into a register and a USE is emitted for it.
   Otherwise an offset PR_OFFSET is computed and RA is stored to the
   frame at hard_frame_pointer_rtx + PR_OFFSET, with TMP holding the
   address.
   NOTE(review): several lines (braces, the conditions selecting
   between the alternatives, the handling after "We can't find pr
   register") are not visible in this listing.  */
6371 sh_set_return_address (rtx ra
, rtx tmp
)
6373 HARD_REG_SET live_regs_mask
;
6375 int pr_reg
= TARGET_SHMEDIA
? PR_MEDIA_REG
: PR_REG
;
6378 d
= calc_live_regs (&live_regs_mask
);
6380 /* If pr_reg isn't live, we can set it (or the register given in
6381 sh_media_register_for_return) directly. */
6382 if (! TEST_HARD_REG_BIT (live_regs_mask
, pr_reg
))
6388 int rr_regno
= sh_media_register_for_return ();
6393 rr
= gen_rtx_REG (DImode
, rr_regno
);
6396 rr
= gen_rtx_REG (SImode
, pr_reg
);
6398 emit_insn (GEN_MOV (rr
, ra
));
6399 /* Tell flow the register for return isn't dead. */
6400 emit_insn (gen_rtx_USE (VOIDmode
, rr
));
6407 save_schedule schedule
;
6410 entry
= sh5_schedule_saves (&live_regs_mask
, &schedule
, 0);
6411 offset
= entry
[1].offset
;
6412 for (; entry
->mode
!= VOIDmode
; entry
--)
6413 if (entry
->reg
== pr_reg
)
6416 /* We can't find pr register. */
6420 offset
= entry
->offset
- offset
;
6421 pr_offset
= (rounded_frame_size (d
) + offset
6422 + SHMEDIA_REGS_STACK_ADJUST ());
6425 pr_offset
= rounded_frame_size (d
);
/* Build the slot address in TMP, then overwrite the saved value with
   RA.  */
6427 emit_insn (GEN_MOV (tmp
, GEN_INT (pr_offset
)));
6428 emit_insn (GEN_ADD3 (tmp
, tmp
, hard_frame_pointer_rtx
));
6430 tmp
= gen_frame_mem (Pmode
, tmp
);
6431 emit_insn (GEN_MOV (tmp
, ra
));
6434 /* Clear variables at function end. */
/* Both parameters are marked ATTRIBUTE_UNUSED; the only visible effect
   is resetting sh_need_epilogue_known to 0.  */
6437 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
6438 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
6440 sh_need_epilogue_known
= 0;
/* Expand a call to __builtin_saveregs: store the unnamed integer and
   SFmode argument registers into a buffer REGBUF and return the
   buffer's address (XEXP (regbuf, 0)).
   The counts of registers to save are derived from
   current_function_args_info.arg_count.  The integer registers are
   stored with move_block_from_reg at offset
   n_floatregs * UNITS_PER_WORD; the float registers are stored below
   that point, walking FPREGS downwards.  An error is reported when
   none of TARGET_SH2E, TARGET_SH4 and TARGET_SH5 is set.
   NOTE(review): this listing has lost a number of lines (braces, some
   conditions, and the opening lines of the two "This is optimized ..."
   comments, whose closing delimiters are still present).  */
6444 sh_builtin_saveregs (void)
6446 /* First unnamed integer register. */
6447 int first_intreg
= current_function_args_info
.arg_count
[(int) SH_ARG_INT
];
6448 /* Number of integer registers we need to save. */
6449 int n_intregs
= MAX (0, NPARM_REGS (SImode
) - first_intreg
);
6450 /* First unnamed SFmode float reg */
6451 int first_floatreg
= current_function_args_info
.arg_count
[(int) SH_ARG_FLOAT
];
6452 /* Number of SFmode float regs to save. */
6453 int n_floatregs
= MAX (0, NPARM_REGS (SFmode
) - first_floatreg
);
6456 HOST_WIDE_INT alias_set
;
6462 int pushregs
= n_intregs
;
6464 while (pushregs
< NPARM_REGS (SImode
) - 1
6465 && (CALL_COOKIE_INT_REG_GET
6466 (current_function_args_info
.call_cookie
,
6467 NPARM_REGS (SImode
) - pushregs
)
6470 current_function_args_info
.call_cookie
6471 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode
)
6476 if (pushregs
== NPARM_REGS (SImode
))
6477 current_function_args_info
.call_cookie
6478 |= (CALL_COOKIE_INT_REG (0, 1)
6479 | CALL_COOKIE_STACKSEQ (pushregs
- 1));
6481 current_function_args_info
.call_cookie
6482 |= CALL_COOKIE_STACKSEQ (pushregs
);
6484 current_function_pretend_args_size
+= 8 * n_intregs
;
6486 if (TARGET_SHCOMPACT
)
6490 if (! TARGET_SH2E
&& ! TARGET_SH4
&& ! TARGET_SH5
)
6492 error ("__builtin_saveregs not supported by this subtarget");
6499 /* Allocate block of memory for the regs. */
6500 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6501 Or can assign_stack_local accept a 0 SIZE argument? */
6502 bufsize
= (n_intregs
* UNITS_PER_WORD
) + (n_floatregs
* UNITS_PER_WORD
);
6505 regbuf
= gen_frame_mem (BLKmode
, gen_rtx_REG (Pmode
, ARG_POINTER_REGNUM
));
6506 else if (n_floatregs
& 1)
6510 regbuf
= assign_stack_local (BLKmode
, bufsize
+ UNITS_PER_WORD
, 0);
6511 addr
= copy_to_mode_reg (Pmode
, XEXP (regbuf
, 0));
6512 emit_insn (gen_iorsi3 (addr
, addr
, GEN_INT (UNITS_PER_WORD
)));
6513 regbuf
= change_address (regbuf
, BLKmode
, addr
);
6515 else if (STACK_BOUNDARY
< 64 && TARGET_FPU_DOUBLE
&& n_floatregs
)
6519 regbuf
= assign_stack_local (BLKmode
, bufsize
+ UNITS_PER_WORD
, 0);
6520 addr
= copy_to_mode_reg (Pmode
, plus_constant (XEXP (regbuf
, 0), 4));
6521 mask
= copy_to_mode_reg (Pmode
, GEN_INT (-8));
6522 emit_insn (gen_andsi3 (addr
, addr
, mask
));
6523 regbuf
= change_address (regbuf
, BLKmode
, addr
);
6526 regbuf
= assign_stack_local (BLKmode
, bufsize
, TARGET_FPU_DOUBLE
? 64 : 0);
6527 alias_set
= get_varargs_alias_set ();
6528 set_mem_alias_set (regbuf
, alias_set
);
6531 This is optimized to only save the regs that are necessary. Explicitly
6532 named args need not be saved. */
6534 move_block_from_reg (BASE_ARG_REG (SImode
) + first_intreg
,
6535 adjust_address (regbuf
, BLKmode
,
6536 n_floatregs
* UNITS_PER_WORD
),
6540 /* Return the address of the regbuf. */
6541 return XEXP (regbuf
, 0);
6544 This is optimized to only save the regs that are necessary. Explicitly
6545 named args need not be saved.
6546 We explicitly build a pointer to the buffer because it halves the insn
6547 count when not optimizing (otherwise the pointer is built for each reg
6549 We emit the moves in reverse order so that we can use predecrement. */
6551 fpregs
= copy_to_mode_reg (Pmode
,
6552 plus_constant (XEXP (regbuf
, 0),
6553 n_floatregs
* UNITS_PER_WORD
));
6554 if (TARGET_SH4
|| TARGET_SH2A_DOUBLE
)
6557 for (regno
= NPARM_REGS (DFmode
) - 2; regno
>= first_floatreg
; regno
-= 2)
6559 emit_insn (gen_addsi3 (fpregs
, fpregs
,
6560 GEN_INT (-2 * UNITS_PER_WORD
)));
6561 mem
= change_address (regbuf
, DFmode
, fpregs
);
6562 emit_move_insn (mem
,
6563 gen_rtx_REG (DFmode
, BASE_ARG_REG (DFmode
) + regno
));
6565 regno
= first_floatreg
;
6568 emit_insn (gen_addsi3 (fpregs
, fpregs
, GEN_INT (-UNITS_PER_WORD
)));
6569 mem
= change_address (regbuf
, SFmode
, fpregs
);
6570 emit_move_insn (mem
,
6571 gen_rtx_REG (SFmode
, BASE_ARG_REG (SFmode
) + regno
6572 - (TARGET_LITTLE_ENDIAN
!= 0)));
6576 for (regno
= NPARM_REGS (SFmode
) - 1; regno
>= first_floatreg
; regno
--)
6580 emit_insn (gen_addsi3 (fpregs
, fpregs
, GEN_INT (-UNITS_PER_WORD
)));
6581 mem
= change_address (regbuf
, SFmode
, fpregs
);
6582 emit_move_insn (mem
,
6583 gen_rtx_REG (SFmode
, BASE_ARG_REG (SFmode
) + regno
));
6586 /* Return the address of the regbuf. */
6587 return XEXP (regbuf
, 0);
6590 /* Define the `__builtin_va_list' type for the ABI. */
/* Returns ptr_type_node when TARGET_SH5 is set, when neither
   TARGET_SH2E nor TARGET_SH4 is set, or when TARGET_HITACHI /
   sh_cfun_attr_renesas_p () holds.  Otherwise a RECORD_TYPE is built
   whose fields are chained in this order: __va_next_o,
   __va_next_o_limit, __va_next_fp, __va_next_fp_limit,
   __va_next_stack; the record is then laid out with layout_type.
   NOTE(review): the type argument of each build_decl call and the
   final return statement are not visible in this listing.  */
6593 sh_build_builtin_va_list (void)
6595 tree f_next_o
, f_next_o_limit
, f_next_fp
, f_next_fp_limit
, f_next_stack
;
6598 if (TARGET_SH5
|| (! TARGET_SH2E
&& ! TARGET_SH4
)
6599 || TARGET_HITACHI
|| sh_cfun_attr_renesas_p ())
6600 return ptr_type_node
;
6602 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
6604 f_next_o
= build_decl (FIELD_DECL
, get_identifier ("__va_next_o"),
6606 f_next_o_limit
= build_decl (FIELD_DECL
,
6607 get_identifier ("__va_next_o_limit"),
6609 f_next_fp
= build_decl (FIELD_DECL
, get_identifier ("__va_next_fp"),
6611 f_next_fp_limit
= build_decl (FIELD_DECL
,
6612 get_identifier ("__va_next_fp_limit"),
6614 f_next_stack
= build_decl (FIELD_DECL
, get_identifier ("__va_next_stack"),
6617 DECL_FIELD_CONTEXT (f_next_o
) = record
;
6618 DECL_FIELD_CONTEXT (f_next_o_limit
) = record
;
6619 DECL_FIELD_CONTEXT (f_next_fp
) = record
;
6620 DECL_FIELD_CONTEXT (f_next_fp_limit
) = record
;
6621 DECL_FIELD_CONTEXT (f_next_stack
) = record
;
6623 TYPE_FIELDS (record
) = f_next_o
;
6624 TREE_CHAIN (f_next_o
) = f_next_o_limit
;
6625 TREE_CHAIN (f_next_o_limit
) = f_next_fp
;
6626 TREE_CHAIN (f_next_fp
) = f_next_fp_limit
;
6627 TREE_CHAIN (f_next_fp_limit
) = f_next_stack
;
6629 layout_type (record
);
6634 /* Implement `va_start' for varargs and stdarg. */
/* VALIST is the va_list object being initialised and NEXTARG is the
   value stored into its last field.
   Two early paths hand the work to std_expand_builtin_va_start, the
   first after calling expand_builtin_saveregs.  The second is taken
   when neither TARGET_SH2E nor TARGET_SH4 is set, or when
   TARGET_HITACHI / sh_cfun_attr_renesas_p () holds.
   Otherwise the five fields of va_list_type_node are fetched in chain
   order and assigned: next_fp gets the result of
   expand_builtin_saveregs, next_fp_limit gets that value plus
   UNITS_PER_WORD * nfp, next_o gets the same value as next_fp_limit,
   next_o_limit gets it plus UNITS_PER_WORD * nint, and next_stack gets
   NEXTARG.
   NOTE(review): the condition guarding the first early path, the
   returns after both early paths, and any adjustment of NFP / NINT
   between their initialisation from arg_count and their use are not
   visible in this listing.  */
6637 sh_va_start (tree valist
, rtx nextarg
)
6639 tree f_next_o
, f_next_o_limit
, f_next_fp
, f_next_fp_limit
, f_next_stack
;
6640 tree next_o
, next_o_limit
, next_fp
, next_fp_limit
, next_stack
;
6646 expand_builtin_saveregs ();
6647 std_expand_builtin_va_start (valist
, nextarg
);
6651 if ((! TARGET_SH2E
&& ! TARGET_SH4
)
6652 || TARGET_HITACHI
|| sh_cfun_attr_renesas_p ())
6654 std_expand_builtin_va_start (valist
, nextarg
);
6658 f_next_o
= TYPE_FIELDS (va_list_type_node
);
6659 f_next_o_limit
= TREE_CHAIN (f_next_o
);
6660 f_next_fp
= TREE_CHAIN (f_next_o_limit
);
6661 f_next_fp_limit
= TREE_CHAIN (f_next_fp
);
6662 f_next_stack
= TREE_CHAIN (f_next_fp_limit
);
6664 next_o
= build (COMPONENT_REF
, TREE_TYPE (f_next_o
), valist
, f_next_o
,
6666 next_o_limit
= build (COMPONENT_REF
, TREE_TYPE (f_next_o_limit
),
6667 valist
, f_next_o_limit
, NULL_TREE
);
6668 next_fp
= build (COMPONENT_REF
, TREE_TYPE (f_next_fp
), valist
, f_next_fp
,
6670 next_fp_limit
= build (COMPONENT_REF
, TREE_TYPE (f_next_fp_limit
),
6671 valist
, f_next_fp_limit
, NULL_TREE
);
6672 next_stack
= build (COMPONENT_REF
, TREE_TYPE (f_next_stack
),
6673 valist
, f_next_stack
, NULL_TREE
);
6675 /* Call __builtin_saveregs. */
6676 u
= make_tree (ptr_type_node
, expand_builtin_saveregs ());
6677 t
= build (MODIFY_EXPR
, ptr_type_node
, next_fp
, u
);
6678 TREE_SIDE_EFFECTS (t
) = 1;
6679 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6681 nfp
= current_function_args_info
.arg_count
[SH_ARG_FLOAT
];
6686 u
= fold (build (PLUS_EXPR
, ptr_type_node
, u
,
6687 build_int_cst (NULL_TREE
, UNITS_PER_WORD
* nfp
)));
6688 t
= build (MODIFY_EXPR
, ptr_type_node
, next_fp_limit
, u
);
6689 TREE_SIDE_EFFECTS (t
) = 1;
6690 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6692 t
= build (MODIFY_EXPR
, ptr_type_node
, next_o
, u
);
6693 TREE_SIDE_EFFECTS (t
) = 1;
6694 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6696 nint
= current_function_args_info
.arg_count
[SH_ARG_INT
];
6701 u
= fold (build (PLUS_EXPR
, ptr_type_node
, u
,
6702 build_int_cst (NULL_TREE
, UNITS_PER_WORD
* nint
)));
6703 t
= build (MODIFY_EXPR
, ptr_type_node
, next_o_limit
, u
);
6704 TREE_SIDE_EFFECTS (t
) = 1;
6705 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6707 u
= make_tree (ptr_type_node
, nextarg
);
6708 t
= build (MODIFY_EXPR
, ptr_type_node
, next_stack
, u
);
6709 TREE_SIDE_EFFECTS (t
) = 1;
6710 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6713 /* TYPE is a RECORD_TYPE. If there is only a single non-zero-sized
6714 member, return it. */
/* The loop walks TYPE_FIELDS (type) through TREE_CHAIN and tests each
   entry for not being a FIELD_DECL, for lacking a DECL_SIZE, and for a
   zero DECL_SIZE.
   NOTE(review): the statements executed for each test and the final
   return are not visible in this listing; presumably non-fields and
   zero-sized fields are skipped and a second candidate makes the
   result NULL_TREE -- confirm against the full source.  */
6716 find_sole_member (tree type
)
6718 tree field
, member
= NULL_TREE
;
6720 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
6722 if (TREE_CODE (field
) != FIELD_DECL
)
6724 if (!DECL_SIZE (field
))
6726 if (integer_zerop (DECL_SIZE (field
)))
6734 /* Implement `va_arg'. */
/* VALIST is the va_list expression and TYPE the type being fetched;
   generated statements are appended to *PRE_P.
   PASS_BY_REF is taken from targetm.calls.must_pass_in_stack, and RSIZE
   is SIZE rounded up to a multiple of UNITS_PER_WORD.
   When TARGET_SH5 is clear, TARGET_SH2E or TARGET_SH4 is set, and
   neither TARGET_HITACHI nor sh_cfun_attr_renesas_p () holds, the
   five-field va_list record is used.  A record whose sole member is a
   real, complex or record type of the same mode is treated like that
   member.  ADDR is pointed at next_fp, next_o or next_stack depending
   on limit comparisons against next_fp_limit / next_o_limit, and VALIST
   is replaced by an INDIRECT_REF of ADDR.
   The value is finally fetched with std_gimplify_va_arg_expr.
   NOTE(review): POST_P is declared ATTRIBUTE_UNUSED, yet one
   gimplify_and_add call below passes it.
   NOTE(review): many lines (braces, several conditions including the
   one guarding `type = build_pointer_type (type)', else-arms, the final
   return) are not visible in this listing.  */
6737 sh_gimplify_va_arg_expr (tree valist
, tree type
, tree
*pre_p
,
6738 tree
*post_p ATTRIBUTE_UNUSED
)
6740 HOST_WIDE_INT size
, rsize
;
6741 tree tmp
, pptr_type_node
;
6742 tree addr
, lab_over
= NULL
, result
= NULL
;
6743 int pass_by_ref
= targetm
.calls
.must_pass_in_stack (TYPE_MODE (type
), type
);
6747 type
= build_pointer_type (type
);
6749 size
= int_size_in_bytes (type
);
6750 rsize
= (size
+ UNITS_PER_WORD
- 1) & -UNITS_PER_WORD
;
6751 pptr_type_node
= build_pointer_type (ptr_type_node
);
6753 if (! TARGET_SH5
&& (TARGET_SH2E
|| TARGET_SH4
)
6754 && ! (TARGET_HITACHI
|| sh_cfun_attr_renesas_p ()))
6756 tree f_next_o
, f_next_o_limit
, f_next_fp
, f_next_fp_limit
, f_next_stack
;
6757 tree next_o
, next_o_limit
, next_fp
, next_fp_limit
, next_stack
;
6762 f_next_o
= TYPE_FIELDS (va_list_type_node
);
6763 f_next_o_limit
= TREE_CHAIN (f_next_o
);
6764 f_next_fp
= TREE_CHAIN (f_next_o_limit
);
6765 f_next_fp_limit
= TREE_CHAIN (f_next_fp
);
6766 f_next_stack
= TREE_CHAIN (f_next_fp_limit
);
6768 next_o
= build (COMPONENT_REF
, TREE_TYPE (f_next_o
), valist
, f_next_o
,
6770 next_o_limit
= build (COMPONENT_REF
, TREE_TYPE (f_next_o_limit
),
6771 valist
, f_next_o_limit
, NULL_TREE
);
6772 next_fp
= build (COMPONENT_REF
, TREE_TYPE (f_next_fp
),
6773 valist
, f_next_fp
, NULL_TREE
);
6774 next_fp_limit
= build (COMPONENT_REF
, TREE_TYPE (f_next_fp_limit
),
6775 valist
, f_next_fp_limit
, NULL_TREE
);
6776 next_stack
= build (COMPONENT_REF
, TREE_TYPE (f_next_stack
),
6777 valist
, f_next_stack
, NULL_TREE
);
6779 /* Structures with a single member with a distinct mode are passed
6780 like their member. This is relevant if the latter has a REAL_TYPE
6781 or COMPLEX_TYPE type. */
6783 while (TREE_CODE (eff_type
) == RECORD_TYPE
6784 && (member
= find_sole_member (eff_type
))
6785 && (TREE_CODE (TREE_TYPE (member
)) == REAL_TYPE
6786 || TREE_CODE (TREE_TYPE (member
)) == COMPLEX_TYPE
6787 || TREE_CODE (TREE_TYPE (member
)) == RECORD_TYPE
))
6789 tree field_type
= TREE_TYPE (member
);
6791 if (TYPE_MODE (eff_type
) == TYPE_MODE (field_type
))
6792 eff_type
= field_type
;
6795 gcc_assert ((TYPE_ALIGN (eff_type
)
6796 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type
)))
6797 || (TYPE_ALIGN (eff_type
)
6798 > GET_MODE_BITSIZE (TYPE_MODE (field_type
))));
6805 pass_as_float
= ((TREE_CODE (eff_type
) == REAL_TYPE
&& size
<= 8)
6806 || (TREE_CODE (eff_type
) == COMPLEX_TYPE
6807 && TREE_CODE (TREE_TYPE (eff_type
)) == REAL_TYPE
6812 pass_as_float
= (TREE_CODE (eff_type
) == REAL_TYPE
&& size
== 4);
6815 addr
= create_tmp_var (pptr_type_node
, NULL
);
6816 lab_false
= create_artificial_label ();
6817 lab_over
= create_artificial_label ();
6819 valist
= build1 (INDIRECT_REF
, ptr_type_node
, addr
);
6823 tree next_fp_tmp
= create_tmp_var (TREE_TYPE (f_next_fp
), NULL
);
6825 bool is_double
= size
== 8 && TREE_CODE (eff_type
) == REAL_TYPE
;
6827 tmp
= build1 (ADDR_EXPR
, pptr_type_node
, next_fp
);
6828 tmp
= build2 (MODIFY_EXPR
, void_type_node
, addr
, tmp
);
6829 gimplify_and_add (tmp
, pre_p
);
6831 tmp
= build2 (MODIFY_EXPR
, ptr_type_node
, next_fp_tmp
, valist
);
6832 gimplify_and_add (tmp
, pre_p
);
6833 tmp
= next_fp_limit
;
6834 if (size
> 4 && !is_double
)
6835 tmp
= build2 (PLUS_EXPR
, TREE_TYPE (tmp
), tmp
,
6836 fold_convert (TREE_TYPE (tmp
), size_int (4 - size
)));
6837 tmp
= build (GE_EXPR
, boolean_type_node
, next_fp_tmp
, tmp
);
6838 cmp
= build (COND_EXPR
, void_type_node
, tmp
,
6839 build (GOTO_EXPR
, void_type_node
, lab_false
),
6842 gimplify_and_add (cmp
, pre_p
);
6844 if (TYPE_ALIGN (eff_type
) > BITS_PER_WORD
6845 || (is_double
|| size
== 16))
6847 tmp
= fold_convert (ptr_type_node
, size_int (UNITS_PER_WORD
));
6848 tmp
= build (BIT_AND_EXPR
, ptr_type_node
, next_fp_tmp
, tmp
);
6849 tmp
= build (PLUS_EXPR
, ptr_type_node
, next_fp_tmp
, tmp
);
6850 tmp
= build (MODIFY_EXPR
, ptr_type_node
, next_fp_tmp
, tmp
);
6851 gimplify_and_add (tmp
, pre_p
);
6854 gimplify_and_add (cmp
, pre_p
);
6856 #ifdef FUNCTION_ARG_SCmode_WART
6857 if (TYPE_MODE (eff_type
) == SCmode
6858 && TARGET_SH4
&& TARGET_LITTLE_ENDIAN
)
6860 tree subtype
= TREE_TYPE (eff_type
);
6864 = std_gimplify_va_arg_expr (next_fp_tmp
, subtype
, pre_p
, NULL
);
6865 imag
= get_initialized_tmp_var (imag
, pre_p
, NULL
);
6868 = std_gimplify_va_arg_expr (next_fp_tmp
, subtype
, pre_p
, NULL
);
6869 real
= get_initialized_tmp_var (real
, pre_p
, NULL
);
6871 result
= build (COMPLEX_EXPR
, type
, real
, imag
);
6872 result
= get_initialized_tmp_var (result
, pre_p
, NULL
);
6874 #endif /* FUNCTION_ARG_SCmode_WART */
6876 tmp
= build (GOTO_EXPR
, void_type_node
, lab_over
);
6877 gimplify_and_add (tmp
, pre_p
);
6879 tmp
= build (LABEL_EXPR
, void_type_node
, lab_false
);
6880 gimplify_and_add (tmp
, pre_p
);
6882 tmp
= build1 (ADDR_EXPR
, pptr_type_node
, next_stack
);
6883 tmp
= build (MODIFY_EXPR
, void_type_node
, addr
, tmp
);
6884 gimplify_and_add (tmp
, pre_p
);
6885 tmp
= build2 (MODIFY_EXPR
, ptr_type_node
, next_fp_tmp
, valist
);
6886 gimplify_and_add (tmp
, pre_p
);
6888 tmp
= build2 (MODIFY_EXPR
, ptr_type_node
, valist
, next_fp_tmp
);
6889 gimplify_and_add (tmp
, post_p
);
6890 valist
= next_fp_tmp
;
6894 tmp
= fold_convert (ptr_type_node
, size_int (rsize
));
6895 tmp
= build (PLUS_EXPR
, ptr_type_node
, next_o
, tmp
);
6896 tmp
= build (GT_EXPR
, boolean_type_node
, tmp
, next_o_limit
);
6897 tmp
= build (COND_EXPR
, void_type_node
, tmp
,
6898 build (GOTO_EXPR
, void_type_node
, lab_false
),
6900 gimplify_and_add (tmp
, pre_p
);
6902 tmp
= build1 (ADDR_EXPR
, pptr_type_node
, next_o
);
6903 tmp
= build (MODIFY_EXPR
, void_type_node
, addr
, tmp
);
6904 gimplify_and_add (tmp
, pre_p
);
6906 tmp
= build (GOTO_EXPR
, void_type_node
, lab_over
);
6907 gimplify_and_add (tmp
, pre_p
);
6909 tmp
= build (LABEL_EXPR
, void_type_node
, lab_false
);
6910 gimplify_and_add (tmp
, pre_p
);
6912 if (size
> 4 && ! TARGET_SH4
)
6914 tmp
= build (MODIFY_EXPR
, ptr_type_node
, next_o
, next_o_limit
);
6915 gimplify_and_add (tmp
, pre_p
);
6918 tmp
= build1 (ADDR_EXPR
, pptr_type_node
, next_stack
);
6919 tmp
= build (MODIFY_EXPR
, void_type_node
, addr
, tmp
);
6920 gimplify_and_add (tmp
, pre_p
);
6925 tmp
= build (LABEL_EXPR
, void_type_node
, lab_over
);
6926 gimplify_and_add (tmp
, pre_p
);
6930 /* ??? In va-sh.h, there had been code to make values larger than
6931 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6933 tmp
= std_gimplify_va_arg_expr (valist
, type
, pre_p
, NULL
);
6936 tmp
= build (MODIFY_EXPR
, void_type_node
, result
, tmp
);
6937 gimplify_and_add (tmp
, pre_p
);
6939 tmp
= build (LABEL_EXPR
, void_type_node
, lab_over
);
6940 gimplify_and_add (tmp
, pre_p
);
6946 result
= build_va_arg_indirect_ref (result
);
/* The one visible statement returns nonzero unless sh_attr_renesas_p
   (TYPE) holds.
   NOTE(review): the lines between the signature and this return are
   not visible in this listing, so earlier returns may exist.  */
6952 sh_promote_prototypes (tree type
)
6958 return ! sh_attr_renesas_p (type
);
6961 /* Whether an argument must be passed by reference. On SHcompact, we
6962 pretend arguments wider than 32-bits that would have been passed in
6963 registers are passed by reference, so that an SHmedia trampoline
6964 loads them into the full 64-bits registers. */
/* SIZE is taken either from int_size_in_bytes (TYPE) or from
   GET_MODE_SIZE (MODE).  The visible test requires that integer
   argument registers remain (arg_count[SH_ARG_INT] < NPARM_REGS
   (SImode)), that the argument is not forced onto the stack, and that
   it would not be split between registers and stack.
   NOTE(review): the condition choosing between the two SIZE
   computations, part of the main test, and the return statements are
   not visible in this listing.  */
6967 shcompact_byref (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6968 tree type
, bool named
)
6970 unsigned HOST_WIDE_INT size
;
6973 size
= int_size_in_bytes (type
);
6975 size
= GET_MODE_SIZE (mode
);
6977 if (cum
->arg_count
[SH_ARG_INT
] < NPARM_REGS (SImode
)
6979 || GET_SH_ARG_CLASS (mode
) == SH_ARG_INT
6980 || (GET_SH_ARG_CLASS (mode
) == SH_ARG_FLOAT
6981 && cum
->arg_count
[SH_ARG_FLOAT
] >= NPARM_REGS (SFmode
)))
6983 && !SHCOMPACT_FORCE_ON_STACK (mode
, type
)
6984 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum
, mode
, type
, named
))
/* Decide whether an argument of mode MODE and type TYPE is passed by
   reference.  Visible logic: targetm.calls.must_pass_in_stack is
   consulted first; under TARGET_SHCOMPACT the result of shcompact_byref
   is stored in CUM->byref and the function returns whether it is
   nonzero.
   NOTE(review): the statements taken after the must_pass_in_stack test,
   the NULL-CUM handling hinted at by the comment below (which is cut
   off before its closing delimiter), and the fall-through return are
   not visible in this listing.  */
6991 sh_pass_by_reference (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6992 tree type
, bool named
)
6994 if (targetm
.calls
.must_pass_in_stack (mode
, type
))
6997 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
6998 wants to know about pass-by-reference semantics for incoming
7003 if (TARGET_SHCOMPACT
)
7005 cum
->byref
= shcompact_byref (cum
, mode
, type
, named
);
7006 return cum
->byref
!= 0;
/* Return nonzero when CUM->outgoing is set and the argument's alignment
   is a multiple of SH_MIN_ALIGN_FOR_CALLEE_COPY.  The alignment is
   TYPE_ALIGN (TYPE) for BLKmode arguments and GET_MODE_ALIGNMENT (MODE)
   otherwise.  NAMED is unused.  */
7013 sh_callee_copies (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7014 tree type
, bool named ATTRIBUTE_UNUSED
)
7016 /* ??? How can it possibly be correct to return true only on the
7017 caller side of the equation? Is there someplace else in the
7018 sh backend that's magically producing the copies? */
7019 return (cum
->outgoing
7020 && ((mode
== BLKmode
? TYPE_ALIGN (type
) : GET_MODE_ALIGNMENT (mode
))
7021 % SH_MIN_ALIGN_FOR_CALLEE_COPY
== 0));
/* Return WORDS * UNITS_PER_WORD for an argument that is passed partly
   in registers.
   Two assignments to WORDS are visible: NPARM_REGS (MODE) - ROUND_REG
   (*CUM, MODE) when the argument starts in a register but its rounded
   size runs past NPARM_REGS (MODE) (and neither TARGET_SH4 nor
   TARGET_SH2A_DOUBLE is set), and NPARM_REGS (SImode) -
   arg_count[SH_ARG_INT] when TARGET_SHCOMPACT is clear and
   SH5_WOULD_BE_PARTIAL_NREGS holds.
   NAMED is marked ATTRIBUTE_UNUSED although it is passed to
   SH5_WOULD_BE_PARTIAL_NREGS below.
   NOTE(review): the declaration/initial value of WORDS and the start
   of the first condition are not visible in this listing.  */
7025 sh_arg_partial_bytes (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7026 tree type
, bool named ATTRIBUTE_UNUSED
)
7031 && PASS_IN_REG_P (*cum
, mode
, type
)
7032 && !(TARGET_SH4
|| TARGET_SH2A_DOUBLE
)
7033 && (ROUND_REG (*cum
, mode
)
7035 ? ROUND_ADVANCE (GET_MODE_SIZE (mode
))
7036 : ROUND_ADVANCE (int_size_in_bytes (type
)))
7037 > NPARM_REGS (mode
)))
7038 words
= NPARM_REGS (mode
) - ROUND_REG (*cum
, mode
);
7040 else if (!TARGET_SHCOMPACT
7041 && SH5_WOULD_BE_PARTIAL_NREGS (*cum
, mode
, type
, named
))
7042 words
= NPARM_REGS (SImode
) - cum
->arg_count
[SH_ARG_INT
];
7044 return words
* UNITS_PER_WORD
;
7048 /* Define where to put the arguments to a function.
7049 Value is zero to push the argument on the stack,
7050 or a hard register in which to store the argument.
7052 MODE is the argument's machine mode.
7053 TYPE is the data type of the argument (as a tree).
7054 This is null for libcalls where that information may not be available.
7056 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7057 the preceding args and about the function being called.
7058 NAMED is nonzero if this argument is a named parameter
7059 (otherwise it is an extra parameter matching an ellipsis).
7061 On SH the first args are normally in registers
7062 and the rest are pushed. Any arg that starts within the first
7063 NPARM_REGS words is at least partially passed in a register unless
7064 its data type forbids. */
/* In this function the CUMULATIVE_ARGS pointer is named CA.
   A VOIDmode query outside TARGET_SH5 returns a constant that is 1
   when CA->renesas_abi is set and 0 otherwise.
   NOTE(review): several lines of this function are not visible in this
   listing, and two of its comments are cut off before their closing
   delimiter.  */
7068 sh_function_arg (CUMULATIVE_ARGS
*ca
, enum machine_mode mode
,
7069 tree type
, int named
)
7071 if (! TARGET_SH5
&& mode
== VOIDmode
)
7072 return GEN_INT (ca
->renesas_abi
? 1 : 0);
7075 && PASS_IN_REG_P (*ca
, mode
, type
)
7076 && (named
|| ! (TARGET_HITACHI
|| ca
->renesas_abi
)))
7080 if (mode
== SCmode
&& TARGET_SH4
&& TARGET_LITTLE_ENDIAN
7081 && (! FUNCTION_ARG_SCmode_WART
|| (ROUND_REG (*ca
, mode
) & 1)))
7083 rtx r1
= gen_rtx_EXPR_LIST (VOIDmode
,
7084 gen_rtx_REG (SFmode
,
7086 + (ROUND_REG (*ca
, mode
) ^ 1)),
7088 rtx r2
= gen_rtx_EXPR_LIST (VOIDmode
,
7089 gen_rtx_REG (SFmode
,
7091 + ((ROUND_REG (*ca
, mode
) + 1) ^ 1)),
7093 return gen_rtx_PARALLEL(SCmode
, gen_rtvec(2, r1
, r2
));
7096 /* If the alignment of a DF value causes an SF register to be
7097 skipped, we will use that skipped register for the next SF
7099 if ((TARGET_HITACHI
|| ca
->renesas_abi
)
7100 && ca
->free_single_fp_reg
7102 return gen_rtx_REG (mode
, ca
->free_single_fp_reg
);
7104 regno
= (BASE_ARG_REG (mode
) + ROUND_REG (*ca
, mode
))
7105 ^ (mode
== SFmode
&& TARGET_SH4
7106 && TARGET_LITTLE_ENDIAN
!= 0
7107 && ! TARGET_HITACHI
&& ! ca
->renesas_abi
);
7108 return gen_rtx_REG (mode
, regno
);
7114 if (mode
== VOIDmode
&& TARGET_SHCOMPACT
)
7115 return GEN_INT (ca
->call_cookie
);
7117 /* The following test assumes unnamed arguments are promoted to
7119 if (mode
== SFmode
&& ca
->free_single_fp_reg
)
7120 return SH5_PROTOTYPED_FLOAT_ARG (*ca
, mode
, ca
->free_single_fp_reg
);
7122 if ((GET_SH_ARG_CLASS (mode
) == SH_ARG_FLOAT
)
7123 && (named
|| ! ca
->prototype_p
)
7124 && ca
->arg_count
[(int) SH_ARG_FLOAT
] < NPARM_REGS (SFmode
))
7126 if (! ca
->prototype_p
&& TARGET_SHMEDIA
)
7127 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca
, mode
);
7129 return SH5_PROTOTYPED_FLOAT_ARG (*ca
, mode
,
7131 + ca
->arg_count
[(int) SH_ARG_FLOAT
]);
7134 if (ca
->arg_count
[(int) SH_ARG_INT
] < NPARM_REGS (SImode
)
7135 && (! TARGET_SHCOMPACT
7136 || (! SHCOMPACT_FORCE_ON_STACK (mode
, type
)
7137 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca
, mode
,
7140 return gen_rtx_REG (mode
, (FIRST_PARM_REG
7141 + ca
->arg_count
[(int) SH_ARG_INT
]));
/* Advance the argument bookkeeping in CA past one argument of mode MODE
   and type TYPE.  Under TARGET_SH5 the visible code updates
   arg_count[SH_ARG_INT], arg_count[SH_ARG_FLOAT], stack_regs,
   byref_regs, free_single_fp_reg and call-cookie bits.  Otherwise it
   maintains free_single_fp_reg for the Renesas/Hitachi ABI with a
   double-precision FPU and advances arg_count for the argument's class.
   NOTE(review): many lines of this function are not visible in this
   listing (including the end of the comment that follows and the left
   sides of several `|=' updates).  */
7150 /* Update the data in CUM to advance over an argument
7151 of mode MODE and data type TYPE.
7152 (TYPE is null for libcalls where that information may not be
7156 sh_function_arg_advance (CUMULATIVE_ARGS
*ca
, enum machine_mode mode
,
7157 tree type
, int named
)
7161 else if (TARGET_SH5
)
7163 tree type2
= (ca
->byref
&& type
7166 enum machine_mode mode2
= (ca
->byref
&& type
7169 int dwords
= ((ca
->byref
7172 ? int_size_in_bytes (type2
)
7173 : GET_MODE_SIZE (mode2
)) + 7) / 8;
7174 int numregs
= MIN (dwords
, NPARM_REGS (SImode
)
7175 - ca
->arg_count
[(int) SH_ARG_INT
]);
7179 ca
->arg_count
[(int) SH_ARG_INT
] += numregs
;
7180 if (TARGET_SHCOMPACT
7181 && SHCOMPACT_FORCE_ON_STACK (mode2
, type2
))
7184 |= CALL_COOKIE_INT_REG (ca
->arg_count
[(int) SH_ARG_INT
]
7186 /* N.B. We want this also for outgoing. */
7187 ca
->stack_regs
+= numregs
;
7192 ca
->stack_regs
+= numregs
;
7193 ca
->byref_regs
+= numregs
;
7197 |= CALL_COOKIE_INT_REG (ca
->arg_count
[(int) SH_ARG_INT
]
7201 |= CALL_COOKIE_INT_REG (ca
->arg_count
[(int) SH_ARG_INT
]
7204 else if (dwords
> numregs
)
7206 int pushregs
= numregs
;
7208 if (TARGET_SHCOMPACT
)
7209 ca
->stack_regs
+= numregs
;
7210 while (pushregs
< NPARM_REGS (SImode
) - 1
7211 && (CALL_COOKIE_INT_REG_GET
7213 NPARM_REGS (SImode
) - pushregs
)
7217 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode
)
7221 if (numregs
== NPARM_REGS (SImode
))
7223 |= CALL_COOKIE_INT_REG (0, 1)
7224 | CALL_COOKIE_STACKSEQ (numregs
- 1);
7227 |= CALL_COOKIE_STACKSEQ (numregs
);
7230 if (GET_SH_ARG_CLASS (mode2
) == SH_ARG_FLOAT
7231 && (named
|| ! ca
->prototype_p
))
7233 if (mode2
== SFmode
&& ca
->free_single_fp_reg
)
7234 ca
->free_single_fp_reg
= 0;
7235 else if (ca
->arg_count
[(int) SH_ARG_FLOAT
]
7236 < NPARM_REGS (SFmode
))
7239 = MIN ((GET_MODE_SIZE (mode2
) + 7) / 8 * 2,
7241 - ca
->arg_count
[(int) SH_ARG_FLOAT
]);
7243 ca
->arg_count
[(int) SH_ARG_FLOAT
] += numfpregs
;
7245 if (TARGET_SHCOMPACT
&& ! ca
->prototype_p
)
7247 if (ca
->outgoing
&& numregs
> 0)
7251 |= (CALL_COOKIE_INT_REG
7252 (ca
->arg_count
[(int) SH_ARG_INT
]
7253 - numregs
+ ((numfpregs
- 2) / 2),
7254 4 + (ca
->arg_count
[(int) SH_ARG_FLOAT
]
7257 while (numfpregs
-= 2);
7259 else if (mode2
== SFmode
&& (named
)
7260 && (ca
->arg_count
[(int) SH_ARG_FLOAT
]
7261 < NPARM_REGS (SFmode
)))
7262 ca
->free_single_fp_reg
7263 = FIRST_FP_PARM_REG
- numfpregs
7264 + ca
->arg_count
[(int) SH_ARG_FLOAT
] + 1;
7270 if ((TARGET_HITACHI
|| ca
->renesas_abi
) && TARGET_FPU_DOUBLE
)
7272 /* Note that we've used the skipped register. */
7273 if (mode
== SFmode
&& ca
->free_single_fp_reg
)
7275 ca
->free_single_fp_reg
= 0;
7278 /* When we have a DF after an SF, there's an SF register that gets
7279 skipped in order to align the DF value. We note this skipped
7280 register, because the next SF value will use it, and not the
7281 SF that follows the DF. */
7283 && ROUND_REG (*ca
, DFmode
) != ROUND_REG (*ca
, SFmode
))
7285 ca
->free_single_fp_reg
= (ROUND_REG (*ca
, SFmode
)
7286 + BASE_ARG_REG (mode
));
7290 if (! ((TARGET_SH4
|| TARGET_SH2A
) || ca
->renesas_abi
)
7291 || PASS_IN_REG_P (*ca
, mode
, type
))
7292 (ca
->arg_count
[(int) GET_SH_ARG_CLASS (mode
)]
7293 = (ROUND_REG (*ca
, mode
)
7295 ? ROUND_ADVANCE (int_size_in_bytes (type
))
7296 : ROUND_ADVANCE (GET_MODE_SIZE (mode
)))));
7299 /* The Renesas calling convention doesn't quite fit into this scheme since
7300 the address is passed like an invisible argument, but one that is always
7301 passed in memory. */
/* sh_struct_value_rtx: after testing TARGET_HITACHI || sh_attr_renesas_p
   (FNDECL), the visible fall-through result is hard register 2 in Pmode.
   INCOMING is unused.
   NOTE(review): the statement guarded by the test is not shown in this
   listing; per the comment above it presumably selects the in-memory
   convention -- confirm against the full source.  */
7303 sh_struct_value_rtx (tree fndecl
, int incoming ATTRIBUTE_UNUSED
)
7305 if (TARGET_HITACHI
|| sh_attr_renesas_p (fndecl
))
7307 return gen_rtx_REG (Pmode
, 2);
7310 /* Worker function for TARGET_RETURN_IN_MEMORY. */
/* sh_return_in_memory: decide whether a value of TYPE is returned in
   memory.  Two groups of tests are visible: a size test (BLKmode types use
   int_size_in_bytes, others GET_MODE_SIZE, both compared against 8), and
   a test that is true for BLKmode or, under the Hitachi/renesas ABI of
   FNDECL, for RECORD_TYPE.
   NOTE(review): the condition choosing between the two groups is not
   shown in this listing.  */
7313 sh_return_in_memory (tree type
, tree fndecl
)
7317 if (TYPE_MODE (type
) == BLKmode
)
7318 return ((unsigned HOST_WIDE_INT
) int_size_in_bytes (type
)) > 8;
7320 return GET_MODE_SIZE (TYPE_MODE (type
)) > 8;
7324 return (TYPE_MODE (type
) == BLKmode
7325 || ((TARGET_HITACHI
|| sh_attr_renesas_p (fndecl
))
7326 && TREE_CODE (type
) == RECORD_TYPE
));
7330 /* We actually emit the code in sh_expand_prologue. We used to use
7331 a static variable to flag that we need to emit this code, but that
7332 doesn't work when inlining, when functions are deferred and then emitted
7333 later. Fortunately, we already have two flags that are part of struct
7334 function that tell if a function uses varargs or stdarg. */
/* sh_setup_incoming_varargs: asserts current_function_stdarg.  When
   TARGET_VARARGS_PRETEND_ARGS holds for the current function, it counts the
   registers taken by the named parameters (ROUND_REG plus a ROUND_ADVANCE
   of the last argument's size), subtracts that from NPARM_REGS (SImode),
   and if any registers remain stores four bytes per register in
   *PRETEND_ARG_SIZE.  SECOND_TIME is unused.
   NOTE(review): the parameter line declaring `type' is not shown in this
   listing.  */
7336 sh_setup_incoming_varargs (CUMULATIVE_ARGS
*ca
,
7337 enum machine_mode mode
,
7339 int *pretend_arg_size
,
7340 int second_time ATTRIBUTE_UNUSED
)
7342 gcc_assert (current_function_stdarg
);
7343 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl
))
7345 int named_parm_regs
, anon_parm_regs
;
7347 named_parm_regs
= (ROUND_REG (*ca
, mode
)
7349 ? ROUND_ADVANCE (int_size_in_bytes (type
))
7350 : ROUND_ADVANCE (GET_MODE_SIZE (mode
))));
7351 anon_parm_regs
= NPARM_REGS (SImode
) - named_parm_regs
;
7352 if (anon_parm_regs
> 0)
7353 *pretend_arg_size
= anon_parm_regs
* 4;
/* sh_strict_argument_naming: takes the argument-scanning state CA, which
   is marked unused.
   NOTE(review): only the signature is shown in this listing; the body and
   return type are not visible.  */
7358 sh_strict_argument_naming (CUMULATIVE_ARGS
*ca ATTRIBUTE_UNUSED
)
/* sh_pretend_outgoing_varargs_named: true only when neither TARGET_HITACHI
   nor ca->renesas_abi is set and the target is not TARGET_SH5.  */
7364 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS
*ca
)
7366 return ! (TARGET_HITACHI
|| ca
->renesas_abi
) && ! TARGET_SH5
;
7370 /* Define the offset between two registers, one to be eliminated, and
7371 the other its replacement, at the start of a routine. */
/* initial_elimination_offset: offset between register FROM and its
   replacement TO at function entry.
   Visible steps: compute the saved-register byte count from calc_live_regs
   plus SHMEDIA_REGS_STACK_ADJUST and, if space is reserved for target
   registers, shmedia_target_regs_stack_adjust; on TARGET_SH5 round it up to
   STACK_BOUNDARY / BITS_PER_UNIT; derive total_auto_space from
   rounded_frame_size.  target_flags is saved on entry and restored after
   the frame computation (and again before returning from the PR search).
   Arg pointer to hard frame pointer or stack pointer gives saved space +
   auto space + byref_regs * 8; frame pointer to either gives
   rounded_frame_size (0).  Any other pair must be the return-address
   pointer (asserted); there the save schedule is searched for the PR
   register and its offset is returned.
   NOTE(review): some declarations, returns and the condition selecting the
   schedule search are not shown in this listing.  */
7374 initial_elimination_offset (int from
, int to
)
7377 int regs_saved_rounding
= 0;
7378 int total_saved_regs_space
;
7379 int total_auto_space
;
7380 int save_flags
= target_flags
;
7382 HARD_REG_SET live_regs_mask
;
7384 shmedia_space_reserved_for_target_registers
= false;
7385 regs_saved
= calc_live_regs (&live_regs_mask
);
7386 regs_saved
+= SHMEDIA_REGS_STACK_ADJUST ();
7388 if (shmedia_reserve_space_for_target_registers_p (regs_saved
, &live_regs_mask
))
7390 shmedia_space_reserved_for_target_registers
= true;
7391 regs_saved
+= shmedia_target_regs_stack_adjust (&live_regs_mask
);
7394 if (TARGET_SH5
&& regs_saved
% (STACK_BOUNDARY
/ BITS_PER_UNIT
))
7395 regs_saved_rounding
= ((STACK_BOUNDARY
/ BITS_PER_UNIT
)
7396 - regs_saved
% (STACK_BOUNDARY
/ BITS_PER_UNIT
));
7398 total_auto_space
= rounded_frame_size (regs_saved
) - regs_saved_rounding
;
7399 copy_flags
= target_flags
;
7400 target_flags
= save_flags
;
7402 total_saved_regs_space
= regs_saved
+ regs_saved_rounding
;
7404 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
7405 return total_saved_regs_space
+ total_auto_space
7406 + current_function_args_info
.byref_regs
* 8;
7408 if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
7409 return total_saved_regs_space
+ total_auto_space
7410 + current_function_args_info
.byref_regs
* 8;
7412 /* Initial gap between fp and sp is 0. */
7413 if (from
== HARD_FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
7416 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
7417 return rounded_frame_size (0);
7419 if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
7420 return rounded_frame_size (0);
7422 gcc_assert (from
== RETURN_ADDRESS_POINTER_REGNUM
7423 && (to
== HARD_FRAME_POINTER_REGNUM
7424 || to
== STACK_POINTER_REGNUM
));
7427 int n
= total_saved_regs_space
;
7428 int pr_reg
= TARGET_SHMEDIA
? PR_MEDIA_REG
: PR_REG
;
7429 save_schedule schedule
;
7432 n
+= total_auto_space
;
7434 /* If it wasn't saved, there's not much we can do. */
7435 if (! TEST_HARD_REG_BIT (live_regs_mask
, pr_reg
))
7438 target_flags
= copy_flags
;
7440 sh5_schedule_saves (&live_regs_mask
, &schedule
, n
);
7441 for (entry
= &schedule
.entries
[1]; entry
->mode
!= VOIDmode
; entry
++)
7442 if (entry
->reg
== pr_reg
)
7444 target_flags
= save_flags
;
7445 return entry
->offset
;
7450 return total_auto_space
;
7453 /* Insert any deferred function attributes from earlier pragmas. */
/* sh_insert_attributes: merge pragma-deferred function attributes into
   *ATTRIBUTES for NODE.
   Visible steps: test whether NODE is a FUNCTION_DECL; chain *ATTRIBUTES
   onto the deferred list and work on the combined list.  If no
   "interrupt_handler" is present on the list or on NODE, a
   "trapa_handler" causes an "interrupt_handler" entry to be consed on;
   otherwise, when "sp_switch", "trap_exit" or "nosave_low_regs" occur,
   the list is walked, those entries draw a warning and the others are
   copied through TAIL.  The result is stored in *ATTRIBUTES and the
   deferred list and its tail pointer are reset.
   NOTE(review): several guard/return lines are not shown in this
   listing.  */
7455 sh_insert_attributes (tree node
, tree
*attributes
)
7459 if (TREE_CODE (node
) != FUNCTION_DECL
)
7462 /* We are only interested in fields. */
7466 /* Append the attributes to the deferred attributes. */
7467 *sh_deferred_function_attributes_tail
= *attributes
;
7468 attrs
= sh_deferred_function_attributes
;
7472 /* Some attributes imply or require the interrupt attribute. */
7473 if (!lookup_attribute ("interrupt_handler", attrs
)
7474 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node
)))
7476 /* If we have a trapa_handler, but no interrupt_handler attribute,
7477 insert an interrupt_handler attribute. */
7478 if (lookup_attribute ("trapa_handler", attrs
) != NULL_TREE
)
7479 /* We can't use sh_pr_interrupt here because that's not in the
7482 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE
, attrs
);
7483 /* However, for sp_switch, trap_exit and nosave_low_regs, if the
7484 interrupt attribute is missing, we ignore the attribute and warn. */
7485 else if (lookup_attribute ("sp_switch", attrs
)
7486 || lookup_attribute ("trap_exit", attrs
)
7487 || lookup_attribute ("nosave_low_regs", attrs
))
7491 for (tail
= attributes
; attrs
; attrs
= TREE_CHAIN (attrs
))
7493 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs
))
7494 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs
))
7495 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs
)))
7496 warning (OPT_Wattributes
,
7497 "%qs attribute only applies to interrupt functions",
7498 IDENTIFIER_POINTER (TREE_PURPOSE (attrs
)));
7501 *tail
= tree_cons (TREE_PURPOSE (attrs
), NULL_TREE
,
7503 tail
= &TREE_CHAIN (*tail
);
7506 attrs
= *attributes
;
7510 /* Install the processed list. */
7511 *attributes
= attrs
;
7513 /* Clear deferred attributes. */
7514 sh_deferred_function_attributes
= NULL_TREE
;
7515 sh_deferred_function_attributes_tail
= &sh_deferred_function_attributes
;
7520 /* Supported attributes:
7522 interrupt_handler -- specifies this function is an interrupt handler.
7524 trapa_handler - like above, but don't save all registers.
7526 sp_switch -- specifies an alternate stack for an interrupt handler
7529 trap_exit -- use a trapa to exit an interrupt function instead of
7532 nosave_low_regs - don't save r0..r7 in an interrupt handler.
7533 This is useful on the SH3 and upwards,
7534 which has a separate set of low regs for User and Supervisor modes.
7535 This should only be used for the lowest level of interrupts. Higher levels
7536 of interrupts must save the registers in case they themselves are
7539 renesas -- use Renesas calling/layout conventions (functions and
/* sh_attribute_table: target attribute specifications, terminated by an
   all-NULL entry.  "sp_switch" and "trap_exit" take exactly one argument,
   the others none.  "renesas" is the only entry that requires a type
   rather than a declaration.  "interrupt_handler", "trapa_handler" and
   "nosave_low_regs" share one handler, as do "dllimport" and
   "dllexport".  */
7544 const struct attribute_spec sh_attribute_table
[] =
7546 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7547 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute
},
7548 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute
},
7549 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute
},
7550 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute
},
7551 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute
},
7552 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute
},
7554 /* Symbian support adds three new attributes:
7555 dllexport - for exporting a function/variable that will live in a dll
7556 dllimport - for importing a function/variable from a dll
7558 Microsoft allows multiple declspecs in one __declspec, separating
7559 them with spaces. We do NOT support this. Instead, use __declspec
7561 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute
},
7562 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute
},
7564 { NULL
, 0, 0, false, false, false, NULL
}
7567 /* Handle an "interrupt_handler" attribute; arguments as in
7568 struct attribute_spec.handler. */
/* sh_handle_interrupt_handler_attribute: reject the attribute NAME (by
   setting *no_add_attrs) when *NODE is not a FUNCTION_DECL, with a
   -Wattributes warning, or when TARGET_SHCOMPACT is in effect, with a hard
   error.  ARGS and FLAGS are unused.
   NOTE(review): the parameter line declaring no_add_attrs and the return
   statement are not shown in this listing.  */
7570 sh_handle_interrupt_handler_attribute (tree
*node
, tree name
,
7571 tree args ATTRIBUTE_UNUSED
,
7572 int flags ATTRIBUTE_UNUSED
,
7575 if (TREE_CODE (*node
) != FUNCTION_DECL
)
7577 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
7578 IDENTIFIER_POINTER (name
));
7579 *no_add_attrs
= true;
7581 else if (TARGET_SHCOMPACT
)
7583 error ("attribute interrupt_handler is not compatible with -m5-compact");
7584 *no_add_attrs
= true;
7590 /* Handle an "sp_switch" attribute; arguments as in
7591 struct attribute_spec.handler. */
/* sh_handle_sp_switch_attribute: warn (-Wattributes) and set
   *NO_ADD_ATTRS when *NODE is not a FUNCTION_DECL or when the first value
   in ARGS is not a STRING_CST.  FLAGS is unused.
   NOTE(review): the return statement is not shown in this listing.  */
7593 sh_handle_sp_switch_attribute (tree
*node
, tree name
, tree args
,
7594 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
7596 if (TREE_CODE (*node
) != FUNCTION_DECL
)
7598 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
7599 IDENTIFIER_POINTER (name
));
7600 *no_add_attrs
= true;
7602 else if (TREE_CODE (TREE_VALUE (args
)) != STRING_CST
)
7604 /* The argument must be a constant string. */
7605 warning (OPT_Wattributes
, "%qs attribute argument not a string constant",
7606 IDENTIFIER_POINTER (name
));
7607 *no_add_attrs
= true;
7613 /* Handle a "trap_exit" attribute; arguments as in
7614 struct attribute_spec.handler. */
/* sh_handle_trap_exit_attribute: warn (-Wattributes) and set
   *NO_ADD_ATTRS when *NODE is not a FUNCTION_DECL or when the first value
   in ARGS is not an INTEGER_CST.  FLAGS is unused.
   NOTE(review): the return statement is not shown in this listing.  */
7616 sh_handle_trap_exit_attribute (tree
*node
, tree name
, tree args
,
7617 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
7619 if (TREE_CODE (*node
) != FUNCTION_DECL
)
7621 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
7622 IDENTIFIER_POINTER (name
));
7623 *no_add_attrs
= true;
7625 /* The argument specifies a trap number to be used in a trapa instruction
7626 at function exit (instead of an rte instruction). */
7627 else if (TREE_CODE (TREE_VALUE (args
)) != INTEGER_CST
)
7629 /* The argument must be a constant integer. */
7630 warning (OPT_Wattributes
, "%qs attribute argument not an "
7631 "integer constant", IDENTIFIER_POINTER (name
));
7632 *no_add_attrs
= true;
/* sh_handle_renesas_attribute: handler for the "renesas" attribute.  Every
   parameter is marked unused, so no validation depends on them.
   NOTE(review): the body is not shown in this listing.  */
7639 sh_handle_renesas_attribute (tree
*node ATTRIBUTE_UNUSED
,
7640 tree name ATTRIBUTE_UNUSED
,
7641 tree args ATTRIBUTE_UNUSED
,
7642 int flags ATTRIBUTE_UNUSED
,
7643 bool *no_add_attrs ATTRIBUTE_UNUSED
)
7648 /* True if __attribute__((renesas)) or -mrenesas. */
/* sh_attr_renesas_p: TD is replaced by its TREE_TYPE, checked against
   error_mark_node, and then searched for a "renesas" entry in its
   TYPE_ATTRIBUTES.
   NOTE(review): the early-exit tests, the condition guarding the
   TREE_TYPE step and the tail of the return expression are not shown in
   this listing.  */
7650 sh_attr_renesas_p (tree td
)
7657 td
= TREE_TYPE (td
);
7658 if (td
== error_mark_node
)
7660 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td
))
7664 /* True if __attribute__((renesas)) or -mrenesas, for the current
/* sh_cfun_attr_renesas_p: sh_attr_renesas_p applied to
   current_function_decl.  */
7667 sh_cfun_attr_renesas_p (void)
7669 return sh_attr_renesas_p (current_function_decl
);
/* sh_cfun_interrupt_handler_p: looks up "interrupt_handler" among the
   DECL_ATTRIBUTES of current_function_decl.
   NOTE(review): the remainder of the return expression is not shown in
   this listing.  */
7673 sh_cfun_interrupt_handler_p (void)
7675 return (lookup_attribute ("interrupt_handler",
7676 DECL_ATTRIBUTES (current_function_decl
))
7680 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
/* sh_check_pch_target_flags: compare OLD_FLAGS with the current
   target_flags and return a translated diagnostic for the first class of
   mismatch found: architecture/FPU mask bits, then MASK_HITACHI (ABI),
   then MASK_LITTLE_ENDIAN.
   NOTE(review): the result for fully matching flags is not shown in this
   listing.  */
7683 sh_check_pch_target_flags (int old_flags
)
7685 if ((old_flags
^ target_flags
) & (MASK_SH1
| MASK_SH2
| MASK_SH3
7686 | MASK_SH_E
| MASK_HARD_SH4
7687 | MASK_FPU_SINGLE
| MASK_SH4
))
7688 return _("created and used with different architectures / ABIs");
7689 if ((old_flags
^ target_flags
) & MASK_HITACHI
)
7690 return _("created and used with different ABIs");
7691 if ((old_flags
^ target_flags
) & MASK_LITTLE_ENDIAN
)
7692 return _("created and used with different endianness");
7696 /* Predicates used by the templates. */
7698 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7699 Used only in general_movsrc_operand. */
/* system_reg_operand: predicate on OP; MODE is unused.
   NOTE(review): only the signature is shown in this listing.  */
7702 system_reg_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
7714 /* Nonzero if OP is a floating point value with value 0.0. */
/* fp_zero_operand: after a mode check against SFmode, extract the real
   value of OP and accept it only if it equals dconst0 and is not minus
   zero.
   NOTE(review): the declaration of `r' and the statement taken for
   non-SFmode operands are not shown in this listing.  */
7717 fp_zero_operand (rtx op
)
7721 if (GET_MODE (op
) != SFmode
)
7724 REAL_VALUE_FROM_CONST_DOUBLE (r
, op
);
7725 return REAL_VALUES_EQUAL (r
, dconst0
) && ! REAL_VALUE_MINUS_ZERO (r
);
7728 /* Nonzero if OP is a floating point value with value 1.0. */
/* fp_one_operand: after a mode check against SFmode, extract the real
   value of OP and accept it if it equals dconst1.
   NOTE(review): the declaration of `r' and the statement taken for
   non-SFmode operands are not shown in this listing.  */
7731 fp_one_operand (rtx op
)
7735 if (GET_MODE (op
) != SFmode
)
7738 REAL_VALUE_FROM_CONST_DOUBLE (r
, op
);
7739 return REAL_VALUES_EQUAL (r
, dconst1
);
7742 /* For -m4 and -m4-single-only, mode switching is used. If we are
7743 compiling without -mfmovd, movsf_ie isn't taken into account for
7744 mode switching. We could check in machine_dependent_reorg for
7745 cases where we know we are in single precision mode, but there is
7746 no interface to find that out during reload, so we must avoid
7747 choosing an fldi alternative during reload and thus failing to
7748 allocate a scratch register for the constant loading. */
7752 return ! TARGET_SH4
|| TARGET_FMOVD
|| reload_completed
;
/* tertiary_reload_operand: accept OP when it is a MEM, or, on TARGET_SH4,
   a CONST_DOUBLE.  MODE is unused.  */
7756 tertiary_reload_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
7758 enum rtx_code code
= GET_CODE (op
);
7759 return code
== MEM
|| (TARGET_SH4
&& code
== CONST_DOUBLE
);
7762 /* Return the TLS type for TLS symbols, 0 for otherwise. */
/* tls_symbolic_operand: for a SYMBOL_REF, return its SYMBOL_REF_TLS_MODEL.
   MODE is unused.
   NOTE(review): the value returned for non-SYMBOL_REF operands is not
   shown in this listing.  */
7764 tls_symbolic_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
7766 if (GET_CODE (op
) != SYMBOL_REF
)
7768 return SYMBOL_REF_TLS_MODEL (op
);
7771 /* Return the destination address of a branch. */
/* branch_dest: take the SET_SRC of BRANCH's pattern, step into operand 1
   when it is an IF_THEN_ELSE, then into operand 0, and return
   INSN_ADDRESSES for the INSN_UID of the insn found there.  */
7774 branch_dest (rtx branch
)
7776 rtx dest
= SET_SRC (PATTERN (branch
));
7779 if (GET_CODE (dest
) == IF_THEN_ELSE
)
7780 dest
= XEXP (dest
, 1);
7781 dest
= XEXP (dest
, 0);
7782 dest_uid
= INSN_UID (dest
);
7783 return INSN_ADDRESSES (dest_uid
);
7786 /* Return nonzero if REG is not used after INSN.
7787 We assume REG is a reload reg, and therefore does
7788 not live past labels. It may live past calls or jumps though. */
/* reg_unused_after: scan forward from INSN looking for uses or
   redefinitions of REG.
   Visible steps: first inspect INSN's own single_set; then walk
   NEXT_INSN, classifying each insn by its code.  SEQUENCE patterns are
   examined element by element (calls, jumps, annulled branches, sets whose
   source or destination overlaps REG).  Ordinary insns are checked the
   same way through single_set / reg_overlap_mentioned_p, and a CALL_INSN is
   checked against call_really_used_regs.
   NOTE(review): most of the return statements are not shown in this
   listing, so the result of each test cannot be read off here.  */
7790 reg_unused_after (rtx reg
, rtx insn
)
7795 /* If the reg is set by this instruction, then it is safe for our
7796 case. Disregard the case where this is a store to memory, since
7797 we are checking a register used in the store address. */
7798 set
= single_set (insn
);
7799 if (set
&& GET_CODE (SET_DEST (set
)) != MEM
7800 && reg_overlap_mentioned_p (reg
, SET_DEST (set
)))
7803 while ((insn
= NEXT_INSN (insn
)))
7809 code
= GET_CODE (insn
);
7812 /* If this is a label that existed before reload, then the register
7813 is dead here. However, if this is a label added by reorg, then
7814 the register may still be live here. We can't tell the difference,
7815 so we just ignore labels completely. */
7816 if (code
== CODE_LABEL
)
7821 if (code
== JUMP_INSN
)
7824 /* If this is a sequence, we must handle them all at once.
7825 We could have for instance a call that sets the target register,
7826 and an insn in a delay slot that uses the register. In this case,
7827 we must return 0. */
7828 else if (code
== INSN
&& GET_CODE (PATTERN (insn
)) == SEQUENCE
)
7833 for (i
= 0; i
< XVECLEN (PATTERN (insn
), 0); i
++)
7835 rtx this_insn
= XVECEXP (PATTERN (insn
), 0, i
);
7836 rtx set
= single_set (this_insn
);
7838 if (GET_CODE (this_insn
) == CALL_INSN
)
7840 else if (GET_CODE (this_insn
) == JUMP_INSN
)
7842 if (INSN_ANNULLED_BRANCH_P (this_insn
))
7847 if (set
&& reg_overlap_mentioned_p (reg
, SET_SRC (set
)))
7849 if (set
&& reg_overlap_mentioned_p (reg
, SET_DEST (set
)))
7851 if (GET_CODE (SET_DEST (set
)) != MEM
)
7857 && reg_overlap_mentioned_p (reg
, PATTERN (this_insn
)))
7862 else if (code
== JUMP_INSN
)
7866 set
= single_set (insn
);
7867 if (set
&& reg_overlap_mentioned_p (reg
, SET_SRC (set
)))
7869 if (set
&& reg_overlap_mentioned_p (reg
, SET_DEST (set
)))
7870 return GET_CODE (SET_DEST (set
)) != MEM
;
7871 if (set
== 0 && reg_overlap_mentioned_p (reg
, PATTERN (insn
)))
7874 if (code
== CALL_INSN
&& call_really_used_regs
[REGNO (reg
)])
/* fpscr_rtx caches the REG rtx for FPSCR_REG (PSImode); it is GTY-marked.
   get_fpscr_rtx creates the rtx, flags it REG_USERVAR_P and calls
   mark_user_reg on it; mark_user_reg is called again unless reload has
   completed and mdep_reorg_phase is SH_AFTER_MDEP_REORG.
   NOTE(review): the guard around the creation step and the return
   statement are not shown in this listing.  */
7882 static GTY(()) rtx fpscr_rtx
;
7884 get_fpscr_rtx (void)
7888 fpscr_rtx
= gen_rtx_REG (PSImode
, FPSCR_REG
);
7889 REG_USERVAR_P (fpscr_rtx
) = 1;
7890 mark_user_reg (fpscr_rtx
);
7892 if (! reload_completed
|| mdep_reorg_phase
!= SH_AFTER_MDEP_REORG
)
7893 mark_user_reg (fpscr_rtx
);
/* fpscr_values caches the decl built for the external symbol
   "__fpscr_values"; it is GTY-marked.
   emit_fpu_switch: emit a move into the FPSCR register rtx from the
   PSImode word at byte offset INDEX * 4 of that symbol.  On first use the
   decl is built as an artificial, ignored, external, static, public
   VAR_DECL whose type is an array of integer_type_node.  One visible path
   loads the symbol address into SCRATCH, adds INDEX * 4 and addresses the
   word through SCRATCH; the other uses adjust_address directly.
   NOTE(review): the condition selecting between the two addressing paths
   and the store into fpscr_values are not shown in this listing.  */
7897 static GTY(()) tree fpscr_values
;
7900 emit_fpu_switch (rtx scratch
, int index
)
7904 if (fpscr_values
== NULL
)
7908 t
= build_index_type (integer_one_node
);
7909 t
= build_array_type (integer_type_node
, t
);
7910 t
= build_decl (VAR_DECL
, get_identifier ("__fpscr_values"), t
);
7911 DECL_ARTIFICIAL (t
) = 1;
7912 DECL_IGNORED_P (t
) = 1;
7913 DECL_EXTERNAL (t
) = 1;
7914 TREE_STATIC (t
) = 1;
7915 TREE_PUBLIC (t
) = 1;
7921 src
= DECL_RTL (fpscr_values
);
7924 emit_move_insn (scratch
, XEXP (src
, 0));
7926 emit_insn (gen_addsi3 (scratch
, scratch
, GEN_INT (index
* 4)));
7927 src
= adjust_automodify_address (src
, PSImode
, scratch
, index
* 4);
7930 src
= adjust_address (src
, PSImode
, index
* 4);
7932 dst
= get_fpscr_rtx ();
7933 emit_move_insn (dst
, src
);
/* emit_sf_insn: takes an insn pattern PAT.
   NOTE(review): only the signature is shown in this listing.  */
7937 emit_sf_insn (rtx pat
)
/* emit_df_insn: takes an insn pattern PAT.
   NOTE(review): only the signature is shown in this listing.  */
7943 emit_df_insn (rtx pat
)
/* expand_sf_unop: call generator FUN with OPERANDS[0], OPERANDS[1] and the
   FPSCR register rtx, and pass the resulting pattern to emit_sf_insn.  */
7949 expand_sf_unop (rtx (*fun
) (rtx
, rtx
, rtx
), rtx
*operands
)
7951 emit_sf_insn ((*fun
) (operands
[0], operands
[1], get_fpscr_rtx ()));
/* expand_sf_binop: call the four-argument generator FUN with OPERANDS[0..2]
   and pass the resulting pattern to emit_sf_insn.
   NOTE(review): the final argument of the call is not shown in this
   listing.  */
7955 expand_sf_binop (rtx (*fun
) (rtx
, rtx
, rtx
, rtx
), rtx
*operands
)
7957 emit_sf_insn ((*fun
) (operands
[0], operands
[1], operands
[2],
/* expand_df_unop: call generator FUN with OPERANDS[0], OPERANDS[1] and the
   FPSCR register rtx, and pass the resulting pattern to emit_df_insn.  */
7962 expand_df_unop (rtx (*fun
) (rtx
, rtx
, rtx
), rtx
*operands
)
7964 emit_df_insn ((*fun
) (operands
[0], operands
[1], get_fpscr_rtx ()));
/* expand_df_binop: call the four-argument generator FUN with OPERANDS[0..2]
   and pass the resulting pattern to emit_df_insn.
   NOTE(review): the final argument of the call is not shown in this
   listing.  */
7968 expand_df_binop (rtx (*fun
) (rtx
, rtx
, rtx
, rtx
), rtx
*operands
)
7970 emit_df_insn ((*fun
) (operands
[0], operands
[1], operands
[2],
7974 /* ??? gcc does flow analysis strictly after common subexpression
7975 elimination. As a result, common subexpression elimination fails
7976 when there are some intervening statements setting the same register.
7977 If we did nothing about this, this would hurt the precision switching
7978 for SH4 badly. There is some cse after reload, but it is unable to
7979 undo the extra register pressure from the unused instructions, and
7980 it cannot remove auto-increment loads.
7982 A C code example that shows this flow/cse weakness for (at least) SH
7983 and sparc (as of gcc ss-970706) is this:
7997 So we add another pass before common subexpression elimination, to
7998 remove assignments that are dead due to a following assignment in the
7999 same basic block. */
/* mark_use: walk rtx X and clear REG_SET_BLOCK entries for registers
   that are used.
   Visible cases: for a register, nregs is HARD_REGNO_NREGS for hard
   registers and the entry at regno + nregs - 1 is zeroed.  For a SET, the
   destination (looking through a SUBREG) is walked only when it is not a
   plain REG, and the source is always walked.  Otherwise the rtx format
   string drives a recursive walk over sub-expressions and 'E' vectors.
   NOTE(review): the switch/case labels, the pseudo-register value of
   nregs and the loop around the store are not shown in this listing.  */
8002 mark_use (rtx x
, rtx
*reg_set_block
)
8008 code
= GET_CODE (x
);
8013 int regno
= REGNO (x
);
8014 int nregs
= (regno
< FIRST_PSEUDO_REGISTER
8015 ? HARD_REGNO_NREGS (regno
, GET_MODE (x
))
8019 reg_set_block
[regno
+ nregs
- 1] = 0;
8026 rtx dest
= SET_DEST (x
);
8028 if (GET_CODE (dest
) == SUBREG
)
8029 dest
= SUBREG_REG (dest
);
8030 if (GET_CODE (dest
) != REG
)
8031 mark_use (dest
, reg_set_block
);
8032 mark_use (SET_SRC (x
), reg_set_block
);
8039 const char *fmt
= GET_RTX_FORMAT (code
);
8041 for (i
= GET_RTX_LENGTH (code
) - 1; i
>= 0; i
--)
8044 mark_use (XEXP (x
, i
), reg_set_block
);
8045 else if (fmt
[i
] == 'E')
8046 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
8047 mark_use (XVECEXP (x
, i
, j
), reg_set_block
);
/* Forward declaration of get_free_reg, which maps a set of live hard
   registers to an rtx.  */
8054 static rtx
get_free_reg (HARD_REG_SET
);
8056 /* This function returns a register to use to load the address to load
8057 the fpscr from. Currently it always returns r1 or r7, but when we are
8058 able to use pseudo registers after combine, or have a better mechanism
8059 for choosing a register, it should be done here. */
8060 /* REGS_LIVE is the liveness information for the point for which we
8061 need this allocation. In some bare-bones exit blocks, r1 is live at the
8062 start. We can even have all of r0..r3 being live:
8063 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8064 INSN before which new insns are placed with will clobber the register
8065 we return. If a basic block consists only of setting the return value
8066 register to a pseudo and using that register, the return value is not
8067 live before or after this block, yet we'll insert our insns right in
/* get_free_reg: return hard register 1 in Pmode when it is not in
   REGS_LIVE; otherwise assert that hard register 7 is not live and return
   it in Pmode.  */
8071 get_free_reg (HARD_REG_SET regs_live
)
8073 if (! TEST_HARD_REG_BIT (regs_live
, 1))
8074 return gen_rtx_REG (Pmode
, 1);
8076 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8077 there shouldn't be anything but a jump before the function end. */
8078 gcc_assert (!TEST_HARD_REG_BIT (regs_live
, 7));
8079 return gen_rtx_REG (Pmode
, 7);
8082 /* This function will set the fpscr from memory.
8083 MODE is the mode we are setting it to. */
/* fpscr_set_from_mem: pick an address register with get_free_reg
   (REGS_LIVE) and call emit_fpu_switch with it.  The index passed is 1
   when MODE equals ACTUAL_NORMAL_MODE (FP_MODE) and 0 otherwise.  */
8085 fpscr_set_from_mem (int mode
, HARD_REG_SET regs_live
)
8087 enum attr_fp_mode fp_mode
= mode
;
8088 enum attr_fp_mode norm_mode
= ACTUAL_NORMAL_MODE (FP_MODE
);
8089 rtx addr_reg
= get_free_reg (regs_live
);
8091 emit_fpu_switch (addr_reg
, fp_mode
== norm_mode
);
8094 /* Is the given character a logical line separator for the assembler? */
8095 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8096 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
/* sh_insn_length_adjustment: extra length to add for INSN.
   Visible tests: (1) an INSN (other than USE/CLOBBER), a CALL_INSN or a
   non-table JUMP_INSN that needs a delay slot and is not inside a
   SEQUENCE; (2) on CPU_SH2E, a conditional branch that is not inside a
   SEQUENCE; (3) for an INSN carrying inline assembly, the template text is
   scanned one logical line at a time.  In case (3) a mnemonic starting
   with 'p'/'P' but not "pre..." is treated as a parallel-processing insn,
   "repeat" is recognised, and a ':' seen while maybe_label is set marks the
   line as a label.
   NOTE(review): the amounts returned or accumulated for each case are not
   shown in this listing.  */
8100 sh_insn_length_adjustment (rtx insn
)
8102 /* Instructions with unfilled delay slots take up an extra two bytes for
8103 the nop in the delay slot. */
8104 if (((GET_CODE (insn
) == INSN
8105 && GET_CODE (PATTERN (insn
)) != USE
8106 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
8107 || GET_CODE (insn
) == CALL_INSN
8108 || (GET_CODE (insn
) == JUMP_INSN
8109 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
8110 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
))
8111 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn
)))) != SEQUENCE
8112 && get_attr_needs_delay_slot (insn
) == NEEDS_DELAY_SLOT_YES
)
8115 /* SH2e has a bug that prevents the use of annulled branches, so if
8116 the delay slot is not filled, we'll have to put a NOP in it. */
8117 if (sh_cpu
== CPU_SH2E
8118 && GET_CODE (insn
) == JUMP_INSN
8119 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
8120 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
8121 && get_attr_type (insn
) == TYPE_CBRANCH
8122 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn
)))) != SEQUENCE
)
8125 /* sh-dsp parallel processing insn take four bytes instead of two. */
8127 if (GET_CODE (insn
) == INSN
)
8130 rtx body
= PATTERN (insn
);
8131 const char *template;
8133 int maybe_label
= 1;
8135 if (GET_CODE (body
) == ASM_INPUT
)
8136 template = XSTR (body
, 0);
8137 else if (asm_noperands (body
) >= 0)
8139 = decode_asm_operands (body
, NULL
, NULL
, NULL
, NULL
);
8148 while (c
== ' ' || c
== '\t');
8149 /* all sh-dsp parallel-processing insns start with p.
8150 The only non-ppi sh insn starting with p is pref.
8151 The only ppi starting with pr is prnd. */
8152 if ((c
== 'p' || c
== 'P') && strncasecmp ("re", template, 2))
8154 /* The repeat pseudo-insn expands to three insns, a total of
8155 six bytes in size. */
8156 else if ((c
== 'r' || c
== 'R')
8157 && ! strncasecmp ("epeat", template, 5))
8159 while (c
&& c
!= '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c
))
8161 /* If this is a label, it is obviously not a ppi insn. */
8162 if (c
== ':' && maybe_label
)
8167 else if (c
== '\'' || c
== '"')
8172 maybe_label
= c
!= ':';
8180 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8181 isn't protected by a PIC unspec. */
/* nonpic_symbol_mentioned_p: recursive scan of rtx X.
   Visible tests, in order: X itself is a SYMBOL_REF, LABEL_REF or PC; X is
   a CONST_DOUBLE; X is an UNSPEC whose number is one of the PIC/GOT/TLS
   unspecs listed below.  Otherwise every 'e' operand and every element of
   each operand vector is scanned recursively.
   NOTE(review): the value returned by each test is not shown in this
   listing.  */
8183 nonpic_symbol_mentioned_p (rtx x
)
8185 register const char *fmt
;
8188 if (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
8189 || GET_CODE (x
) == PC
)
8192 /* We don't want to look into the possible MEM location of a
8193 CONST_DOUBLE, since we're not going to use it, in general. */
8194 if (GET_CODE (x
) == CONST_DOUBLE
)
8197 if (GET_CODE (x
) == UNSPEC
8198 && (XINT (x
, 1) == UNSPEC_PIC
8199 || XINT (x
, 1) == UNSPEC_GOT
8200 || XINT (x
, 1) == UNSPEC_GOTOFF
8201 || XINT (x
, 1) == UNSPEC_GOTPLT
8202 || XINT (x
, 1) == UNSPEC_GOTTPOFF
8203 || XINT (x
, 1) == UNSPEC_DTPOFF
8204 || XINT (x
, 1) == UNSPEC_PLT
))
8207 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
8208 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
8214 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
8215 if (nonpic_symbol_mentioned_p (XVECEXP (x
, i
, j
)))
8218 else if (fmt
[i
] == 'e' && nonpic_symbol_mentioned_p (XEXP (x
, i
)))
8225 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8226 @GOTOFF in `reg'. */
/* legitimize_pic_address: ORIG is first tested with tls_symbolic_operand.
   A LABEL_REF or a local SYMBOL_REF is loaded into a fresh Pmode pseudo
   with gen_symGOTOFF2reg; any other SYMBOL_REF is loaded with
   gen_symGOT2reg.  MODE is unused.
   NOTE(review): the third parameter, the guards around gen_reg_rtx and the
   return statements are not shown in this listing.  */
8228 legitimize_pic_address (rtx orig
, enum machine_mode mode ATTRIBUTE_UNUSED
,
8231 if (tls_symbolic_operand (orig
, Pmode
))
8234 if (GET_CODE (orig
) == LABEL_REF
8235 || (GET_CODE (orig
) == SYMBOL_REF
&& SYMBOL_REF_LOCAL_P (orig
)))
8238 reg
= gen_reg_rtx (Pmode
);
8240 emit_insn (gen_symGOTOFF2reg (reg
, orig
));
8243 else if (GET_CODE (orig
) == SYMBOL_REF
)
8246 reg
= gen_reg_rtx (Pmode
);
8248 emit_insn (gen_symGOT2reg (reg
, orig
));
8254 /* Mark the use of a constant in the literal table. If the constant
8255 has multiple labels, make it unique. */
/* mark_constant_pool_use: X is dispatched on its rtx code.
   Visible steps: walk backwards from X over adjacent CODE_LABELs chained
   through LABEL_REFS; mark every insn on the LABEL_REFS chain of `lab' as
   deleted; then walk forward over UNSPEC_VOLATILE insns.  In the forward
   walk, UNSPECV_CONST2/4/8 entries get operand 1 set to const1_rtx, and
   UNSPECV_WINDOW_END (when its operand 0 is X) and UNSPECV_CONST_END are
   also matched.
   NOTE(review): the switch cases on X, the assignments to `lab' and the
   return statements are not shown in this listing.  */
8257 mark_constant_pool_use (rtx x
)
8259 rtx insn
, lab
, pattern
;
8264 switch (GET_CODE (x
))
8274 /* Get the first label in the list of labels for the same constant
8275 and delete the other labels in the list. */
8277 for (insn
= PREV_INSN (x
); insn
; insn
= PREV_INSN (insn
))
8279 if (GET_CODE (insn
) != CODE_LABEL
8280 || LABEL_REFS (insn
) != NEXT_INSN (insn
))
8285 for (insn
= LABEL_REFS (lab
); insn
; insn
= LABEL_REFS (insn
))
8286 INSN_DELETED_P (insn
) = 1;
8288 /* Mark constants in a window. */
8289 for (insn
= NEXT_INSN (x
); insn
; insn
= NEXT_INSN (insn
))
8291 if (GET_CODE (insn
) != INSN
)
8294 pattern
= PATTERN (insn
);
8295 if (GET_CODE (pattern
) != UNSPEC_VOLATILE
)
8298 switch (XINT (pattern
, 1))
8300 case UNSPECV_CONST2
:
8301 case UNSPECV_CONST4
:
8302 case UNSPECV_CONST8
:
8303 XVECEXP (pattern
, 0, 1) = const1_rtx
;
8305 case UNSPECV_WINDOW_END
:
8306 if (XVECEXP (pattern
, 0, 0) == x
)
8309 case UNSPECV_CONST_END
:
8319 /* Return true if it's possible to redirect BRANCH1 to the destination
8320 of an unconditional jump BRANCH2. We only want to do this if the
8321 resulting branch will have a short displacement. */
/* sh_can_redirect_branch: only considered when flag_expensive_optimizations
   is set and BRANCH2 is a simple jump.  DEST is the label operand of
   BRANCH2.  Two scans accumulate get_attr_length while the running
   distance stays below 256 bytes: one stepping with PREV_INSN, one with
   NEXT_INSN.
   NOTE(review): both scans are seeded with NEXT_INSN (branch1), including
   the backward one -- confirm that is intended.  The comparison against
   DEST and the return statements are not shown in this listing.  */
8323 sh_can_redirect_branch (rtx branch1
, rtx branch2
)
8325 if (flag_expensive_optimizations
&& simplejump_p (branch2
))
8327 rtx dest
= XEXP (SET_SRC (single_set (branch2
)), 0);
8331 for (distance
= 0, insn
= NEXT_INSN (branch1
);
8332 insn
&& distance
< 256;
8333 insn
= PREV_INSN (insn
))
8338 distance
+= get_attr_length (insn
);
8340 for (distance
= 0, insn
= NEXT_INSN (branch1
);
8341 insn
&& distance
< 256;
8342 insn
= NEXT_INSN (insn
))
8347 distance
+= get_attr_length (insn
);
8353 /* Return nonzero if register old_reg can be renamed to register new_reg. */
/* sh_hard_regno_rename_ok: OLD_REG is unused.  The visible test is whether
   the current function is an interrupt handler and NEW_REG is not marked in
   regs_ever_live.
   NOTE(review): the return statements are not shown in this listing.  */
8355 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED
,
8356 unsigned int new_reg
)
8358 /* Interrupt functions can only use registers that have already been
8359 saved by the prologue, even if they would normally be
8362 if (sh_cfun_interrupt_handler_p () && !regs_ever_live
[new_reg
])
8368 /* Function to update the integer COST
8369 based on the relationship between INSN that is dependent on
8370 DEP_INSN through the dependence LINK. The default is to make no
8371 adjustment to COST. This can be used for example to specify to
8372 the scheduler that an output- or anti-dependence does not incur
8373 the same cost as a data-dependence. The return value should be
8374 the new value for COST. */
/* sh_adjust_cost: scheduler hook adjusting COST for the dependence LINK of
   INSN on DEP_INSN.
   Visible structure: a first group handles non-true dependences
   (REG_NOTE_KIND (link) != 0).  Within it, an anti-dependence from a
   PT/PTABS target-register load to a conditional media branch walks
   next_active_insn from the likely target and caps the cost at
   orig_cost - 2; MAC-to-MAC pairs, pre-reload FLOAT conversions fed by an
   FP arithmetic register, and media jumps not flow-dependent on DEP_INSN
   are also matched.  A second group handles true dependences
   (REG_NOTE_KIND (link) == 0) and requires both insns to be recognised;
   it matches loads, calls (only the call address matters), sfunc calls,
   dynamic shifts fed by an integer load, LS-group insns followed by
   double-precision computations, and late FP uses.  A final branch
   handles real anti-dependences on double-precision arithmetic.
   LINK is tagged ATTRIBUTE_UNUSED in the signature although the visible
   body reads it.
   NOTE(review): most of the statements that assign the adjusted cost are
   not shown in this listing.  */
8376 sh_adjust_cost (rtx insn
, rtx link ATTRIBUTE_UNUSED
, rtx dep_insn
, int cost
)
8382 /* On SHmedia, if the dependence is an anti-dependence or
8383 output-dependence, there is no cost. */
8384 if (REG_NOTE_KIND (link
) != 0)
8386 /* However, dependencies between target register loads and
8387 uses of the register in a subsequent block that are separated
8388 by a conditional branch are not modelled - we have to do with
8389 the anti-dependency between the target register load and the
8390 conditional branch that ends the current block. */
8391 if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
8392 && GET_CODE (PATTERN (dep_insn
)) == SET
8393 && (get_attr_type (dep_insn
) == TYPE_PT_MEDIA
8394 || get_attr_type (dep_insn
) == TYPE_PTABS_MEDIA
)
8395 && get_attr_type (insn
) == TYPE_CBRANCH_MEDIA
)
8397 int orig_cost
= cost
;
8398 rtx note
= find_reg_note (insn
, REG_BR_PROB
, 0);
8399 rtx target
= ((! note
8400 || INTVAL (XEXP (note
, 0)) * 2 < REG_BR_PROB_BASE
)
8401 ? insn
: JUMP_LABEL (insn
));
8402 /* On the likely path, the branch costs 1, on the unlikely path,
8406 target
= next_active_insn (target
);
8407 while (target
&& ! flow_dependent_p (target
, dep_insn
)
8409 /* If two branches are executed in immediate succession, with the
8410 first branch properly predicted, this causes a stall at the
8411 second branch, hence we won't need the target for the
8412 second branch for two cycles after the launch of the first
8414 if (cost
> orig_cost
- 2)
8415 cost
= orig_cost
- 2;
8421 else if (get_attr_is_mac_media (insn
)
8422 && get_attr_is_mac_media (dep_insn
))
8425 else if (! reload_completed
8426 && GET_CODE (PATTERN (insn
)) == SET
8427 && GET_CODE (SET_SRC (PATTERN (insn
))) == FLOAT
8428 && GET_CODE (PATTERN (dep_insn
)) == SET
8429 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn
)), VOIDmode
)
8432 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8433 that is needed at the target. */
8434 else if (get_attr_type (insn
) == TYPE_JUMP_MEDIA
8435 && ! flow_dependent_p (insn
, dep_insn
))
8438 else if (REG_NOTE_KIND (link
) == 0)
8440 enum attr_type dep_type
, type
;
8442 if (recog_memoized (insn
) < 0
8443 || recog_memoized (dep_insn
) < 0)
8446 dep_type
= get_attr_type (dep_insn
);
8447 if (dep_type
== TYPE_FLOAD
|| dep_type
== TYPE_PCFLOAD
)
8449 if ((dep_type
== TYPE_LOAD_SI
|| dep_type
== TYPE_PCLOAD_SI
)
8450 && (type
= get_attr_type (insn
)) != TYPE_CALL
8451 && type
!= TYPE_SFUNC
)
8454 /* The only input for a call that is timing-critical is the
8455 function's address. */
8456 if (GET_CODE(insn
) == CALL_INSN
)
8458 rtx call
= PATTERN (insn
);
8460 if (GET_CODE (call
) == PARALLEL
)
8461 call
= XVECEXP (call
, 0 ,0);
8462 if (GET_CODE (call
) == SET
)
8463 call
= SET_SRC (call
);
8464 if (GET_CODE (call
) == CALL
&& GET_CODE (XEXP (call
, 0)) == MEM
8465 /* sibcalli_thunk uses a symbol_ref in an unspec. */
8466 && (GET_CODE (XEXP (XEXP (call
, 0), 0)) == UNSPEC
8467 || ! reg_set_p (XEXP (XEXP (call
, 0), 0), dep_insn
)))
8470 /* Likewise, the most timing critical input for an sfuncs call
8471 is the function address. However, sfuncs typically start
8472 using their arguments pretty quickly.
8473 Assume a four cycle delay before they are needed. */
8474 /* All sfunc calls are parallels with at least four components.
8475 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8476 else if (GET_CODE (PATTERN (insn
)) == PARALLEL
8477 && XVECLEN (PATTERN (insn
), 0) >= 4
8478 && (reg
= sfunc_uses_reg (insn
)))
8480 if (! reg_set_p (reg
, dep_insn
))
8483 /* When the preceding instruction loads the shift amount of
8484 the following SHAD/SHLD, the latency of the load is increased
8487 && get_attr_type (insn
) == TYPE_DYN_SHIFT
8488 && get_attr_any_int_load (dep_insn
) == ANY_INT_LOAD_YES
8489 && reg_overlap_mentioned_p (SET_DEST (single_set (dep_insn
)),
8490 XEXP (SET_SRC (single_set (insn
)),
8493 /* When an LS group instruction with a latency of less than
8494 3 cycles is followed by a double-precision floating-point
8495 instruction, FIPR, or FTRV, the latency of the first
8496 instruction is increased to 3 cycles. */
8498 && get_attr_insn_class (dep_insn
) == INSN_CLASS_LS_GROUP
8499 && get_attr_dfp_comp (insn
) == DFP_COMP_YES
)
8501 /* The lsw register of a double-precision computation is ready one
8503 else if (reload_completed
8504 && get_attr_dfp_comp (dep_insn
) == DFP_COMP_YES
8505 && (use_pat
= single_set (insn
))
8506 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn
))),
8510 if (get_attr_any_fp_comp (dep_insn
) == ANY_FP_COMP_YES
8511 && get_attr_late_fp_use (insn
) == LATE_FP_USE_YES
)
8514 /* An anti-dependence penalty of two applies if the first insn is a double
8515 precision fadd / fsub / fmul. */
8516 else if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
8517 && recog_memoized (dep_insn
) >= 0
8518 && get_attr_type (dep_insn
) == TYPE_DFP_ARITH
8519 /* A lot of alleged anti-flow dependences are fake,
8520 so check this one is real. */
8521 && flow_dependent_p (dep_insn
, insn
))
8528 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8529 if DEP_INSN is anti-flow dependent on INSN.
   INSN's pattern is copied into TMP, and flow_dependent_p_1 is run through
   note_stores over DEP_INSN's pattern with the address of TMP as its data.
   The result is nonzero exactly when TMP is NULL_RTX once note_stores
   returns. */
8531 flow_dependent_p (rtx insn
, rtx dep_insn
)
8533 rtx tmp
= PATTERN (insn
);
8535 note_stores (PATTERN (dep_insn
), flow_dependent_p_1
, &tmp
);
8536 return tmp
== NULL_RTX
;
8539 /* A helper function for flow_dependent_p called through note_stores.
   X is the rtx checked against the pattern that DATA points to; PAT is
   unused. DATA is read as a pointer to an rtx, matching the &tmp that
   flow_dependent_p passes. */
8541 flow_dependent_p_1 (rtx x
, rtx pat ATTRIBUTE_UNUSED
, void *data
)
8543 rtx
* pinsn
= (rtx
*) data
;
/* Act only while *PINSN is still non-null and X is referenced by it.
   NOTE(review): the statement guarded by this test is not visible here;
   flow_dependent_p's `tmp == NULL_RTX' check suggests it clears *PINSN --
   confirm. */
8545 if (*pinsn
&& reg_referenced_p (x
, *pinsn
))
8549 /* For use by sh_allocate_initial_value. Note that sh.md contains some
8550 'special function' patterns (type sfunc) that clobber pr, but that
8551 do not look like function calls to leaf_function_p. Hence we must
8552 do this extra check.
   The value returned is REG_N_SETS of the PR register: PR_MEDIA_REG when
   TARGET_SHMEDIA, PR_REG otherwise. */
8556 return REG_N_SETS (TARGET_SHMEDIA
? PR_MEDIA_REG
: PR_REG
);
8559 /* Return where to allocate pseudo for a given hard register initial value. */
/* Only the PR register (PR_MEDIA_REG when TARGET_SHMEDIA, PR_REG otherwise)
   gets special treatment. NOTE(review): the statements that pick the
   result on each path, and the final return, are not visible here. */
8562 sh_allocate_initial_value (rtx hard_reg
)
8566 if (REGNO (hard_reg
) == (TARGET_SHMEDIA
? PR_MEDIA_REG
: PR_REG
))
/* Tested here: a leaf function with no sets of PR (sh_pr_n_sets), and, on
   SHcompact, neither call_cookie bits other than CALL_COOKIE_RET_TRAMP (1)
   nor a nonlocal label. */
8568 if (current_function_is_leaf
8569 && ! sh_pr_n_sets ()
8570 && ! (TARGET_SHCOMPACT
8571 && ((current_function_args_info
.call_cookie
8572 & ~ CALL_COOKIE_RET_TRAMP (1))
8573 || current_function_has_nonlocal_label
)))
/* X becomes a Pmode frame memory reference addressed by
   return_address_pointer_rtx. */
8576 x
= gen_frame_mem (Pmode
, return_address_pointer_rtx
);
8584 /* This function returns "2" to indicate dual issue for the SH4
8585 processor. To be used by the DFA pipeline description.
   The choice is keyed on TARGET_SUPERSCALAR; sh_reorder also returns this
   function's value. */
8587 sh_issue_rate (void)
8589 if (TARGET_SUPERSCALAR
)
8595 /* Functions for ready queue reordering for sched1. */
8597 /* Get weight for mode for a set x.
   Two shapes of X are recognized: a CLOBBER, or a SET, whose destination
   is a register_operand in MODE. For a SET, a plain REG destination is
   further checked for not being mentioned in the source.
   NOTE(review): the weights returned for each case are not visible here. */
8599 find_set_regmode_weight (rtx x
, enum machine_mode mode
)
8601 if (GET_CODE (x
) == CLOBBER
&& register_operand (SET_DEST (x
), mode
))
8603 if (GET_CODE (x
) == SET
&& register_operand (SET_DEST (x
), mode
))
8605 if (GET_CODE (SET_DEST (x
)) == REG
)
8607 if (!reg_mentioned_p (SET_DEST (x
), SET_SRC (x
)))
8617 /* Get regmode weight for insn.
   The weight starts at zero, gains find_set_regmode_weight of X
   (presumably INSN's pattern; its initialization is not visible here) and,
   when X is a PARALLEL, of each element of INSN's pattern. The notes of
   INSN are then walked for REG_DEAD and REG_UNUSED entries naming a REG
   whose mode is MODE. */
8619 find_insn_regmode_weight (rtx insn
, enum machine_mode mode
)
8621 short reg_weight
= 0;
8624 /* Increment weight for each register born here. */
8626 reg_weight
+= find_set_regmode_weight (x
, mode
);
8627 if (GET_CODE (x
) == PARALLEL
)
/* The bound XVECLEN (x, 0) is read once, before X is reused below to hold
   each element in turn. */
8630 for (j
= XVECLEN (x
, 0) - 1; j
>= 0; j
--)
8632 x
= XVECEXP (PATTERN (insn
), 0, j
);
8633 reg_weight
+= find_set_regmode_weight (x
, mode
);
8636 /* Decrement weight for each register that dies here. */
8637 for (x
= REG_NOTES (insn
); x
; x
= XEXP (x
, 1))
8639 if (REG_NOTE_KIND (x
) == REG_DEAD
|| REG_NOTE_KIND (x
) == REG_UNUSED
)
8641 rtx note
= XEXP (x
, 0);
8642 if (GET_CODE (note
) == REG
&& GET_MODE (note
) == mode
)
8649 /* Calculate regmode weights for all insns of a basic block.
   B is the block index handed to get_block_head_tail. For every insn from
   the block head through the block tail, INSN_REGMODE_WEIGHT (insn, MODE)
   is set to the insn's weight in MODE plus twice its weight in a second
   mode: DFmode in the first branch, DImode when MODE is SImode.
   NOTE(review): the condition of the first branch is not visible here;
   sh_md_init_global calls this with SImode and SFmode. */
8651 find_regmode_weight (int b
, enum machine_mode mode
)
8653 rtx insn
, next_tail
, head
, tail
;
8655 get_block_head_tail (b
, &head
, &tail
);
/* NEXT_TAIL is the insn after the last one of the block, so the loop
   below includes TAIL itself. */
8656 next_tail
= NEXT_INSN (tail
);
8658 for (insn
= head
; insn
!= next_tail
; insn
= NEXT_INSN (insn
))
8660 /* Handle register life information. */
8665 INSN_REGMODE_WEIGHT (insn
, mode
) =
8666 find_insn_regmode_weight (insn
, mode
) + 2 * find_insn_regmode_weight (insn
, DFmode
);
8667 else if (mode
== SImode
)
8668 INSN_REGMODE_WEIGHT (insn
, mode
) =
8669 find_insn_regmode_weight (insn
, mode
) + 2 * find_insn_regmode_weight (insn
, DImode
);
8673 /* Comparison function for ready queue sorting.
   X and Y point to array slots holding insns, as qsort passes them. Note
   the crossing below: TMP is Y's insn and TMP2 is X's. A positive result
   is therefore returned when X's insn is the one in a schedule group, or,
   failing a group difference, when X's insn has the smaller INSN_LUID. */
8675 rank_for_reorder (const void *x
, const void *y
)
8677 rtx tmp
= *(const rtx
*) y
;
8678 rtx tmp2
= *(const rtx
*) x
;
8680 /* The insn in a schedule group should be issued first. */
8681 if (SCHED_GROUP_P (tmp
) != SCHED_GROUP_P (tmp2
))
8682 return SCHED_GROUP_P (tmp2
) ? 1 : -1;
8684 /* If insns are equally good, sort by INSN_LUID (original insn order). This
8685 minimizes instruction movement, thus minimizing sched's effect on
8686 register pressure. */
8687 return INSN_LUID (tmp
) - INSN_LUID (tmp2
);
8690 /* Re-sort the array A of N elements, in which only the last element,
   A[N - 1], may be out of order. That element is saved in INSN and compared,
   with rank_for_reorder, against the elements at decreasing index I for as
   long as the comparison is non-negative.
   NOTE(review): the initialization of I and the loop body are not visible
   here. */
8692 swap_reorder (rtx
*a
, int n
)
8694 rtx insn
= a
[n
- 1];
8697 while (i
>= 0 && rank_for_reorder (a
+ i
, &insn
) >= 0)
/* Order the N_READY entries of READY with rank_for_reorder: exactly two
   entries go through swap_reorder, more than two through qsort, and fewer
   than two take neither branch. */
8705 #define SCHED_REORDER(READY, N_READY) \
8708 if ((N_READY) == 2) \
8709 swap_reorder (READY, N_READY); \
8710 else if ((N_READY) > 2) \
8711 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8715 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER macro. */
/* READY holds NREADY insns. All the work is delegated to SCHED_REORDER,
   so rank_for_reorder decides the resulting order. */
8718 ready_reorder (rtx
*ready
, int nready
)
8720 SCHED_REORDER (ready
, nready
);
8723 /* Calculate regmode weights for all insns of all basic blocks.
   DUMP and VERBOSE are unused. Two zero-filled arrays of OLD_MAX_UID
   shorts are allocated into regmode_weight[0] and regmode_weight[1]
   (sh_md_finish_global frees them), every basic block is processed for
   SImode and SFmode, and both pressure counters are reset to zero. */
8725 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED
,
8726 int verbose ATTRIBUTE_UNUSED
,
8731 regmode_weight
[0] = (short *) xcalloc (old_max_uid
, sizeof (short));
8732 regmode_weight
[1] = (short *) xcalloc (old_max_uid
, sizeof (short));
8734 FOR_EACH_BB_REVERSE (b
)
8736 find_regmode_weight (b
->index
, SImode
);
8737 find_regmode_weight (b
->index
, SFmode
);
8740 CURR_REGMODE_PRESSURE (SImode
) = 0;
8741 CURR_REGMODE_PRESSURE (SFmode
) = 0;
/* Release both regmode_weight arrays, if set, and reset the pointers to
   NULL, so a repeated call finds nothing left to free. DUMP and VERBOSE
   are unused. */
8747 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED
,
8748 int verbose ATTRIBUTE_UNUSED
)
8750 if (regmode_weight
[0])
8752 free (regmode_weight
[0]);
8753 regmode_weight
[0] = NULL
;
8755 if (regmode_weight
[1])
8757 free (regmode_weight
[1]);
8758 regmode_weight
[1] = NULL
;
8762 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8763 keep count of register pressures on SImode and SFmode.
   DUMP and SCHED_VERBOSE are unused. The value returned is always the
   freshly cached count. */
8765 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED
,
8766 int sched_verbose ATTRIBUTE_UNUSED
,
/* An insn whose pattern is neither a USE nor a CLOBBER uses up one issue
   slot; the other assignment below keeps the count unchanged. */
8770 if (GET_CODE (PATTERN (insn
)) != USE
8771 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
8772 cached_can_issue_more
= can_issue_more
- 1;
8774 cached_can_issue_more
= can_issue_more
;
/* Register pressure is only tracked while reload has not completed. */
8776 if (reload_completed
)
8777 return cached_can_issue_more
;
8779 CURR_REGMODE_PRESSURE (SImode
) += INSN_REGMODE_WEIGHT (insn
, SImode
);
8780 CURR_REGMODE_PRESSURE (SFmode
) += INSN_REGMODE_WEIGHT (insn
, SFmode
);
8782 return cached_can_issue_more
;
/* Reset the SImode and SFmode pressure counters to zero. All three
   arguments are unused. */
8786 sh_md_init (FILE *dump ATTRIBUTE_UNUSED
,
8787 int verbose ATTRIBUTE_UNUSED
,
8788 int veclen ATTRIBUTE_UNUSED
)
8790 CURR_REGMODE_PRESSURE (SImode
) = 0;
8791 CURR_REGMODE_PRESSURE (SFmode
) = 0;
8794 /* Some magic numbers. */
8795 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8796 functions that already have high pressure on r0.
   high_pressure compares REG_N_SETS (0) - REG_N_DEATHS (0) against
   R0_MAX_LIFE_REGIONS and REG_LIVE_LENGTH (0) against R0_MAX_LIVE_LENGTH;
   both must be reached. */
8797 #define R0_MAX_LIFE_REGIONS 2
8798 #define R0_MAX_LIVE_LENGTH 12
8799 /* Register Pressure thresholds for SImode and SFmode registers.
   high_pressure reports high pressure when CURR_REGMODE_PRESSURE of the
   mode is strictly greater than the threshold. */
8800 #define SIMODE_MAX_WEIGHT 5
8801 #define SFMODE_MAX_WEIGHT 10
8803 /* Return true if the pressure is high for MODE.
   The r0 test comes first and does not depend on MODE. After it, the SFmode
   counter is compared with SFMODE_MAX_WEIGHT and the SImode counter with
   SIMODE_MAX_WEIGHT, both with a strict `>'.
   NOTE(review): the result of the r0 test and the condition selecting
   between the two comparisons are not visible here. */
8805 high_pressure (enum machine_mode mode
)
8807 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8808 functions that already have high pressure on r0. */
8809 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8810 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH
)
8814 return (CURR_REGMODE_PRESSURE (SFmode
) > SFMODE_MAX_WEIGHT
);
8816 return (CURR_REGMODE_PRESSURE (SImode
) > SIMODE_MAX_WEIGHT
);
8819 /* Reorder ready queue if register pressure is high.
   READY holds *N_READYP insns. Once reload has completed nothing is
   reordered. Otherwise the queue is handed to ready_reorder when
   high_pressure holds for SFmode or SImode. Every return yields
   sh_issue_rate (). */
8821 sh_reorder (FILE *dump ATTRIBUTE_UNUSED
,
8822 int sched_verbose ATTRIBUTE_UNUSED
,
8825 int clock_var ATTRIBUTE_UNUSED
)
8827 if (reload_completed
)
8828 return sh_issue_rate ();
8830 if (high_pressure (SFmode
) || high_pressure (SImode
))
8832 ready_reorder (ready
, *n_readyp
);
8835 return sh_issue_rate ();
8838 /* Skip cycles if the current register pressure is high.
   All five arguments are unused. Both visible returns yield
   cached_can_issue_more, the value saved by sh_variable_issue.
   NOTE(review): what is done when high_pressure holds is not visible
   here. */
8840 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED
,
8841 int sched_verbose ATTRIBUTE_UNUSED
,
8842 rtx
*ready ATTRIBUTE_UNUSED
,
8843 int *n_readyp ATTRIBUTE_UNUSED
,
8844 int clock_var ATTRIBUTE_UNUSED
)
8846 if (reload_completed
)
8847 return cached_can_issue_more
;
8849 if (high_pressure(SFmode
) || high_pressure (SImode
))
8852 return cached_can_issue_more
;
8855 /* Skip cycles without sorting the ready queue. This will move insns from
8856 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
8857 queue by sh_reorder. */
8859 /* Generally, skipping MAX_SKIPS cycles is sufficient for all insns to move
from Q->R. */
/* reload_completed is tested first. After that the distance
   CLOCK_VAR - LAST_CLOCK_VAR is compared with MAX_SKIPS twice: strictly
   below it, and exactly equal to it.
   NOTE(review): the action taken in each case, and the remaining
   parameters, are not visible here. */
8864 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED
,
8865 int sched_verbose ATTRIBUTE_UNUSED
,
8866 rtx insn ATTRIBUTE_UNUSED
,
8871 if (reload_completed
)
8876 if ((clock_var
- last_clock_var
) < MAX_SKIPS
)
8881 /* If this is the last cycle we are skipping, allow reordering of R. */
8882 if ((clock_var
- last_clock_var
) == MAX_SKIPS
)
8894 /* SHmedia requires registers for branches, so we can't generate new
8895 branches past reload.
   The result is true only for TARGET_SHMEDIA while reload is in progress
   or has completed. */
8897 sh_cannot_modify_jumps_p (void)
8899 return (TARGET_SHMEDIA
&& (reload_in_progress
|| reload_completed
));
/* Yield TARGET_REGS when TARGET_SHMEDIA, NO_REGS otherwise. */
8903 sh_target_reg_class (void)
8905 return TARGET_SHMEDIA
? TARGET_REGS
: NO_REGS
;
/* Checks made, in order: whether space was reserved for target registers;
   AFTER_PROLOGUE_EPILOGUE_GEN together with !TARGET_SAVE_ALL_TARGET_REGS;
   whether calc_live_regs reports at least 6 * 8; and finally a scan of the
   insn stream for loops.
   NOTE(review): the value returned on each path is not visible here. */
8909 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen
)
8914 if (! shmedia_space_reserved_for_target_registers
)
8916 if (after_prologue_epilogue_gen
&& ! TARGET_SAVE_ALL_TARGET_REGS
)
8918 if (calc_live_regs (&dummy
) >= 6 * 8)
8920 /* This is a borderline case. See if we got a nested loop, or a loop
8921 with a call, or with more than 4 labels inside. */
8922 for (insn
= get_insns(); insn
; insn
= NEXT_INSN (insn
))
8924 if (GET_CODE (insn
) == NOTE
8925 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_LOOP_BEG
)
/* Walk forward from the loop-begin note until a NOTE_INSN_LOOP_END note is
   reached, testing each insn on the way.
   NOTE(review): INSN is advanced without a visible null test, so this
   relies on every NOTE_INSN_LOOP_BEG being followed by a
   NOTE_INSN_LOOP_END. */
8931 insn
= NEXT_INSN (insn
);
8932 if ((GET_CODE (insn
) == NOTE
8933 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_LOOP_BEG
)
8934 || GET_CODE (insn
) == CALL_INSN
8935 || (GET_CODE (insn
) == CODE_LABEL
&& ++labels
> 4))
8938 while (GET_CODE (insn
) != NOTE
8939 || NOTE_LINE_NUMBER (insn
) != NOTE_INSN_LOOP_END
);
/* True for TARGET_SH5, for TARGET_HITACHI, or when sh_attr_renesas_p holds
   for RECORD_TYPE. Note that RECORD_TYPE is used by that last test even
   though it is marked ATTRIBUTE_UNUSED. */
8946 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED
)
8948 return (TARGET_SH5
|| TARGET_HITACHI
|| sh_attr_renesas_p (record_type
));
/*
8952 On the SH1..SH4, the trampoline looks like
8953 2 0002 D202 mov.l l2,r2
8954 1 0000 D301 mov.l l1,r3
3 0004 422B jmp @r2
4 0006 0009 nop
8957 5 0008 00000000 l1: .long area
8958 6 000c 00000000 l2: .long function
8960 SH5 (compact) uses r1 instead of r3 for the static chain. */
8963 /* Emit RTL insns to initialize the variable parts of a trampoline.
8964 FNADDR is an RTX for the address of the function's pure code.
8965 CXT is an RTX for the static chain value for the function.
   TRAMP is the address of the trampoline; stores into it go through
   TRAMP_MEM, a BLKmode frame memory reference built from TRAMP. The code
   emitted depends on the target: TARGET_SHMEDIA64, TARGET_SHMEDIA,
   TARGET_SHCOMPACT, or none of these. */
8968 sh_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
8970 rtx tramp_mem
= gen_frame_mem (BLKmode
, tramp
);
8972 if (TARGET_SHMEDIA64
)
8977 rtx movi1
= GEN_INT (0xcc000010);
8978 rtx shori1
= GEN_INT (0xc8000010);
8981 /* The following trampoline works within a +- 128 KB range for cxt:
8982 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
8983 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
8984 gettr tr1,r1; blink tr0,r63 */
8985 /* Address rounding makes it hard to compute the exact bounds of the
8986 offset for this trampoline, but we have a rather generous offset
8987 range, so frame_offset should do fine as an upper bound. */
8988 if (cxt
== virtual_stack_vars_rtx
&& frame_offset
< 0x20000)
8990 /* ??? could optimize this trampoline initialization
8991 by writing DImode words with two insns each. */
/* MASK covers bits 10..25 of an instruction word. Each value below is
   shifted so that the wanted 16 bits land in that field, ANDed with MASK,
   ORed into an opcode, and stored as one SImode word of the trampoline. */
8992 rtx mask
= force_reg (DImode
, GEN_INT (0x3fffc00));
8993 rtx insn
= gen_rtx_MINUS (DImode
, cxt
, tramp
);
8994 insn
= gen_rtx_ASHIFT (DImode
, insn
, GEN_INT (10-2));
8995 insn
= gen_rtx_AND (DImode
, insn
, mask
);
8996 /* Or in ptb/u .,tr1 pattern */
8997 insn
= gen_rtx_IOR (DImode
, insn
, gen_int_mode (0xec000010, SImode
));
8998 insn
= force_operand (insn
, NULL_RTX
);
8999 insn
= gen_lowpart (SImode
, insn
);
9000 emit_move_insn (change_address (tramp_mem
, SImode
, NULL_RTX
), insn
);
9001 insn
= gen_rtx_LSHIFTRT (DImode
, fnaddr
, GEN_INT (38));
9002 insn
= gen_rtx_AND (DImode
, insn
, mask
);
9003 insn
= force_operand (gen_rtx_IOR (DImode
, movi1
, insn
), NULL_RTX
);
9004 insn
= gen_lowpart (SImode
, insn
);
9005 emit_move_insn (adjust_address (tramp_mem
, SImode
, 4), insn
);
9006 insn
= gen_rtx_LSHIFTRT (DImode
, fnaddr
, GEN_INT (22));
9007 insn
= gen_rtx_AND (DImode
, insn
, mask
);
9008 insn
= force_operand (gen_rtx_IOR (DImode
, shori1
, insn
), NULL_RTX
);
9009 insn
= gen_lowpart (SImode
, insn
);
9010 emit_move_insn (adjust_address (tramp_mem
, SImode
, 8), insn
);
9011 insn
= gen_rtx_LSHIFTRT (DImode
, fnaddr
, GEN_INT (6));
9012 insn
= gen_rtx_AND (DImode
, insn
, mask
);
9013 insn
= force_operand (gen_rtx_IOR (DImode
, shori1
, insn
), NULL_RTX
);
9014 insn
= gen_lowpart (SImode
, insn
);
9015 emit_move_insn (adjust_address (tramp_mem
, SImode
, 12), insn
);
9016 insn
= gen_rtx_ASHIFT (DImode
, fnaddr
, GEN_INT (10));
9017 insn
= gen_rtx_AND (DImode
, insn
, mask
);
9018 insn
= force_operand (gen_rtx_IOR (DImode
, shori1
, insn
), NULL_RTX
);
9019 insn
= gen_lowpart (SImode
, insn
);
9020 emit_move_insn (adjust_address (tramp_mem
, SImode
, 16), insn
);
/* The last three words, at offsets 20, 24 and 28, are constants. */
9021 emit_move_insn (adjust_address (tramp_mem
, SImode
, 20),
9022 GEN_INT (0x6bf10600));
9023 emit_move_insn (adjust_address (tramp_mem
, SImode
, 24),
9024 GEN_INT (0x4415fc10));
9025 emit_move_insn (adjust_address (tramp_mem
, SImode
, 28),
9026 GEN_INT (0x4401fff0));
9027 emit_insn (gen_ic_invalidate_line (tramp
));
/* Template variant: FIXED_LEN bytes are block-copied from the
   __GCC_nested_trampoline symbol, FNADDR is stored at offset FIXED_LEN, and
   a second Pmode word is stored right after it. */
9030 tramp_templ
= gen_rtx_SYMBOL_REF (Pmode
,"__GCC_nested_trampoline");
9031 fixed_len
= TRAMPOLINE_SIZE
- 2 * GET_MODE_SIZE (Pmode
);
9033 tramp_templ
= gen_datalabel_ref (tramp_templ
);
9035 src
= gen_const_mem (BLKmode
, tramp_templ
);
9036 set_mem_align (dst
, 256);
9037 set_mem_align (src
, 64);
9038 emit_block_move (dst
, src
, GEN_INT (fixed_len
), BLOCK_OP_NORMAL
);
9040 emit_move_insn (adjust_address (tramp_mem
, Pmode
, fixed_len
), fnaddr
);
9041 emit_move_insn (adjust_address (tramp_mem
, Pmode
,
9042 fixed_len
+ GET_MODE_SIZE (Pmode
)),
9044 emit_insn (gen_ic_invalidate_line (tramp
));
9047 else if (TARGET_SHMEDIA
)
9049 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9050 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9051 rtx quad0
= gen_reg_rtx (DImode
), cxtload
= gen_reg_rtx (DImode
);
9052 rtx quad1
= gen_reg_rtx (DImode
), quad2
= gen_reg_rtx (DImode
);
9053 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9054 rotated 10 right, and higher 16 bit of every 32 selected. */
9056 = force_reg (V2HImode
, (simplify_gen_subreg
9057 (V2HImode
, GEN_INT (0x4330432), SImode
, 0)));
9058 rtx ptabs
= force_reg (DImode
, GEN_INT (0x6bf10600));
9059 rtx blink
= force_reg (DImode
, GEN_INT (0x4401fff0));
9061 tramp
= force_reg (Pmode
, tramp
);
9062 fnaddr
= force_reg (SImode
, fnaddr
);
9063 cxt
= force_reg (SImode
, cxt
);
/* QUAD0 is built from FNADDR and stored as the first DImode word. */
9064 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode
, quad0
, 0),
9065 gen_rtx_SUBREG (V2HImode
, fnaddr
, 0),
9067 emit_insn (gen_rotrdi3_mextr (quad0
, quad0
,
9068 GEN_INT (TARGET_LITTLE_ENDIAN
? 24 : 56)));
9069 emit_insn (gen_ashldi3_media (quad0
, quad0
, const2_rtx
));
9070 emit_move_insn (change_address (tramp_mem
, DImode
, NULL_RTX
), quad0
);
/* CXTLOAD is built from CXT with the same three steps. */
9071 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode
, cxtload
, 0),
9072 gen_rtx_SUBREG (V2HImode
, cxt
, 0),
9074 emit_insn (gen_rotrdi3_mextr (cxtload
, cxtload
,
9075 GEN_INT (TARGET_LITTLE_ENDIAN
? 24 : 56)));
9076 emit_insn (gen_ashldi3_media (cxtload
, cxtload
, const2_rtx
));
/* QUAD1 and QUAD2 combine CXTLOAD with PTABS and BLINK; which of
   mshflo_l_di and mextr4 produces which word depends on endianness. */
9077 if (TARGET_LITTLE_ENDIAN
)
9079 emit_insn (gen_mshflo_l_di (quad1
, ptabs
, cxtload
));
9080 emit_insn (gen_mextr4 (quad2
, cxtload
, blink
));
9084 emit_insn (gen_mextr4 (quad1
, cxtload
, ptabs
));
9085 emit_insn (gen_mshflo_l_di (quad2
, blink
, cxtload
));
9087 emit_move_insn (adjust_address (tramp_mem
, DImode
, 8), quad1
);
9088 emit_move_insn (adjust_address (tramp_mem
, DImode
, 16), quad2
);
9089 emit_insn (gen_ic_invalidate_line (tramp
));
9092 else if (TARGET_SHCOMPACT
)
9094 emit_insn (gen_initialize_trampoline (tramp
, cxt
, fnaddr
));
/* Two fixed instruction words, chosen by endianness, followed by CXT at
   offset 8 and FNADDR at offset 12. */
9097 emit_move_insn (change_address (tramp_mem
, SImode
, NULL_RTX
),
9098 gen_int_mode (TARGET_LITTLE_ENDIAN
? 0xd301d202 : 0xd202d301,
9100 emit_move_insn (adjust_address (tramp_mem
, SImode
, 4),
9101 gen_int_mode (TARGET_LITTLE_ENDIAN
? 0x0009422b : 0x422b0009,
9103 emit_move_insn (adjust_address (tramp_mem
, SImode
, 8), cxt
);
9104 emit_move_insn (adjust_address (tramp_mem
, SImode
, 12), fnaddr
);
/* Instruction cache invalidation for TRAMP: a call to the __ic_invalidate
   library function under TARGET_USERMODE, or the ic_invalidate_line insn.
   NOTE(review): the conditions around these two statements are only partly
   visible here. */
9107 if (TARGET_USERMODE
)
9108 emit_library_call (function_symbol (NULL
, "__ic_invalidate",
9110 0, VOIDmode
, 1, tramp
, SImode
);
9112 emit_insn (gen_ic_invalidate_line (tramp
));
9116 /* FIXME: This is overly conservative. A SHcompact function that
9117 receives arguments ``by reference'' will have them stored in its
9118 own stack frame, so it must not pass pointers or references to
9119 these arguments to other functions by means of sibling calls. */
9120 /* If PIC, we cannot make sibling calls to global functions
9121 because the PLT requires r12 to be live. */
/* DECL is the callee's decl and may be null (it is null-checked below);
   EXP is unused. Visible requirements: on SHcompact,
   current_function_args_info.stack_regs must be zero; the current function
   must not be an interrupt handler; and the last group accepts a DECL that
   is not TREE_PUBLIC or whose visibility is not VISIBILITY_DEFAULT.
   NOTE(review): the start of the expression and of its last group is not
   visible here. */
9123 sh_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
9126 && (! TARGET_SHCOMPACT
9127 || current_function_args_info
.stack_regs
== 0)
9128 && ! sh_cfun_interrupt_handler_p ()
9130 || (decl
&& ! TREE_PUBLIC (decl
))
9131 || (decl
&& DECL_VISIBILITY (decl
) != VISIBILITY_DEFAULT
)));
9134 /* Machine specific built-in functions. */
/* One entry of the bdesc table. */
9136 struct builtin_description
/* Insn code of the pattern behind the builtin; used to index insn_data. */
9138 const enum insn_code icode
;
/* Name under which the builtin is registered. */
9139 const char *const name
;
9143 /* describe number and signedness of arguments; arg[0] == result
9144 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument) */
9145 /* 9: 64 bit pointer, 10: 32 bit pointer */
/* Each SH_BLTIN_* macro below is a row index into signature_args. Rows
   with an index below SH_BLTIN_NUM_SHARED_SIGNATURES have their function
   type cached and reused by sh_media_init_builtins. Some indices carry
   two names (19, 20 and 22).
   NOTE(review): the row initializers themselves are not visible here. */
9146 static const char signature_args
[][4] =
9148 #define SH_BLTIN_V2SI2 0
9150 #define SH_BLTIN_V4HI2 1
9152 #define SH_BLTIN_V2SI3 2
9154 #define SH_BLTIN_V4HI3 3
9156 #define SH_BLTIN_V8QI3 4
9158 #define SH_BLTIN_MAC_HISI 5
9160 #define SH_BLTIN_SH_HI 6
9162 #define SH_BLTIN_SH_SI 7
9164 #define SH_BLTIN_V4HI2V2SI 8
9166 #define SH_BLTIN_V4HI2V8QI 9
9168 #define SH_BLTIN_SISF 10
9170 #define SH_BLTIN_LDUA_L 11
9172 #define SH_BLTIN_LDUA_Q 12
9174 #define SH_BLTIN_STUA_L 13
9176 #define SH_BLTIN_STUA_Q 14
9178 #define SH_BLTIN_LDUA_L64 15
9180 #define SH_BLTIN_LDUA_Q64 16
9182 #define SH_BLTIN_STUA_L64 17
9184 #define SH_BLTIN_STUA_Q64 18
9186 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9187 #define SH_BLTIN_2 19
9188 #define SH_BLTIN_SU 19
9190 #define SH_BLTIN_3 20
9191 #define SH_BLTIN_SUS 20
9193 #define SH_BLTIN_PSSV 21
9195 #define SH_BLTIN_XXUU 22
9196 #define SH_BLTIN_UUUU 22
9198 #define SH_BLTIN_PV 23
/* Table of machine-specific builtins: insn code, builtin name, and
   signature_args row for each. An entry's position in this table is its
   function code: sh_media_init_builtins registers it as `d - bdesc' and
   sh_expand_builtin looks it up as `bdesc[fcode]', so the order of entries
   matters. The LDHI/LDLO/STHI/STLO names each appear twice, once with an
   SH_BLTIN_*UA_L/_Q signature and once with the matching _L64/_Q64 one. */
9201 /* mcmv: operands considered unsigned. */
9202 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9203 /* mperm: control value considered unsigned int. */
9204 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9205 /* mshards_q: returns signed short. */
9206 /* nsb: takes long long arg, returns unsigned char. */
9207 static const struct builtin_description bdesc
[] =
9209 { CODE_FOR_absv2si2
, "__builtin_absv2si2", SH_BLTIN_V2SI2
},
9210 { CODE_FOR_absv4hi2
, "__builtin_absv4hi2", SH_BLTIN_V4HI2
},
9211 { CODE_FOR_addv2si3
, "__builtin_addv2si3", SH_BLTIN_V2SI3
},
9212 { CODE_FOR_addv4hi3
, "__builtin_addv4hi3", SH_BLTIN_V4HI3
},
9213 { CODE_FOR_ssaddv2si3
,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3
},
9214 { CODE_FOR_usaddv8qi3
,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3
},
9215 { CODE_FOR_ssaddv4hi3
,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3
},
9216 { CODE_FOR_alloco_i
, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV
},
9217 { CODE_FOR_negcmpeqv8qi
,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3
},
9218 { CODE_FOR_negcmpeqv2si
,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3
},
9219 { CODE_FOR_negcmpeqv4hi
,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3
},
9220 { CODE_FOR_negcmpgtuv8qi
,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3
},
9221 { CODE_FOR_negcmpgtv2si
,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3
},
9222 { CODE_FOR_negcmpgtv4hi
,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3
},
9223 { CODE_FOR_mcmv
, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU
},
9224 { CODE_FOR_mcnvs_lw
, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3
},
9225 { CODE_FOR_mcnvs_wb
, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI
},
9226 { CODE_FOR_mcnvs_wub
, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI
},
9227 { CODE_FOR_mextr1
, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3
},
9228 { CODE_FOR_mextr2
, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3
},
9229 { CODE_FOR_mextr3
, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3
},
9230 { CODE_FOR_mextr4
, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3
},
9231 { CODE_FOR_mextr5
, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3
},
9232 { CODE_FOR_mextr6
, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3
},
9233 { CODE_FOR_mextr7
, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3
},
9234 { CODE_FOR_mmacfx_wl
, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI
},
9235 { CODE_FOR_mmacnfx_wl
,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI
},
9236 { CODE_FOR_mulv2si3
, "__builtin_mulv2si3", SH_BLTIN_V2SI3
, },
9237 { CODE_FOR_mulv4hi3
, "__builtin_mulv4hi3", SH_BLTIN_V4HI3
},
9238 { CODE_FOR_mmulfx_l
, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3
},
9239 { CODE_FOR_mmulfx_w
, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3
},
9240 { CODE_FOR_mmulfxrp_w
,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3
},
9241 { CODE_FOR_mmulhi_wl
, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI
},
9242 { CODE_FOR_mmullo_wl
, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI
},
9243 { CODE_FOR_mmulsum_wq
,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU
},
9244 { CODE_FOR_mperm_w
, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI
},
9245 { CODE_FOR_msad_ubq
, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU
},
9246 { CODE_FOR_mshalds_l
, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI
},
9247 { CODE_FOR_mshalds_w
, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI
},
9248 { CODE_FOR_ashrv2si3
, "__builtin_ashrv2si3", SH_BLTIN_SH_SI
},
9249 { CODE_FOR_ashrv4hi3
, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI
},
9250 { CODE_FOR_mshards_q
, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS
},
9251 { CODE_FOR_mshfhi_b
, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3
},
9252 { CODE_FOR_mshfhi_l
, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3
},
9253 { CODE_FOR_mshfhi_w
, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3
},
9254 { CODE_FOR_mshflo_b
, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3
},
9255 { CODE_FOR_mshflo_l
, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3
},
9256 { CODE_FOR_mshflo_w
, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3
},
9257 { CODE_FOR_ashlv2si3
, "__builtin_ashlv2si3", SH_BLTIN_SH_SI
},
9258 { CODE_FOR_ashlv4hi3
, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI
},
9259 { CODE_FOR_lshrv2si3
, "__builtin_lshrv2si3", SH_BLTIN_SH_SI
},
9260 { CODE_FOR_lshrv4hi3
, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI
},
9261 { CODE_FOR_subv2si3
, "__builtin_subv2si3", SH_BLTIN_V2SI3
},
9262 { CODE_FOR_subv4hi3
, "__builtin_subv4hi3", SH_BLTIN_V4HI3
},
9263 { CODE_FOR_sssubv2si3
,"__builtin_sssubv2si3", SH_BLTIN_V2SI3
},
9264 { CODE_FOR_ussubv8qi3
,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3
},
9265 { CODE_FOR_sssubv4hi3
,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3
},
9266 { CODE_FOR_fcosa_s
, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF
},
9267 { CODE_FOR_fsina_s
, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF
},
9268 { CODE_FOR_fipr
, "__builtin_sh_media_FIPR_S", SH_BLTIN_3
},
9269 { CODE_FOR_ftrv
, "__builtin_sh_media_FTRV_S", SH_BLTIN_3
},
9270 { CODE_FOR_mac_media
, "__builtin_sh_media_FMAC_S", SH_BLTIN_3
},
9271 { CODE_FOR_sqrtdf2
, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2
},
9272 { CODE_FOR_sqrtsf2
, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2
},
9273 { CODE_FOR_fsrra_s
, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2
},
9274 { CODE_FOR_ldhi_l
, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L
},
9275 { CODE_FOR_ldhi_q
, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q
},
9276 { CODE_FOR_ldlo_l
, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L
},
9277 { CODE_FOR_ldlo_q
, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q
},
9278 { CODE_FOR_sthi_l
, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L
},
9279 { CODE_FOR_sthi_q
, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q
},
9280 { CODE_FOR_stlo_l
, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L
},
9281 { CODE_FOR_stlo_q
, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q
},
9282 { CODE_FOR_ldhi_l64
, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64
},
9283 { CODE_FOR_ldhi_q64
, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64
},
9284 { CODE_FOR_ldlo_l64
, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64
},
9285 { CODE_FOR_ldlo_q64
, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64
},
9286 { CODE_FOR_sthi_l64
, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64
},
9287 { CODE_FOR_sthi_q64
, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64
},
9288 { CODE_FOR_stlo_l64
, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64
},
9289 { CODE_FOR_stlo_q64
, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64
},
9290 { CODE_FOR_nsb
, "__builtin_sh_media_NSB", SH_BLTIN_SU
},
9291 { CODE_FOR_byterev
, "__builtin_sh_media_BYTEREV", SH_BLTIN_2
},
9292 { CODE_FOR_prefetch
, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV
},
/* Register every entry of bdesc as a BUILT_IN_MD builtin. The function
   type of an entry is derived from its signature_args row and the operand
   modes of its insn. Types for signatures below
   SH_BLTIN_NUM_SHARED_SIGNATURES are built once and cached in SHARED. The
   function code given to each builtin is its index in bdesc. */
9296 sh_media_init_builtins (void)
9298 tree shared
[SH_BLTIN_NUM_SHARED_SIGNATURES
];
9299 const struct builtin_description
*d
;
9301 memset (shared
, 0, sizeof shared
);
9302 for (d
= bdesc
; d
- bdesc
< (int) ARRAY_SIZE (bdesc
); d
++)
9304 tree type
, arg_type
= 0;
9305 int signature
= d
->signature
;
/* Reuse a cached type when this signature has one already. */
9308 if (signature
< SH_BLTIN_NUM_SHARED_SIGNATURES
&& shared
[signature
])
9309 type
= shared
[signature
];
9312 int has_result
= signature_args
[signature
][0] != 0;
/* Two special cases are tested before the type is built: a pointer
   argument (bit 8) whose low bits are 1 on TARGET_SHMEDIA32 or 2 on
   TARGET_SHMEDIA64, and a floating-point result mode without any FPU.
   NOTE(review): what is done in these cases is not visible here. */
9314 if ((signature_args
[signature
][1] & 8)
9315 && (((signature_args
[signature
][1] & 1) && TARGET_SHMEDIA32
)
9316 || ((signature_args
[signature
][1] & 2) && TARGET_SHMEDIA64
)))
9318 if (! TARGET_FPU_ANY
9319 && FLOAT_MODE_P (insn_data
[d
->icode
].operand
[0].mode
))
/* The argument list is built by consing onto void_list_node. */
9321 type
= void_list_node
;
/* Insn operand numbers are shifted by one when the insn has a result
   operand. */
9324 int arg
= signature_args
[signature
][i
];
9325 int opno
= i
- 1 + has_result
;
9328 arg_type
= ptr_type_node
;
9330 arg_type
= (*lang_hooks
.types
.type_for_mode
)
9331 (insn_data
[d
->icode
].operand
[opno
].mode
,
9336 arg_type
= void_type_node
;
9339 type
= tree_cons (NULL_TREE
, arg_type
, type
);
9341 type
= build_function_type (arg_type
, type
);
9342 if (signature
< SH_BLTIN_NUM_SHARED_SIGNATURES
)
9343 shared
[signature
] = type
;
9345 lang_hooks
.builtin_function (d
->name
, type
, d
- bdesc
, BUILT_IN_MD
,
9350 /* Implements target hook vector_mode_supported_p.
   Two groups of modes are tested: V2SFmode, V4SFmode and V16SFmode under a
   first condition, and V8QImode, V2HImode, V4HImode and V2SImode when
   TARGET_SHMEDIA.
   NOTE(review): the start of the first condition and the values returned
   are not visible here. */
9352 sh_vector_mode_supported_p (enum machine_mode mode
)
9355 && ((mode
== V2SFmode
)
9356 || (mode
== V4SFmode
)
9357 || (mode
== V16SFmode
)))
9360 else if (TARGET_SHMEDIA
9361 && ((mode
== V8QImode
)
9362 || (mode
== V2HImode
)
9363 || (mode
== V4HImode
)
9364 || (mode
== V2SImode
)))
9370 /* Implements target hook dwarf_calling_convention. Return an enum
9371 of dwarf_calling_convention.
   The result is DW_CC_GNU_renesas_sh when sh_attr_renesas_p holds for FUNC,
   DW_CC_normal otherwise. */
9373 sh_dwarf_calling_convention (tree func
)
9375 if (sh_attr_renesas_p (func
))
9376 return DW_CC_GNU_renesas_sh
;
9378 return DW_CC_normal
;
/* Set up the machine-specific builtins by calling sh_media_init_builtins.
   NOTE(review): any condition guarding the call is not visible here. */
9382 sh_init_builtins (void)
9385 sh_media_init_builtins ();
9388 /* Expand an expression EXP that calls a built-in function,
9389 with result going to TARGET if that's convenient
9390 (and in mode MODE if that's convenient).
9391 SUBTARGET may be used as the target for computing one of EXP's operands.
9392 IGNORE is nonzero if the value is to be ignored.
   The function code of the called decl indexes bdesc, which supplies the
   insn code and the signature_args row. Up to three arguments are expanded
   according to that row, and the insn's generator is then called with one
   to four operands. SUBTARGET and MODE are unused. */
9395 sh_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
9396 enum machine_mode mode ATTRIBUTE_UNUSED
, int ignore
)
9398 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
9399 tree arglist
= TREE_OPERAND (exp
, 1);
9400 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
9401 const struct builtin_description
*d
= &bdesc
[fcode
];
9402 enum insn_code icode
= d
->icode
;
9403 int signature
= d
->signature
;
9404 enum machine_mode tmode
= VOIDmode
;
/* A nonzero first signature entry means the builtin produces a result.
   TARGET is replaced by a fresh pseudo of the insn's result mode when its
   mode differs or the operand predicate rejects it. */
9409 if (signature_args
[signature
][0])
9414 tmode
= insn_data
[icode
].operand
[0].mode
;
9416 || GET_MODE (target
) != tmode
9417 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
9418 target
= gen_reg_rtx (tmode
);
/* I walks the signature entries; NOP is the matching insn operand
   number. */
9424 for (i
= 1; i
<= 3; i
++, nop
++)
9427 enum machine_mode opmode
, argmode
;
9430 if (! signature_args
[signature
][i
])
9432 arg
= TREE_VALUE (arglist
);
9433 if (arg
== error_mark_node
)
9435 arglist
= TREE_CHAIN (arglist
);
/* Signature entries with bit 8 set denote pointers and use ptr_type_node;
   the others take the mode of insn operand NOP. */
9436 if (signature_args
[signature
][i
] & 8)
9439 optype
= ptr_type_node
;
9443 opmode
= insn_data
[icode
].operand
[nop
].mode
;
9444 optype
= (*lang_hooks
.types
.type_for_mode
) (opmode
, 0);
/* Convert ARG with a NOP_EXPR when its mode is not OPMODE, expand it, and
   copy it into a register if the operand predicate rejects the result. */
9446 argmode
= TYPE_MODE (TREE_TYPE (arg
));
9447 if (argmode
!= opmode
)
9448 arg
= build1 (NOP_EXPR
, optype
, arg
);
9449 op
[nop
] = expand_expr (arg
, NULL_RTX
, opmode
, 0);
9450 if (! (*insn_data
[icode
].operand
[nop
].predicate
) (op
[nop
], opmode
))
9451 op
[nop
] = copy_to_mode_reg (opmode
, op
[nop
]);
/* Call the generator with as many operands as were collected.
   NOTE(review): the selection among these four calls is not visible
   here. */
9457 pat
= (*insn_data
[d
->icode
].genfun
) (op
[0]);
9460 pat
= (*insn_data
[d
->icode
].genfun
) (op
[0], op
[1]);
9463 pat
= (*insn_data
[d
->icode
].genfun
) (op
[0], op
[1], op
[2]);
9466 pat
= (*insn_data
[d
->icode
].genfun
) (op
[0], op
[1], op
[2], op
[3]);
/* Emit gen_unary_sf_op twice for the SFmode operation CODE applied to OP1,
   with destination OP0: once with both selectors const0_rtx and once with
   both const1_rtx (presumably one insn per element of the V2SF operands --
   confirm against the unary_sf_op pattern). */
9478 sh_expand_unop_v2sf (enum rtx_code code
, rtx op0
, rtx op1
)
9480 rtx sel0
= const0_rtx
;
9481 rtx sel1
= const1_rtx
;
9482 rtx (*fn
) (rtx
, rtx
, rtx
, rtx
, rtx
) = gen_unary_sf_op
;
9483 rtx op
= gen_rtx_fmt_e (code
, SFmode
, op1
);
9485 emit_insn ((*fn
) (op0
, op1
, op
, sel0
, sel0
));
9486 emit_insn ((*fn
) (op0
, op1
, op
, sel1
, sel1
));
/* Two-operand counterpart of sh_expand_unop_v2sf: the SFmode operation
   CODE on OP1 and OP2 is emitted twice through FN, with destination OP0.
   The first call passes selectors (sel0, sel0, sel0, sel1), the second
   (sel1, sel1, sel1, sel0).
   NOTE(review): the initializer of FN is not visible here. */
9490 sh_expand_binop_v2sf (enum rtx_code code
, rtx op0
, rtx op1
, rtx op2
)
9492 rtx sel0
= const0_rtx
;
9493 rtx sel1
= const1_rtx
;
9494 rtx (*fn
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
)
9496 rtx op
= gen_rtx_fmt_ee (code
, SFmode
, op1
, op2
);
9498 emit_insn ((*fn
) (op0
, op1
, op2
, op
, sel0
, sel0
, sel0
, sel1
));
9499 emit_insn ((*fn
) (op0
, op1
, op2
, op
, sel1
, sel1
, sel1
, sel0
));
9502 /* Return the class of registers for which a mode change from FROM to TO is invalid. */
/* FROM and TO are the two modes, CLASS the register class being asked
   about. Each visible return reports whether CLASS intersects a particular
   register class.
   NOTE(review): the returns for the remaining cases are not visible
   here. */
9505 sh_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
9506 enum reg_class
class)
9508 /* We want to enable the use of SUBREGs as a means to
9509 VEC_SELECT a single element of a vector. */
9510 if (to
== SFmode
&& VECTOR_MODE_P (from
) && GET_MODE_INNER (from
) == SFmode
)
9511 return (reg_classes_intersect_p (GENERAL_REGS
, class));
/* For modes of different size: on little-endian targets DF_REGS is checked
   when either mode is narrower than 8 bytes; the DF_HI_REGS check below,
   taken when FROM is narrower than 8 bytes, is presumably the big-endian
   case. */
9513 if (GET_MODE_SIZE (from
) != GET_MODE_SIZE (to
))
9515 if (TARGET_LITTLE_ENDIAN
)
9517 if (GET_MODE_SIZE (to
) < 8 || GET_MODE_SIZE (from
) < 8)
9518 return reg_classes_intersect_p (DF_REGS
, class);
9522 if (GET_MODE_SIZE (from
) < 8)
9523 return reg_classes_intersect_p (DF_HI_REGS
, class);
9530 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9531 that label is used.
   A GOTOFF_P address is unwrapped first: its operand 0 is taken, then
   operand 0 of a PLUS if one is found there, then element 0 of vector 0.
   The count is only updated for a LABEL_REF whose operand is a
   CODE_LABEL. */
9534 sh_mark_label (rtx address
, int nuses
)
9536 if (GOTOFF_P (address
))
9538 /* Extract the label or symbol. */
9539 address
= XEXP (address
, 0);
9540 if (GET_CODE (address
) == PLUS
)
9541 address
= XEXP (address
, 0);
9542 address
= XVECEXP (address
, 0, 0);
9544 if (GET_CODE (address
) == LABEL_REF
9545 && GET_CODE (XEXP (address
, 0)) == CODE_LABEL
)
9546 LABEL_NUSES (XEXP (address
, 0)) += nuses
;
/* Compute extra cost of moving data between one register class
   and another.  */
9552 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9553 uses this information. Hence, the general register <-> floating point
9554 register information here is not used for SFmode. */
9557 sh_register_move_cost (enum machine_mode mode
,
9558 enum reg_class srcclass
, enum reg_class dstclass
)
9560 if (dstclass
== T_REGS
|| dstclass
== PR_REGS
)
9563 if (dstclass
== MAC_REGS
&& srcclass
== MAC_REGS
)
9566 if (mode
== SImode
&& ! TARGET_SHMEDIA
&& TARGET_FMOVD
9567 && REGCLASS_HAS_FP_REG (srcclass
)
9568 && REGCLASS_HAS_FP_REG (dstclass
))
9571 if (REGCLASS_HAS_FP_REG (dstclass
) && srcclass
== T_REGS
)
9572 return ((TARGET_HARD_SH4
&& !optimize_size
) ? 10 : 7);
9574 if ((REGCLASS_HAS_FP_REG (dstclass
) && srcclass
== MAC_REGS
)
9575 || (dstclass
== MAC_REGS
&& REGCLASS_HAS_FP_REG (srcclass
)))
9578 if ((REGCLASS_HAS_FP_REG (dstclass
)
9579 && REGCLASS_HAS_GENERAL_REG (srcclass
))
9580 || (REGCLASS_HAS_GENERAL_REG (dstclass
)
9581 && REGCLASS_HAS_FP_REG (srcclass
)))
9582 return ((TARGET_SHMEDIA
? 4 : TARGET_FMOVD
? 8 : 12)
9583 * ((GET_MODE_SIZE (mode
) + 7) / 8U));
9585 if ((dstclass
== FPUL_REGS
9586 && REGCLASS_HAS_GENERAL_REG (srcclass
))
9587 || (srcclass
== FPUL_REGS
9588 && REGCLASS_HAS_GENERAL_REG (dstclass
)))
9591 if ((dstclass
== FPUL_REGS
9592 && (srcclass
== PR_REGS
|| srcclass
== MAC_REGS
|| srcclass
== T_REGS
))
9593 || (srcclass
== FPUL_REGS
9594 && (dstclass
== PR_REGS
|| dstclass
== MAC_REGS
)))
9597 if ((srcclass
== TARGET_REGS
&& ! REGCLASS_HAS_GENERAL_REG (dstclass
))
9598 || ((dstclass
) == TARGET_REGS
&& ! REGCLASS_HAS_GENERAL_REG (srcclass
)))
9601 /* ??? ptabs faults on (value & 0x3) == 0x3 */
9603 && ((srcclass
) == TARGET_REGS
|| (srcclass
) == SIBCALL_REGS
))
9605 if (sh_gettrcost
>= 0)
9606 return sh_gettrcost
;
9607 else if (!TARGET_PT_FIXED
)
9611 if ((srcclass
== FPSCR_REGS
&& ! REGCLASS_HAS_GENERAL_REG (dstclass
))
9612 || (dstclass
== FPSCR_REGS
&& ! REGCLASS_HAS_GENERAL_REG (srcclass
)))
9617 && ! REGCLASS_HAS_GENERAL_REG (srcclass
)
9618 && ! REGCLASS_HAS_GENERAL_REG (dstclass
)))
9619 return 2 * ((GET_MODE_SIZE (mode
) + 7) / 8U);
9621 return 2 * ((GET_MODE_SIZE (mode
) + 3) / 4U);
9624 static rtx
emit_load_ptr (rtx
, rtx
);
9627 emit_load_ptr (rtx reg
, rtx addr
)
9629 rtx mem
= gen_const_mem (ptr_mode
, addr
);
9631 if (Pmode
!= ptr_mode
)
9632 mem
= gen_rtx_SIGN_EXTEND (Pmode
, mem
);
9633 return emit_move_insn (reg
, mem
);
9637 sh_output_mi_thunk (FILE *file
, tree thunk_fndecl ATTRIBUTE_UNUSED
,
9638 HOST_WIDE_INT delta
, HOST_WIDE_INT vcall_offset
,
9641 CUMULATIVE_ARGS cum
;
9642 int structure_value_byref
= 0;
9643 rtx
this, this_value
, sibcall
, insns
, funexp
;
9644 tree funtype
= TREE_TYPE (function
);
9645 int simple_add
= CONST_OK_FOR_ADD (delta
);
9647 rtx scratch0
, scratch1
, scratch2
;
9650 reload_completed
= 1;
9651 epilogue_completed
= 1;
9653 current_function_uses_only_leaf_regs
= 1;
9654 reset_block_changes ();
9656 emit_note (NOTE_INSN_PROLOGUE_END
);
9658 /* Find the "this" pointer. We have such a wide range of ABIs for the
9659 SH that it's best to do this completely machine independently.
9660 "this" is passed as first argument, unless a structure return pointer
9661 comes first, in which case "this" comes second. */
9662 INIT_CUMULATIVE_ARGS (cum
, funtype
, NULL_RTX
, 0, 1);
9663 #ifndef PCC_STATIC_STRUCT_RETURN
9664 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
))
9665 structure_value_byref
= 1;
9666 #endif /* not PCC_STATIC_STRUCT_RETURN */
9667 if (structure_value_byref
&& sh_struct_value_rtx (function
, 0) == 0)
9669 tree ptype
= build_pointer_type (TREE_TYPE (funtype
));
9671 FUNCTION_ARG_ADVANCE (cum
, Pmode
, ptype
, 1);
9673 this = FUNCTION_ARG (cum
, Pmode
, ptr_type_node
, 1);
9675 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9676 static chain pointer (even if you can't have nested virtual functions
9677 right now, someone might implement them sometime), and the rest of the
9678 registers are used for argument passing, are callee-saved, or reserved. */
9679 /* We need to check call_used_regs / fixed_regs in case -fcall_saved-reg /
9680 -ffixed-reg has been used. */
9681 if (! call_used_regs
[0] || fixed_regs
[0])
9682 error ("r0 needs to be available as a call-clobbered register");
9683 scratch0
= scratch1
= scratch2
= gen_rtx_REG (Pmode
, 0);
9686 if (call_used_regs
[1] && ! fixed_regs
[1])
9687 scratch1
= gen_rtx_REG (ptr_mode
, 1);
9688 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9689 pointing where to return struct values. */
9690 if (call_used_regs
[3] && ! fixed_regs
[3])
9691 scratch2
= gen_rtx_REG (Pmode
, 3);
9693 else if (TARGET_SHMEDIA
)
9695 for (i
= FIRST_GENERAL_REG
; i
<= LAST_GENERAL_REG
; i
++)
9696 if (i
!= REGNO (scratch0
) &&
9697 call_used_regs
[i
] && ! fixed_regs
[i
] && ! FUNCTION_ARG_REGNO_P (i
))
9699 scratch1
= gen_rtx_REG (ptr_mode
, i
);
9702 if (scratch1
== scratch0
)
9703 error ("Need a second call-clobbered general purpose register");
9704 for (i
= FIRST_TARGET_REG
; i
<= LAST_TARGET_REG
; i
++)
9705 if (call_used_regs
[i
] && ! fixed_regs
[i
])
9707 scratch2
= gen_rtx_REG (Pmode
, i
);
9710 if (scratch2
== scratch0
)
9711 error ("Need a call-clobbered target register");
9714 this_value
= plus_constant (this, delta
);
9716 && (simple_add
|| scratch0
!= scratch1
)
9717 && strict_memory_address_p (ptr_mode
, this_value
))
9719 emit_load_ptr (scratch0
, this_value
);
9725 else if (simple_add
)
9726 emit_move_insn (this, this_value
);
9729 emit_move_insn (scratch1
, GEN_INT (delta
));
9730 emit_insn (gen_add2_insn (this, scratch1
));
9738 emit_load_ptr (scratch0
, this);
9740 offset_addr
= plus_constant (scratch0
, vcall_offset
);
9741 if (strict_memory_address_p (ptr_mode
, offset_addr
))
9743 else if (! TARGET_SH5
&& scratch0
!= scratch1
)
9745 /* scratch0 != scratch1, and we have indexed loads. Get better
9746 schedule by loading the offset into r1 and using an indexed
9747 load - then the load of r1 can issue before the load from
9748 (this + delta) finishes. */
9749 emit_move_insn (scratch1
, GEN_INT (vcall_offset
));
9750 offset_addr
= gen_rtx_PLUS (Pmode
, scratch0
, scratch1
);
9752 else if (CONST_OK_FOR_ADD (vcall_offset
))
9754 emit_insn (gen_add2_insn (scratch0
, GEN_INT (vcall_offset
)));
9755 offset_addr
= scratch0
;
9757 else if (scratch0
!= scratch1
)
9759 emit_move_insn (scratch1
, GEN_INT (vcall_offset
));
9760 emit_insn (gen_add2_insn (scratch0
, scratch1
));
9761 offset_addr
= scratch0
;
9764 gcc_unreachable (); /* FIXME */
9765 emit_load_ptr (scratch0
, offset_addr
);
9767 if (Pmode
!= ptr_mode
)
9768 scratch0
= gen_rtx_TRUNCATE (ptr_mode
, scratch0
);
9769 emit_insn (gen_add2_insn (this, scratch0
));
9772 /* Generate a tail call to the target function. */
9773 if (! TREE_USED (function
))
9775 assemble_external (function
);
9776 TREE_USED (function
) = 1;
9778 funexp
= XEXP (DECL_RTL (function
), 0);
9779 /* If the function is overridden, so is the thunk, hence we don't
9780 need GOT addressing even if this is a public symbol. */
9782 if (TARGET_SH1
&& ! flag_weak
)
9783 sibcall
= gen_sibcalli_thunk (funexp
, const0_rtx
);
9786 if (TARGET_SH2
&& flag_pic
)
9788 sibcall
= gen_sibcall_pcrel (funexp
, const0_rtx
);
9789 XEXP (XVECEXP (sibcall
, 0, 2), 0) = scratch2
;
9793 if (TARGET_SHMEDIA
&& flag_pic
)
9795 funexp
= gen_sym2PIC (funexp
);
9796 PUT_MODE (funexp
, Pmode
);
9798 emit_move_insn (scratch2
, funexp
);
9799 funexp
= gen_rtx_MEM (FUNCTION_MODE
, scratch2
);
9800 sibcall
= gen_sibcall (funexp
, const0_rtx
, NULL_RTX
);
9802 sibcall
= emit_call_insn (sibcall
);
9803 SIBLING_CALL_P (sibcall
) = 1;
9804 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall
), this);
9807 /* Run just enough of rest_of_compilation to do scheduling and get
9808 the insns emitted. Note that use_thunk calls
9809 assemble_start_function and assemble_end_function. */
9811 insn_locators_initialize ();
9812 insns
= get_insns ();
9816 /* Initialize the bitmap obstacks. */
9817 bitmap_obstack_initialize (NULL
);
9818 bitmap_obstack_initialize (®_obstack
);
9821 rtl_register_cfg_hooks ();
9822 init_rtl_bb_info (ENTRY_BLOCK_PTR
);
9823 init_rtl_bb_info (EXIT_BLOCK_PTR
);
9824 ENTRY_BLOCK_PTR
->flags
|= BB_RTL
;
9825 EXIT_BLOCK_PTR
->flags
|= BB_RTL
;
9826 find_basic_blocks (insns
);
9828 if (flag_schedule_insns_after_reload
)
9830 life_analysis (dump_file
, PROP_FINAL
);
9832 split_all_insns (1);
9834 schedule_insns (dump_file
);
9836 /* We must split jmp insn in PIC case. */
9838 split_all_insns_noflow ();
9843 if (optimize
> 0 && flag_delayed_branch
)
9844 dbr_schedule (insns
, dump_file
);
9846 shorten_branches (insns
);
9847 final_start_function (insns
, file
, 1);
9848 final (insns
, file
, 1);
9849 final_end_function ();
9853 /* Release all memory allocated by flow. */
9854 free_basic_block_vars ();
9856 /* Release the bitmap obstacks. */
9857 bitmap_obstack_release (®_obstack
);
9858 bitmap_obstack_release (NULL
);
9861 reload_completed
= 0;
9862 epilogue_completed
= 0;
9867 function_symbol (rtx target
, const char *name
, enum sh_function_kind kind
)
9871 /* If this is not an ordinary function, the name usually comes from a
9872 string literal or an sprintf buffer. Make sure we use the same
9873 string consistently, so that cse will be able to unify address loads. */
9874 if (kind
!= FUNCTION_ORDINARY
)
9875 name
= IDENTIFIER_POINTER (get_identifier (name
));
9876 sym
= gen_rtx_SYMBOL_REF (Pmode
, name
);
9877 SYMBOL_REF_FLAGS (sym
) = SYMBOL_FLAG_FUNCTION
;
9881 case FUNCTION_ORDINARY
:
9885 rtx reg
= target
? target
: gen_reg_rtx (Pmode
);
9887 emit_insn (gen_symGOT2reg (reg
, sym
));
9893 /* ??? To allow cse to work, we use GOTOFF relocations.
9894 we could add combiner patterns to transform this into
9895 straight pc-relative calls with sym2PIC / bsrf when
9896 label load and function call are still 1:1 and in the
9897 same basic block during combine. */
9898 rtx reg
= target
? target
: gen_reg_rtx (Pmode
);
9900 emit_insn (gen_symGOTOFF2reg (reg
, sym
));
9905 if (target
&& sym
!= target
)
9907 emit_move_insn (target
, sym
);
9913 /* Find the number of a general purpose register in S. */
9915 scavenge_reg (HARD_REG_SET
*s
)
9918 for (r
= FIRST_GENERAL_REG
; r
<= LAST_GENERAL_REG
; r
++)
9919 if (TEST_HARD_REG_BIT (*s
, r
))
9925 sh_get_pr_initial_val (void)
9929 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
9930 PR register on SHcompact, because it might be clobbered by the prologue.
9931 We check first if that is known to be the case. */
9932 if (TARGET_SHCOMPACT
9933 && ((current_function_args_info
.call_cookie
9934 & ~ CALL_COOKIE_RET_TRAMP (1))
9935 || current_function_has_nonlocal_label
))
9936 return gen_frame_mem (SImode
, return_address_pointer_rtx
);
9938 /* If we haven't finished rtl generation, there might be a nonlocal label
9939 that we haven't seen yet.
9940 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
9941 is set, unless it has been called before for the same register. And even
9942 then, we end in trouble if we didn't use the register in the same
9943 basic block before. So call get_hard_reg_initial_val now and wrap it
9944 in an unspec if we might need to replace it. */
9945 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
9946 combine can put the pseudo returned by get_hard_reg_initial_val into
9947 instructions that need a general purpose register, which will fail to
9948 be recognized when the pseudo becomes allocated to PR. */
9950 = get_hard_reg_initial_val (Pmode
, TARGET_SHMEDIA
? PR_MEDIA_REG
: PR_REG
);
9952 return gen_rtx_UNSPEC (SImode
, gen_rtvec (1, val
), UNSPEC_RA
);
9957 sh_expand_t_scc (enum rtx_code code
, rtx target
)
9959 rtx result
= target
;
9962 if (GET_CODE (sh_compare_op0
) != REG
|| REGNO (sh_compare_op0
) != T_REG
9963 || GET_CODE (sh_compare_op1
) != CONST_INT
)
9965 if (GET_CODE (result
) != REG
)
9966 result
= gen_reg_rtx (SImode
);
9967 val
= INTVAL (sh_compare_op1
);
9968 if ((code
== EQ
&& val
== 1) || (code
== NE
&& val
== 0))
9969 emit_insn (gen_movt (result
));
9970 else if ((code
== EQ
&& val
== 0) || (code
== NE
&& val
== 1))
9972 emit_insn (gen_rtx_CLOBBER (VOIDmode
, result
));
9973 emit_insn (gen_subc (result
, result
, result
));
9974 emit_insn (gen_addsi3 (result
, result
, const1_rtx
));
9976 else if (code
== EQ
|| code
== NE
)
9977 emit_insn (gen_move_insn (result
, GEN_INT (code
== NE
)));
9980 if (result
!= target
)
9981 emit_move_insn (target
, result
);
9985 /* INSN is an sfunc; return the rtx that describes the address used. */
9987 extract_sfunc_addr (rtx insn
)
9989 rtx pattern
, part
= NULL_RTX
;
9992 pattern
= PATTERN (insn
);
9993 len
= XVECLEN (pattern
, 0);
9994 for (i
= 0; i
< len
; i
++)
9996 part
= XVECEXP (pattern
, 0, i
);
9997 if (GET_CODE (part
) == USE
&& GET_MODE (XEXP (part
, 0)) == Pmode
9998 && GENERAL_REGISTER_P (true_regnum (XEXP (part
, 0))))
9999 return XEXP (part
, 0);
10001 gcc_assert (GET_CODE (XVECEXP (pattern
, 0, 0)) == UNSPEC_VOLATILE
);
10002 return XVECEXP (XVECEXP (pattern
, 0, 0), 0, 1);
/* Verify that the register in use_sfunc_addr still agrees with the address
   used in the sfunc.  This prevents fill_slots_from_thread from changing
   use_sfunc_addr.
   INSN is the use_sfunc_addr instruction, and REG is the register it
   guards.  */
10011 check_use_sfunc_addr (rtx insn
, rtx reg
)
10013 /* Search for the sfunc. It should really come right after INSN. */
10014 while ((insn
= NEXT_INSN (insn
)))
10016 if (GET_CODE (insn
) == CODE_LABEL
|| GET_CODE (insn
) == JUMP_INSN
)
10018 if (! INSN_P (insn
))
10021 if (GET_CODE (PATTERN (insn
)) == SEQUENCE
)
10022 insn
= XVECEXP (PATTERN (insn
), 0, 0);
10023 if (GET_CODE (PATTERN (insn
)) != PARALLEL
10024 || get_attr_type (insn
) != TYPE_SFUNC
)
10026 return rtx_equal_p (extract_sfunc_addr (insn
), reg
);
10028 gcc_unreachable ();
10031 /* This function returns a constant rtx that represents pi / 2**15 in
10032 SFmode. it's used to scale SFmode angles, in radians, to a
10033 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
10034 maps to 0x10000). */
10036 static GTY(()) rtx sh_fsca_sf2int_rtx
;
10039 sh_fsca_sf2int (void)
10041 if (! sh_fsca_sf2int_rtx
)
10043 REAL_VALUE_TYPE rv
;
10045 real_from_string (&rv
, "10430.378350470453");
10046 sh_fsca_sf2int_rtx
= const_double_from_real_value (rv
, SFmode
);
10049 return sh_fsca_sf2int_rtx
;
10052 /* This function returns a constant rtx that represents pi / 2**15 in
10053 DFmode. it's used to scale DFmode angles, in radians, to a
10054 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
10055 maps to 0x10000). */
10057 static GTY(()) rtx sh_fsca_df2int_rtx
;
10060 sh_fsca_df2int (void)
10062 if (! sh_fsca_df2int_rtx
)
10064 REAL_VALUE_TYPE rv
;
10066 real_from_string (&rv
, "10430.378350470453");
10067 sh_fsca_df2int_rtx
= const_double_from_real_value (rv
, DFmode
);
10070 return sh_fsca_df2int_rtx
;
10073 /* This function returns a constant rtx that represents 2**15 / pi in
10074 SFmode. it's used to scale a fixed-point signed 16.16-bit fraction
10075 of a full circle back to a SFmode value, i.e., 0x10000 maps to
10078 static GTY(()) rtx sh_fsca_int2sf_rtx
;
10081 sh_fsca_int2sf (void)
10083 if (! sh_fsca_int2sf_rtx
)
10085 REAL_VALUE_TYPE rv
;
10087 real_from_string (&rv
, "9.587379924285257e-5");
10088 sh_fsca_int2sf_rtx
= const_double_from_real_value (rv
, SFmode
);
10091 return sh_fsca_int2sf_rtx
;
10094 /* Initialize the CUMULATIVE_ARGS structure. */
10097 sh_init_cumulative_args (CUMULATIVE_ARGS
* pcum
,
10099 rtx libname ATTRIBUTE_UNUSED
,
10101 signed int n_named_args
,
10102 enum machine_mode mode
)
10104 pcum
->arg_count
[(int) SH_ARG_FLOAT
] = 0;
10105 pcum
->free_single_fp_reg
= 0;
10106 pcum
->stack_regs
= 0;
10107 pcum
->byref_regs
= 0;
10109 pcum
->outgoing
= (n_named_args
== -1) ? 0 : 1;
10111 /* XXX - Should we check TARGET_HITACHI here ??? */
10112 pcum
->renesas_abi
= sh_attr_renesas_p (fntype
) ? 1 : 0;
10116 pcum
->force_mem
= ((TARGET_HITACHI
|| pcum
->renesas_abi
)
10117 && aggregate_value_p (TREE_TYPE (fntype
), fndecl
));
10118 pcum
->prototype_p
= TYPE_ARG_TYPES (fntype
) ? TRUE
: FALSE
;
10119 pcum
->arg_count
[(int) SH_ARG_INT
]
10120 = TARGET_SH5
&& aggregate_value_p (TREE_TYPE (fntype
), fndecl
);
10123 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10124 && pcum
->arg_count
[(int) SH_ARG_INT
] == 0
10125 && (TYPE_MODE (TREE_TYPE (fntype
)) == BLKmode
10126 ? int_size_in_bytes (TREE_TYPE (fntype
))
10127 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype
)))) > 4
10128 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype
)))
10129 == FIRST_RET_REG
));
10133 pcum
->arg_count
[(int) SH_ARG_INT
] = 0;
10134 pcum
->prototype_p
= FALSE
;
10135 if (mode
!= VOIDmode
)
10137 pcum
->call_cookie
=
10138 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10139 && GET_MODE_SIZE (mode
) > 4
10140 && BASE_RETURN_VALUE_REG (mode
) == FIRST_RET_REG
);
10142 /* If the default ABI is the Renesas ABI then all library
10143 calls must assume that the library will be using the
10144 Renesas ABI. So if the function would return its result
10145 in memory then we must force the address of this memory
10146 block onto the stack. Ideally we would like to call
10147 targetm.calls.return_in_memory() here but we do not have
10148 the TYPE or the FNDECL available so we synthesize the
10149 contents of that function as best we can. */
10151 (TARGET_DEFAULT
& MASK_HITACHI
)
10152 && (mode
== BLKmode
10153 || (GET_MODE_SIZE (mode
) > 4
10154 && !(mode
== DFmode
10155 && TARGET_FPU_DOUBLE
)));
10159 pcum
->call_cookie
= 0;
10160 pcum
->force_mem
= FALSE
;
10165 /* Determine if two hard register sets intersect.
10166 Return 1 if they do. */
10169 hard_regs_intersect_p (HARD_REG_SET
*a
, HARD_REG_SET
*b
)
10172 COPY_HARD_REG_SET (c
, *a
);
10173 AND_HARD_REG_SET (c
, *b
);
10174 GO_IF_HARD_REG_SUBSET (c
, reg_class_contents
[(int) NO_REGS
], lose
);
10180 #ifdef TARGET_ADJUST_UNROLL_MAX
10182 sh_adjust_unroll_max (struct loop
* loop
, int insn_count
,
10183 int max_unrolled_insns
, int strength_reduce_p
,
10186 /* This doesn't work in 4.0 because the old unroller & loop.h is gone. */
10187 if (TARGET_ADJUST_UNROLL
&& TARGET_SHMEDIA
)
10189 /* Throttle back loop unrolling so that the costs of using more
10190 targets than the eight target register we have don't outweigh
10191 the benefits of unrolling. */
10193 int n_labels
= 0, n_calls
= 0, n_exit_dest
= 0, n_inner_loops
= -1;
10194 int n_barriers
= 0;
10199 int unroll_benefit
= 0, mem_latency
= 0;
10200 int base_cost
, best_cost
, cost
;
10201 int factor
, best_factor
;
10203 unsigned max_iterations
= 32767;
10205 int need_precond
= 0, precond
= 0;
10206 basic_block
* bbs
= get_loop_body (loop
);
10207 struct niter_desc
*desc
;
10209 /* Assume that all labels inside the loop are used from inside the
10210 loop. If the loop has multiple entry points, it is unlikely to
10211 be unrolled anyways.
10212 Also assume that all calls are to different functions. That is
10213 somewhat pessimistic, but if you have lots of calls, unrolling the
10214 loop is not likely to gain you much in the first place. */
10215 i
= loop
->num_nodes
- 1;
10216 for (insn
= BB_HEAD (bbs
[i
]); ; )
10218 if (GET_CODE (insn
) == CODE_LABEL
)
10220 else if (GET_CODE (insn
) == CALL_INSN
)
10222 else if (GET_CODE (insn
) == NOTE
10223 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_LOOP_BEG
)
10225 else if (GET_CODE (insn
) == BARRIER
)
10227 if (insn
!= BB_END (bbs
[i
]))
10228 insn
= NEXT_INSN (insn
);
10230 insn
= BB_HEAD (bbs
[i
]);
10235 /* One label for the loop top is normal, and it won't be duplicated by
10238 return max_unrolled_insns
;
10239 if (n_inner_loops
> 0)
10241 for (dest
= loop
->exit_labels
; dest
&& n_exit_dest
< 8;
10242 dest
= LABEL_NEXTREF (dest
))
10244 for (i
= n_exit_dest
- 1;
10245 i
>= 0 && XEXP (dest
, 0) != XEXP (exit_dest
[i
], 0); i
--);
10247 exit_dest
[n_exit_dest
++] = dest
;
10249 /* If the loop top and call and exit destinations are enough to fill up
10250 the target registers, we're unlikely to do any more damage by
10252 if (n_calls
+ n_exit_dest
>= 7)
10253 return max_unrolled_insns
;
10255 /* ??? In the new loop unroller, there is no longer any strength
10256 reduction information available. Thus, when it comes to unrolling,
10257 we know the cost of everything, but we know the value of nothing. */
10259 if (strength_reduce_p
10260 && (unroll_type
== LPT_UNROLL_RUNTIME
10261 || unroll_type
== LPT_UNROLL_CONSTANT
10262 || unroll_type
== LPT_PEEL_COMPLETELY
))
10264 struct loop_ivs
*ivs
= LOOP_IVS (loop
);
10265 struct iv_class
*bl
;
10267 /* We'll save one compare-and-branch in each loop body copy
10268 but the last one. */
10269 unroll_benefit
= 1;
10270 /* Assess the benefit of removing biv & giv updates. */
10271 for (bl
= ivs
->list
; bl
; bl
= bl
->next
)
10273 rtx increment
= biv_total_increment (bl
);
10274 struct induction
*v
;
10276 if (increment
&& GET_CODE (increment
) == CONST_INT
)
10279 for (v
= bl
->giv
; v
; v
= v
->next_iv
)
10281 if (! v
->ignore
&& v
->same
== 0
10282 && GET_CODE (v
->mult_val
) == CONST_INT
)
10284 /* If this giv uses an array, try to determine
10285 a maximum iteration count from the size of the
10286 array. This need not be correct all the time,
10287 but should not be too far off the mark too often. */
10288 while (v
->giv_type
== DEST_ADDR
)
10290 rtx mem
= PATTERN (v
->insn
);
10291 tree mem_expr
, type
, size_tree
;
10293 if (GET_CODE (SET_SRC (mem
)) == MEM
)
10294 mem
= SET_SRC (mem
);
10295 else if (GET_CODE (SET_DEST (mem
)) == MEM
)
10296 mem
= SET_DEST (mem
);
10299 mem_expr
= MEM_EXPR (mem
);
10302 type
= TREE_TYPE (mem_expr
);
10303 if (TREE_CODE (type
) != ARRAY_TYPE
10304 || ! TYPE_SIZE (type
) || ! TYPE_SIZE_UNIT (type
))
10306 size_tree
= fold (build (TRUNC_DIV_EXPR
,
10309 TYPE_SIZE_UNIT (type
)));
10310 if (TREE_CODE (size_tree
) == INTEGER_CST
10311 && ! TREE_INT_CST_HIGH (size_tree
)
10312 && TREE_INT_CST_LOW (size_tree
) < max_iterations
)
10313 max_iterations
= TREE_INT_CST_LOW (size_tree
);
10321 /* Assume there is at least some benefit. */
10322 unroll_benefit
= 1;
10325 desc
= get_simple_loop_desc (loop
);
10326 n_iterations
= desc
->const_iter
? desc
->niter
: 0;
10328 = max_iterations
< desc
->niter_max
? max_iterations
: desc
->niter_max
;
10330 if (! strength_reduce_p
|| ! n_iterations
)
10332 if (! n_iterations
)
10335 = max_iterations
< 3 ? max_iterations
: max_iterations
* 3 / 4;
10336 if (! n_iterations
)
10339 #if 0 /* ??? See above - missing induction variable information. */
10340 while (unroll_benefit
> 1) /* no loop */
10342 /* We include the benefit of biv/ giv updates. Check if some or
10343 all of these updates are likely to fit into a scheduling
10345 We check for the following case:
10346 - All the insns leading to the first JUMP_INSN are in a strict
10348 - there is at least one memory reference in them.
10350 When we find such a pattern, we assume that we can hide as many
10351 updates as the total of the load latency is, if we have an
10352 unroll factor of at least two. We might or might not also do
10353 this without unrolling, so rather than considering this as an
10354 extra unroll benefit, discount it in the unroll benefits of unroll
10355 factors higher than two. */
10359 insn
= next_active_insn (loop
->start
);
10360 last_set
= single_set (insn
);
10363 if (GET_CODE (SET_SRC (last_set
)) == MEM
)
10365 for (insn
= NEXT_INSN (insn
); insn
!= end
; insn
= NEXT_INSN (insn
))
10367 if (! INSN_P (insn
))
10369 if (GET_CODE (insn
) == JUMP_INSN
)
10371 if (! reg_referenced_p (SET_DEST (last_set
), PATTERN (insn
)))
10373 /* Check if this is a to-be-reduced giv insn. */
10374 struct loop_ivs
*ivs
= LOOP_IVS (loop
);
10375 struct iv_class
*bl
;
10376 struct induction
*v
;
10377 for (bl
= ivs
->list
; bl
; bl
= bl
->next
)
10379 if (bl
->biv
->insn
== insn
)
10381 for (v
= bl
->giv
; v
; v
= v
->next_iv
)
10382 if (v
->insn
== insn
)
10390 set
= single_set (insn
);
10393 if (GET_CODE (SET_SRC (set
)) == MEM
)
10397 if (mem_latency
< 0)
10399 else if (mem_latency
> unroll_benefit
- 1)
10400 mem_latency
= unroll_benefit
- 1;
10404 if (n_labels
+ (unroll_benefit
+ n_labels
* 8) / n_iterations
10406 return max_unrolled_insns
;
10408 n_dest
= n_labels
+ n_calls
+ n_exit_dest
;
10409 base_cost
= n_dest
<= 8 ? 0 : n_dest
- 7;
10412 if (n_barriers
* 2 > n_labels
- 1)
10413 n_barriers
= (n_labels
- 1) / 2;
10414 for (factor
= 2; factor
<= 8; factor
++)
10416 /* Bump up preconditioning cost for each power of two. */
10417 if (! (factor
& (factor
-1)))
10419 /* When preconditioning, only powers of two will be considered. */
10420 else if (need_precond
)
10422 n_dest
= ((unroll_type
!= LPT_PEEL_COMPLETELY
)
10423 + (n_labels
- 1) * factor
+ n_calls
+ n_exit_dest
10424 - (n_barriers
* factor
>> 1)
10427 = ((n_dest
<= 8 ? 0 : n_dest
- 7)
10428 - base_cost
* factor
10429 - ((factor
> 2 ? unroll_benefit
- mem_latency
: unroll_benefit
)
10430 * (factor
- (unroll_type
!= LPT_PEEL_COMPLETELY
)))
10431 + ((unroll_benefit
+ 1 + (n_labels
- 1) * factor
)
10434 cost
+= (precond
+ unroll_benefit
* factor
/ 2) / n_iterations
;
10435 if (cost
< best_cost
)
10438 best_factor
= factor
;
10441 threshold
= best_factor
* insn_count
;
10442 if (max_unrolled_insns
> threshold
)
10443 max_unrolled_insns
= threshold
;
10445 return max_unrolled_insns
;
10447 #endif /* TARGET_ADJUST_UNROLL_MAX */
10449 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
10450 not enter into CONST_DOUBLE for the replace.
10452 Note that copying is not done so X must not be shared unless all copies
10453 are to be modified.
10455 This is like replace_rtx, except that we operate on N_REPLACEMENTS
10456 replacements simultaneously - FROM(n) is replacements[n*2] and to(n) is
10457 replacements[n*2+1] - and that we take mode changes into account.
10459 If a replacement is ambiguous, return NULL_RTX.
10461 If MODIFY is zero, don't modify any rtl in place,
10462 just return zero or nonzero for failure / success. */
10465 replace_n_hard_rtx (rtx x
, rtx
*replacements
, int n_replacements
, int modify
)
10470 /* The following prevents loops occurrence when we change MEM in
10471 CONST_DOUBLE onto the same CONST_DOUBLE. */
10472 if (x
!= 0 && GET_CODE (x
) == CONST_DOUBLE
)
10475 for (i
= n_replacements
- 1; i
>= 0 ; i
--)
10476 if (x
== replacements
[i
*2] && GET_MODE (x
) == GET_MODE (replacements
[i
*2+1]))
10477 return replacements
[i
*2+1];
10479 /* Allow this function to make replacements in EXPR_LISTs. */
10483 if (GET_CODE (x
) == SUBREG
)
10485 rtx
new = replace_n_hard_rtx (SUBREG_REG (x
), replacements
,
10486 n_replacements
, modify
);
10488 if (GET_CODE (new) == CONST_INT
)
10490 x
= simplify_subreg (GET_MODE (x
), new,
10491 GET_MODE (SUBREG_REG (x
)),
10497 SUBREG_REG (x
) = new;
10501 else if (GET_CODE (x
) == REG
)
10503 unsigned regno
= REGNO (x
);
10504 unsigned nregs
= (regno
< FIRST_PSEUDO_REGISTER
10505 ? HARD_REGNO_NREGS (regno
, GET_MODE (x
)) : 1);
10506 rtx result
= NULL_RTX
;
10508 for (i
= n_replacements
- 1; i
>= 0; i
--)
10510 rtx from
= replacements
[i
*2];
10511 rtx to
= replacements
[i
*2+1];
10512 unsigned from_regno
, from_nregs
, to_regno
, new_regno
;
10514 if (GET_CODE (from
) != REG
)
10516 from_regno
= REGNO (from
);
10517 from_nregs
= (from_regno
< FIRST_PSEUDO_REGISTER
10518 ? HARD_REGNO_NREGS (from_regno
, GET_MODE (from
)) : 1);
10519 if (regno
< from_regno
+ from_nregs
&& regno
+ nregs
> from_regno
)
10521 if (regno
< from_regno
10522 || regno
+ nregs
> from_regno
+ nregs
10523 || GET_CODE (to
) != REG
10526 to_regno
= REGNO (to
);
10527 if (to_regno
< FIRST_PSEUDO_REGISTER
)
10529 new_regno
= regno
+ to_regno
- from_regno
;
10530 if ((unsigned) HARD_REGNO_NREGS (new_regno
, GET_MODE (x
))
10533 result
= gen_rtx_REG (GET_MODE (x
), new_regno
);
10535 else if (GET_MODE (x
) <= GET_MODE (to
))
10536 result
= gen_lowpart_common (GET_MODE (x
), to
);
10538 result
= gen_lowpart_SUBREG (GET_MODE (x
), to
);
10541 return result
? result
: x
;
10543 else if (GET_CODE (x
) == ZERO_EXTEND
)
10545 rtx
new = replace_n_hard_rtx (XEXP (x
, 0), replacements
,
10546 n_replacements
, modify
);
10548 if (GET_CODE (new) == CONST_INT
)
10550 x
= simplify_unary_operation (ZERO_EXTEND
, GET_MODE (x
),
10551 new, GET_MODE (XEXP (x
, 0)));
10561 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
10562 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
10568 new = replace_n_hard_rtx (XEXP (x
, i
), replacements
,
10569 n_replacements
, modify
);
10575 else if (fmt
[i
] == 'E')
10576 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
10578 new = replace_n_hard_rtx (XVECEXP (x
, i
, j
), replacements
,
10579 n_replacements
, modify
);
10583 XVECEXP (x
, i
, j
) = new;
10591 sh_gen_truncate (enum machine_mode mode
, rtx x
, int need_sign_ext
)
10593 enum rtx_code code
= TRUNCATE
;
10595 if (GET_CODE (x
) == ZERO_EXTEND
|| GET_CODE (x
) == SIGN_EXTEND
)
10597 rtx inner
= XEXP (x
, 0);
10598 enum machine_mode inner_mode
= GET_MODE (inner
);
10600 if (inner_mode
== mode
)
10602 else if (GET_MODE_SIZE (inner_mode
) >= GET_MODE_SIZE (mode
))
10604 else if (GET_MODE_SIZE (inner_mode
) < GET_MODE_SIZE (mode
)
10605 && (! need_sign_ext
|| GET_CODE (x
) == SIGN_EXTEND
))
10607 code
= GET_CODE (x
);
10611 return gen_rtx_fmt_e (code
, mode
, x
);
10614 /* called via for_each_rtx after reload, to clean up truncates of
10615 registers that span multiple actual hard registers. */
10617 shmedia_cleanup_truncate (rtx
*p
, void *n_changes
)
10621 if (GET_CODE (x
) != TRUNCATE
)
10624 if (GET_MODE_SIZE (GET_MODE (reg
)) > 8 && GET_CODE (reg
) == REG
)
10626 enum machine_mode reg_mode
= GET_MODE (reg
);
10627 XEXP (x
, 0) = simplify_subreg (DImode
, reg
, reg_mode
,
10628 subreg_lowpart_offset (DImode
, reg_mode
));
10629 *(int*) n_changes
+= 1;
10635 /* Load and store depend on the highpart of the address. However,
10636 set_attr_alternative does not give well-defined results before reload,
10637 so we must look at the rtl ourselves to see if any of the feeding
10638 registers is used in a memref. */
10640 /* Called by sh_contains_memref_p via for_each_rtx. */
10642 sh_contains_memref_p_1 (rtx
*loc
, void *data ATTRIBUTE_UNUSED
)
10644 return (GET_CODE (*loc
) == MEM
);
10647 /* Return nonzero iff INSN contains a MEM. */
10649 sh_contains_memref_p (rtx insn
)
10651 return for_each_rtx (&PATTERN (insn
), &sh_contains_memref_p_1
, NULL
);
10654 /* FNADDR is the MEM expression from a call expander. Return an address
10655 to use in an SHmedia insn pattern. */
10657 shmedia_prepare_call_address (rtx fnaddr
, int is_sibcall
)
10661 fnaddr
= XEXP (fnaddr
, 0);
10662 is_sym
= GET_CODE (fnaddr
) == SYMBOL_REF
;
10663 if (flag_pic
&& is_sym
)
10665 if (! SYMBOL_REF_LOCAL_P (fnaddr
))
10667 rtx reg
= gen_reg_rtx (Pmode
);
10669 /* We must not use GOTPLT for sibcalls, because PIC_REG
10670 must be restored before the PLT code gets to run. */
10672 emit_insn (gen_symGOT2reg (reg
, fnaddr
));
10674 emit_insn (gen_symGOTPLT2reg (reg
, fnaddr
));
10679 fnaddr
= gen_sym2PIC (fnaddr
);
10680 PUT_MODE (fnaddr
, Pmode
);
10683 /* If ptabs might trap, make this visible to the rest of the compiler.
10684 We generally assume that symbols pertain to valid locations, but
10685 it is possible to generate invalid symbols with asm or linker tricks.
10686 In a list of functions where each returns its successor, an invalid
10687 symbol might denote an empty list. */
10688 if (!TARGET_PT_FIXED
10689 && (!is_sym
|| TARGET_INVALID_SYMBOLS
)
10690 && (!REG_P (fnaddr
) || ! TARGET_REGISTER_P (REGNO (fnaddr
))))
10692 rtx tr
= gen_reg_rtx (PDImode
);
10694 emit_insn (gen_ptabs (tr
, fnaddr
));
10697 else if (! target_reg_operand (fnaddr
, Pmode
))
10698 fnaddr
= copy_to_mode_reg (Pmode
, fnaddr
);
10702 /* ??? insn-conditions.c contains the insn conditions from sh.md,
10703 but does not include tree.h. This is fixed in 4.2 20060127. */
10705 sh_cfun_trap_exit_p (void)
10707 return (lookup_attribute ("trap_exit",
10708 DECL_ATTRIBUTES (current_function_decl
))
10712 enum sh_divide_strategy_e sh_div_strategy
= SH_DIV_STRATEGY_DEFAULT
;
10714 /* This defines the storage for the variable part of a -mboard= option.
10715 It is only required when using the sh-superh-elf target */
10717 const char * boardtype
= "7750p2";
10718 const char * osruntime
= "bare";