/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999-2025 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
		  David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "insn-attr.h"
#include "sched-int.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "sel-sched.h"

/* This file should be included last.  */
#include "target-def.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;
/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};
/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };
/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */
unsigned int ia64_section_threshold;

/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */

/* Registers saved and restored for the current function are indexed by this
   enumeration; number_of_ia64_frame_regs gives the count.  */
enum ia64_frame_regs
{
  number_of_ia64_frame_regs
};
/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	  /* size of the stack frame, not including
				     the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	  /* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	  /* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size; /* size of spill area for others.  */
  HARD_REG_SET mask;		  /* mask of saved registers.  */
  unsigned int gr_used_mask;	  /* mask of registers in use as gr spill
				     registers or long-term scratches.  */
  int n_spilled;		  /* number of spilled registers.  */
  int r[number_of_ia64_frame_regs];  /* Frame related registers.  */
  int n_input_regs;		  /* number of input registers used.  */
  int n_local_regs;		  /* number of local registers used.  */
  int n_output_regs;		  /* number of output registers used.  */
  int n_rotate_regs;		  /* number of rotating registers used.  */

  char need_regstk;		  /* true if a .regstk directive needed.  */
  char initialized;		  /* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
/* The actual registers that are emitted.  */
static int emitted_frame_related_regs[number_of_ia64_frame_regs];
static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx_insn *, rtx_insn *);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static int ia64_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *);
static void ia64_h_i_d_extended (void);
static void * ia64_alloc_sched_context (void);
static void ia64_init_sched_context (void *, bool);
static void ia64_set_sched_context (void *);
static void ia64_clear_sched_context (void *);
static void ia64_free_sched_context (void *);
static int ia64_mode_to_int (machine_mode);
static void ia64_set_sched_flags (spec_info_t);
static ds_t ia64_get_insn_spec_ds (rtx_insn *);
static ds_t ia64_get_insn_checked_ds (rtx_insn *);
static bool ia64_skip_rtx_p (const_rtx);
static int ia64_speculate_insn (rtx_insn *, ds_t, rtx *);
static bool ia64_needs_block_p (ds_t);
static rtx ia64_gen_spec_check (rtx_insn *, rtx_insn *, ds_t);
static int ia64_spec_check_p (rtx);
static int ia64_spec_check_src_p (rtx);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static int find_gr_spill (enum ia64_frame_regs, int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);

static void ia64_option_override (void);
static bool ia64_can_eliminate (const int, const int);
static machine_mode hfa_element_mode (const_tree, bool);
static void ia64_setup_incoming_varargs (cumulative_args_t,
					 const function_arg_info &,
					 int *, int);
static int ia64_arg_partial_bytes (cumulative_args_t,
				   const function_arg_info &);
static rtx ia64_function_arg (cumulative_args_t, const function_arg_info &);
static rtx ia64_function_incoming_arg (cumulative_args_t,
				       const function_arg_info &);
static void ia64_function_arg_advance (cumulative_args_t,
				       const function_arg_info &);
static pad_direction ia64_function_arg_padding (machine_mode, const_tree);
static unsigned int ia64_function_arg_boundary (machine_mode,
						const_tree);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (const_tree, const_tree);
static rtx ia64_function_value (const_tree, const_tree, bool);
static rtx ia64_libcall_value (machine_mode, const_rtx);
static bool ia64_function_value_regno_p (const unsigned int);
static int ia64_register_move_cost (machine_mode, reg_class_t,
				    reg_class_t);
static int ia64_memory_move_cost (machine_mode mode, reg_class_t,
				  bool);
static bool ia64_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int ia64_unspec_may_trap_p (const_rtx, unsigned);
static void fix_range (const char *);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (const_tree);
static void process_epilogue (FILE *, rtx, bool, bool);

static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *);
static void ia64_output_function_epilogue (FILE *);
static void ia64_output_function_end_prologue (FILE *);

static void ia64_print_operand (FILE *, rtx, int);
static void ia64_print_operand_address (FILE *, machine_mode, rtx);
static bool ia64_print_operand_punct_valid_p (unsigned char code);

static int ia64_issue_rate (void);
static int ia64_adjust_cost (rtx_insn *, int, rtx_insn *, int, dw_t);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_init_global (FILE *, int, int);
static void ia64_sched_finish_global (FILE *, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx_insn **, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx_insn **, int *, int);
static int ia64_variable_issue (FILE *, int, rtx_insn *, int);

static void ia64_asm_unwind_emit (FILE *, rtx_insn *);
static void ia64_asm_emit_except_personality (rtx);
static void ia64_asm_init_sections (void);

static enum unwind_info_type ia64_debug_unwind_info (void);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx_insn *,
				 int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx_insn *get_next_important_insn (rtx_insn *, rtx_insn *);
static bool important_for_bundling_p (rtx_insn *);
static bool unknown_for_bundling_p (rtx_insn *);
static void bundling (FILE *, int, rtx_insn *, rtx_insn *);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				  HOST_WIDE_INT, tree);
static void ia64_file_start (void);
static void ia64_globalize_decl_name (FILE *, tree);

static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static section *ia64_select_rtx_section (machine_mode, rtx,
					 unsigned HOST_WIDE_INT);
static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
  ATTRIBUTE_UNUSED;
static unsigned int ia64_section_type_flags (tree, const char *, int);
static void ia64_init_libfuncs (void)
  ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
  ATTRIBUTE_UNUSED;
static void ia64_sysv4_init_libfuncs (void)
  ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
  ATTRIBUTE_UNUSED;
static void ia64_soft_fp_init_libfuncs (void)
  ATTRIBUTE_UNUSED;
static bool ia64_vms_valid_pointer_mode (scalar_int_mode mode)
  ATTRIBUTE_UNUSED;
static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
  ATTRIBUTE_UNUSED;

static bool ia64_attribute_takes_identifier_p (const_tree);
static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool ia64_scalar_mode_supported_p (scalar_mode mode);
static bool ia64_vector_mode_supported_p (machine_mode mode);
static bool ia64_legitimate_constant_p (machine_mode, rtx);
static bool ia64_legitimate_address_p (machine_mode, rtx, bool,
				       code_helper = ERROR_MARK);
static bool ia64_cannot_force_const_mem (machine_mode, rtx);
static const char *ia64_mangle_type (const_tree);
static const char *ia64_invalid_conversion (const_tree, const_tree);
static const char *ia64_invalid_unary_op (int, const_tree);
static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
static machine_mode ia64_c_mode_for_suffix (char);
static machine_mode ia64_c_mode_for_floating_type (enum tree_index);
static void ia64_trampoline_init (rtx, tree, rtx);
static void ia64_override_options_after_change (void);
static bool ia64_member_type_forces_blk (const_tree, machine_mode);

static tree ia64_fold_builtin (tree, int, tree *, bool);
static tree ia64_builtin_decl (unsigned, bool);

static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
static fixed_size_mode ia64_get_reg_raw_mode (int regno);
static section * ia64_hpux_function_section (tree, enum node_frequency,
					     bool, bool);

static bool ia64_vectorize_vec_perm_const (machine_mode, machine_mode, rtx,
					   rtx, rtx, const vec_perm_indices &);

static unsigned int ia64_hard_regno_nregs (unsigned int, machine_mode);
static bool ia64_hard_regno_mode_ok (unsigned int, machine_mode);
static bool ia64_modes_tieable_p (machine_mode, machine_mode);
static bool ia64_can_change_mode_class (machine_mode, machine_mode,
					reg_class_t);
#define MAX_VECT_LEN 8

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
};

static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
/* Table of valid machine attributes.  */
static const attribute_spec ia64_gnu_attributes[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "syscall_linkage", 0, 0, false, true, true, false, NULL, NULL },
  { "model",	       1, 1, true, false, false, false,
    ia64_handle_model_attribute, NULL },
#if TARGET_ABI_OPEN_VMS
  { "common_object",   1, 1, true, false, false, false,
    ia64_vms_common_object_attribute, NULL },
#endif
  { "version_id",      1, 1, true, false, false, false,
    ia64_handle_version_id_attribute, NULL }
};

static const scoped_attribute_specs ia64_gnu_attribute_table =
{
  "gnu", { ia64_gnu_attributes }
};

static const scoped_attribute_specs *const ia64_attribute_table[] =
{
  &ia64_gnu_attribute_table
};
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ia64_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ia64_builtin_decl

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
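/* The data1/data2/data4/data8 strings above are the IA-64 assembler's
   1-, 2-, 4- and 8-byte data directives, with the ".ua" completer marking
   the unaligned forms; they stand in for the generic .byte/.2byte/...
   operators GCC would otherwise emit for integer constants.  */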
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ia64_option_override

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ia64_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_SCHED_H_I_D_EXTENDED
#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context

#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context

#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context

#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
#define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context

#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context

#undef TARGET_SCHED_SET_SCHED_FLAGS
#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags

#undef TARGET_SCHED_GET_INSN_SPEC_DS
#define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds

#undef TARGET_SCHED_GET_INSN_CHECKED_DS
#define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds

#undef TARGET_SCHED_SPECULATE_INSN
#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn

#undef TARGET_SCHED_NEEDS_BLOCK_P
#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p

#undef TARGET_SCHED_GEN_SPEC_CHECK
#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check

#undef TARGET_SCHED_SKIP_RTX_P
#define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ia64_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING ia64_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_ASM_GLOBALIZE_DECL_NAME
#define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags

#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel

/* ??? Investigate.  */
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ia64_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE ia64_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_GET_RAW_RESULT_MODE
#define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
#undef TARGET_GET_RAW_ARG_MODE
#define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg

#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections

#undef TARGET_DEBUG_UNWIND_INFO
#define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ia64_mangle_type

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix

#undef TARGET_C_MODE_FOR_FLOATING_TYPE
#define TARGET_C_MODE_FOR_FLOATING_TYPE ia64_c_mode_for_floating_type

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ia64_can_eliminate

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ia64_trampoline_init

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class

#undef TARGET_DELAY_SCHED2
#define TARGET_DELAY_SCHED2 true

/* Variable tracking should be run after all optimizations which
   change order of insns.  It also needs a valid CFG.  */
#undef TARGET_DELAY_VARTRACK
#define TARGET_DELAY_VARTRACK true

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST ia64_vectorize_vec_perm_const

#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 0

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS ia64_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK ia64_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P ia64_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS ia64_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings

#undef TARGET_DOCUMENTATION_NAME
#define TARGET_DOCUMENTATION_NAME "IA-64"

struct gcc_target targetm = TARGET_INITIALIZER;
/* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
   identifier as an argument, so the front end shouldn't look it up.  */

static bool
ia64_attribute_takes_identifier_p (const_tree attr_id)
{
  if (is_attribute_p ("model", attr_id))
    return true;
#if TARGET_ABI_OPEN_VMS
  if (is_attribute_p ("common_object", attr_id))
    return true;
#endif
  return false;
}
typedef enum
  {
    ADDR_AREA_NORMAL,	/* normal address area */
    ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
  }
ia64_addr_area;

static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
{
  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));

      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
	return ADDR_AREA_SMALL;

  return ADDR_AREA_NORMAL;
}
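/* For reference, the attribute handled here is written as, e.g.,

     static int counter __attribute__ ((model ("small")));

   which asks for the object to be placed in the small address area so that
   its address fits the 22-bit "addl" immediate, i.e. the -2MB..2MB range
   noted for ADDR_AREA_SMALL above.  */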
static tree
ia64_handle_model_attribute (tree *node, tree name, tree args,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;

  tree arg, decl = *node;

  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)

      addr_area = ADDR_AREA_SMALL;

      warning (OPT_Wattributes, "invalid argument of %qE attribute",
	       name);
      *no_add_attrs = true;

  switch (TREE_CODE (decl))
    {
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
	   == FUNCTION_DECL)
	  && !TREE_STATIC (decl))
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "an address area attribute cannot be specified for "
		    "local variables");
	  *no_add_attrs = true;
	}
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
	{
	  error ("address area of %q+D conflicts with previous "
		 "declaration", decl);
	  *no_add_attrs = true;
	}

      error_at (DECL_SOURCE_LOCATION (decl),
		"address area attribute cannot be specified for "
		"functions");
      *no_add_attrs = true;

      warning (OPT_Wattributes, "%qE attribute ignored",
	       name);
      *no_add_attrs = true;
/* Part of the low level implementation of DEC Ada pragma Common_Object which
   enables the shared use of variables stored in overlaid linker areas
   corresponding to the use of Fortran COMMON.  */

static tree
ia64_vms_common_object_attribute (tree *node, tree name, tree args,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  gcc_assert (DECL_P (decl));

  DECL_COMMON (decl) = 1;
  id = TREE_VALUE (args);
  if (TREE_CODE (id) != IDENTIFIER_NODE && TREE_CODE (id) != STRING_CST)
    {
      error ("%qE attribute requires a string constant argument", name);
      *no_add_attrs = true;
    }
/* Part of the low level implementation of DEC Ada pragma Common_Object.  */

void
ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
				     unsigned HOST_WIDE_INT size,
				     unsigned int align)
{
  tree attr = DECL_ATTRIBUTES (decl);

  attr = lookup_attribute ("common_object", attr);

      tree id = TREE_VALUE (TREE_VALUE (attr));

      if (TREE_CODE (id) == IDENTIFIER_NODE)
	name = IDENTIFIER_POINTER (id);
      else if (TREE_CODE (id) == STRING_CST)
	name = TREE_STRING_POINTER (id);

      fprintf (file, "\t.vms_common\t\"%s\",", name);

    fprintf (file, "%s", COMMON_ASM_OP);

  /* Code from elfos.h.  */
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED",%u",
	   size, align / BITS_PER_UNIT);
}
static void
ia64_encode_addr_area (tree decl, rtx symbol)
{
  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    {
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    default: gcc_unreachable ();
    }
  SYMBOL_REF_FLAGS (symbol) = flags;
}
static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) == VAR_DECL
      && GET_CODE (DECL_RTL (decl)) == MEM
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}
/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (rtx dst, rtx src)
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  return satisfies_constraint_G (src);
}
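/* In other words, a store to memory is only "ok" when the source is a
   register or one of the few constants named above (integer 0, or an FP
   value accepted by constraint G); anything else must be copied through a
   register by the caller before the move pattern is used.  */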
/* Return 1 if the operands are ok for a floating point load pair.  */

int
ia64_load_pair_ok (rtx dst, rtx src)
{
  /* ??? There is a thinko in the implementation of the "x" constraint and the
     FP_REGS class.  The constraint will also reject (reg f30:TI) so we must
     also return false for it.  */
  if (GET_CODE (dst) != REG
      || !(FP_REGNO_P (REGNO (dst)) && FP_REGNO_P (REGNO (dst) + 1)))
    return 0;
  if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
    return 0;
  switch (GET_CODE (XEXP (src, 0)))
    {
	rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);

	if (GET_CODE (adjust) != CONST_INT
	    || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
	  return 0;
int
addp4_optimize_ok (rtx op1, rtx op2)
{
  return (basereg_operand (op1, GET_MODE(op1)) !=
	  basereg_operand (op2, GET_MODE(op2)));
}
/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
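/* Worked example: for ROP = 0xff0 and RSHIFT = 4, shifting out the four low
   zero bits leaves op = 0xff, and exact_log2 (0xff + 1) = 8, i.e. an 8-bit
   deposit field.  A value such as 0xf0f fails: op + 1 is not a power of two,
   so exact_log2 returns -1.  */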
/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;

  if (GET_CODE (addr) == CONST)
    {
      if (GET_CODE (XEXP (addr, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}
/* Returns true if REG (assumed to be a `reg' RTX) is valid for use
   as a base register.  */

static bool
ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
{
  if (strict
      && REGNO_OK_FOR_BASE_P (REGNO (reg)))
    return true;
  else if (!strict
	   && (GENERAL_REGNO_P (REGNO (reg))
	       || !HARD_REGISTER_P (reg)))
    return true;
  else
    return false;
}
static bool
ia64_legitimate_address_reg (const_rtx reg, bool strict)
{
  if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
      || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
	  && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
    return true;

  return false;
}

static bool
ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
{
  if (GET_CODE (disp) == PLUS
      && rtx_equal_p (reg, XEXP (disp, 0))
      && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
	  || (CONST_INT_P (XEXP (disp, 1))
	      && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
    return true;

  return false;
}
/* Implement TARGET_LEGITIMATE_ADDRESS_P.  */

static bool
ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x,
			   bool strict, code_helper)
{
  if (ia64_legitimate_address_reg (x, strict))
    return true;
  else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
	   && XEXP (x, 0) != arg_pointer_rtx)
    return true;
  else if (GET_CODE (x) == POST_MODIFY
	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
	   && XEXP (x, 0) != arg_pointer_rtx
	   && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
    return true;
  else
    return false;
}
/* Return true if X is a constant that is valid for some immediate
   field in an instruction.  */

static bool
ia64_legitimate_constant_p (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
      if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
	return true;
      return satisfies_constraint_G (x);

      /* ??? Short term workaround for PR 28490.  We must make the code here
	 match the code in ia64_expand_move and move_operand, even though they
	 are both technically wrong.  */
      if (tls_symbolic_operand_type (x) == 0)
	{
	  HOST_WIDE_INT addend = 0;

	  if (GET_CODE (op) == CONST
	      && GET_CODE (XEXP (op, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
	    {
	      addend = INTVAL (XEXP (XEXP (op, 0), 1));
	      op = XEXP (XEXP (op, 0), 0);
	    }

	  if (any_offset_symbol_operand (op, mode)
	      || function_operand (op, mode))
	    return true;
	  if (aligned_offset_symbol_operand (op, mode))
	    return (addend & 0x3fff) == 0;

      if (mode == V2SFmode)
	return satisfies_constraint_Y (x);

      return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	      && GET_MODE_SIZE (mode) <= 8);
/* Don't allow TLS addresses to get spilled to memory.  */

static bool
ia64_cannot_force_const_mem (machine_mode mode, rtx x)
{
  return tls_symbolic_operand_type (x) != 0;
}
/* Expand a symbolic constant load.  */

static bool
ia64_expand_load_address (rtx dest, rtx src)
{
  gcc_assert (GET_CODE (dest) == REG);

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
			       byte_lowpart_offset (Pmode, GET_MODE (dest)));

  if (small_addr_symbolic_operand (src, VOIDmode))
    return false;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (dest, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    emit_insn (gen_load_fptr (dest, src));
  else if (sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (dest, src));
  else if (local_symbolic_operand64 (src, VOIDmode))
    {
      /* We want to use @gprel rather than @ltoff relocations for local
	 symbols:
	  - @gprel does not require dynamic linker
	  - and does not use .sdata section
	 https://gcc.gnu.org/bugzilla/60465 */
      emit_insn (gen_load_gprel64 (dest, src));
    }

      HOST_WIDE_INT addend = 0;

      /* We did split constant offsets in ia64_expand_move, and we did try
	 to keep them split in move_operand, but we also allowed reload to
	 rematerialize arbitrary constants rather than spill the value to
	 the stack and reload it.  So we have to be prepared here to split
	 them apart again.  */
      if (GET_CODE (src) == CONST)
	{
	  HOST_WIDE_INT hi, lo;

	  hi = INTVAL (XEXP (XEXP (src, 0), 1));
	  lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;

	      src = plus_constant (Pmode, XEXP (XEXP (src, 0), 0), hi);

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (dest, tmp));

      tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
      emit_insn (gen_rtx_SET (dest, tmp));

	  tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
	  emit_insn (gen_rtx_SET (dest, tmp));
static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static GTY(()) rtx thread_pointer_rtx;

static rtx
gen_thread_pointer (void)
{
  if (!thread_pointer_rtx)
    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
  return thread_pointer_rtx;
}
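/* Register r13 is the thread pointer ("tp") in the IA-64 software
   conventions, which is why gen_thread_pointer hands back a bare REG for
   register 13; the TLS offsets computed below are added to it.  */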
static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
			 rtx orig_op1, HOST_WIDE_INT addend)
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp;

  HOST_WIDE_INT addend_lo, addend_hi;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtprel (tga_op2, op1));

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode,
					 tga_op1, Pmode, tga_op2, Pmode);

      insns = get_insns ();

      if (GET_MODE (op0) != Pmode)
	op0 = tga_ret;
      emit_libcall_block (insns, op0, tga_ret, op1);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic
	 If the call to __tls_get_addr is used only by a single symbol,
	 then we should (somehow) move the dtprel to the second arg
	 to avoid the extra add.  */
      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode,
					 tga_op1, Pmode, tga_op2, Pmode);

      insns = get_insns ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);

	  emit_insn (gen_load_dtprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, tmp, op0));

	emit_insn (gen_add_dtprel (op0, op1, tmp));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
      addend_hi = addend - addend_lo;
      op1 = plus_constant (Pmode, op1, addend_hi);
      addend = addend_lo;

      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_tprel (tmp, op1));

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);

	  emit_insn (gen_load_tprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));

	emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
      break;

  if (addend)
    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
			       orig_op0, 1, OPTAB_DIRECT);
  if (orig_op0 == op0)
    return NULL_RTX;
  if (GET_MODE (orig_op0) == Pmode)
    return op0;
  return gen_lowpart (GET_MODE (orig_op0), op0);
}
rtx
ia64_expand_move (rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);

  if (!lra_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
    {
      HOST_WIDE_INT addend = 0;
      enum tls_model tls_kind;
      rtx sym = op1;

      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
	{
	  addend = INTVAL (XEXP (XEXP (op1, 0), 1));
	  sym = XEXP (XEXP (op1, 0), 0);
	}

      tls_kind = tls_symbolic_operand_type (sym);
      if (tls_kind)
	return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);

      if (any_offset_symbol_operand (sym, mode))
	addend = 0;
      else if (aligned_offset_symbol_operand (sym, mode))
	{
	  HOST_WIDE_INT addend_lo, addend_hi;

	  addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
	  addend_hi = addend - addend_lo;

	      op1 = plus_constant (mode, sym, addend_hi);

      if (reload_completed)
	{
	  /* We really should have taken care of this offset earlier.  */
	  gcc_assert (addend == 0);
	  if (ia64_expand_load_address (op0, op1))
	    return NULL_RTX;
	}

	  rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);

	  emit_insn (gen_rtx_SET (subtarget, op1));

	  op1 = expand_simple_binop (mode, PLUS, subtarget,
				     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
/* Split a move from OP1 to OP0 conditional on COND.  */

void
ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
{
  rtx_insn *insn, *first = get_last_insn ();

  emit_move_insn (op0, op1);

  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
    if (INSN_P (insn))
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
					  PATTERN (insn));
}
/* Split a post-reload TImode or TFmode reference into two DImode
   components.  This is made extra difficult by the fact that we do
   not get any scratch registers to work with, because reload cannot
   be prevented from giving us a scratch that overlaps the register
   pair involved.  So instead, when addressing memory, we tweak the
   pointer register up and back down with POST_INCs.  Or up and not
   back down when we can get away with it.

   REVERSED is true when the loads must be done in reversed order
   (high word first) for correctness.  DEAD is true when the pointer
   dies with the second insn we generate and therefore the second
   address must not carry a postmodify.

   May return an insn which is to be emitted after the moves.  */

static rtx
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
{
  switch (GET_CODE (in))
    {
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);

      /* Cannot occur reversed.  */
      gcc_assert (!reversed);

      if (GET_MODE (in) != TFmode)
	split_double (in, &out[0], &out[1]);

	/* split_double does not understand how to split a TFmode
	   quantity into a pair of DImode constants.  */
	  unsigned HOST_WIDE_INT p[2];
	  long l[4];  /* TFmode is 128 bits */

	  real_to_target (l, CONST_DOUBLE_REAL_VALUE (in), TFmode);

	  if (FLOAT_WORDS_BIG_ENDIAN)
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
	    }
	  else
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
	      p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
	    }
	  out[0] = GEN_INT (p[0]);
	  out[1] = GEN_INT (p[1]);

	rtx base = XEXP (in, 0);

	switch (GET_CODE (base))
	  {
	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
	    out[1] = adjust_automodify_address
	      (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);

	      /* Reversal requires a pre-increment, which can only
		 be done as a separate insn.  */
	      emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
	      out[0] = adjust_automodify_address
		(in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
	      out[1] = adjust_address (in, DImode, 0);

	    gcc_assert (!reversed && !dead);

	    /* Just do the increment in two steps.  */
	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
	    out[1] = adjust_automodify_address (in, DImode, 0, 8);

	    gcc_assert (!reversed && !dead);

	    /* Add 8, subtract 24.  */
	    base = XEXP (base, 0);
	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
	    out[1] = adjust_automodify_address
	      (in, DImode,
	       gen_rtx_POST_MODIFY (Pmode, base,
				    plus_constant (Pmode, base, -24)),
	       8);

	    gcc_assert (!reversed && !dead);

	    /* Extract and adjust the modification.  This case is
	       trickier than the others, because we might have an
	       index register, or we might have a combined offset that
	       doesn't fit a signed 9-bit displacement field.  We can
	       assume the incoming expression is already legitimate.  */
	    offset = XEXP (base, 1);
	    base = XEXP (base, 0);

	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);

	    if (GET_CODE (XEXP (offset, 1)) == REG)
	      {
		/* Can't adjust the postmodify to match.  Emit the
		   original, then a separate addition insn.  */
		out[1] = adjust_automodify_address (in, DImode, 0, 8);
		fixup = gen_adddi3 (base, base, GEN_INT (-8));
	      }

		gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
		if (INTVAL (XEXP (offset, 1)) < -256 + 8)
		  {
		    /* Again the postmodify cannot be made to match,
		       but in this case it's more efficient to get rid
		       of the postmodify entirely and fix up with an
		       add insn.  */
		    out[1] = adjust_automodify_address (in, DImode, base, 8);
		    fixup = gen_adddi3
		      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
		  }

		    /* Combined offset still fits in the displacement field.
		       (We cannot overflow it at the high end.)  */
		    out[1] = adjust_automodify_address
		      (in, DImode, gen_rtx_POST_MODIFY
		       (Pmode, base, gen_rtx_PLUS
			(Pmode, base,
			 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
		       8);
/* Split a TImode or TFmode move instruction after reload.
   This is used by *movtf_internal and *movti_internal.  */

void
ia64_split_tmode_move (rtx operands[])
{
  rtx in[2], out[2], insn;

  bool reversed = false;

  /* It is possible for reload to decide to overwrite a pointer with
     the value it points to.  In that case we have to do the loads in
     the appropriate order so that the pointer is not destroyed too
     early.  Also we must not generate a postmodify for that second
     load, or rws_access_regno will die.  And we must not generate a
     postmodify for the second load if the destination register
     overlaps with the base register.  */
  if (GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (operands[0], operands[1]))
    {
      rtx base = XEXP (operands[1], 0);
      while (GET_CODE (base) != REG)
	base = XEXP (base, 0);

      if (REGNO (base) == REGNO (operands[0]))
	reversed = true;

      if (refers_to_regno_p (REGNO (operands[0]),
			     REGNO (operands[0])+2,
			     base, 0))
	dead = true;
    }

  /* Another reason to do the moves in reversed order is if the first
     element of the target register pair is also the second element of
     the source register pair.  */
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    reversed = true;

  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);

#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
  if (GET_CODE (EXP) == MEM \
      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
	  || GET_CODE (XEXP (EXP, 0)) == POST_INC \
	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
    add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
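/* The REG_INC note attached by this macro records the autoincrement side
   effect of a POST_INC/POST_DEC/POST_MODIFY address on its base register, so
   that later passes scanning the notes treat that register as modified by
   the move.  */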
  insn = emit_insn (gen_rtx_SET (out[0], in[0]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);

  insn = emit_insn (gen_rtx_SET (out[1], in[1]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);

  if (fixup[0])
    emit_insn (fixup[0]);
  if (fixup[1])
    emit_insn (fixup[1]);

#undef MAYBE_ADD_REG_INC_NOTE
}
/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from TARGET_SECONDARY_MEMORY_NEEDED or the second
   from SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:XF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

static rtx
spill_xfmode_rfmode_operand (rtx in, int force, machine_mode mode)
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx memt = assign_stack_temp (TImode, 16);
      emit_move_insn (memt, SUBREG_REG (in));
      return adjust_address (memt, mode, 0);
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx memx = assign_stack_temp (mode, 16);
      emit_move_insn (memx, in);
      return memx;
    }
  else
    return in;
}
/* Expand the movxf or movrf pattern (MODE says which) with the given
   OPERANDS, returning true if the pattern should then invoke
   emit_move_insn.  */

bool
ia64_expand_movxf_movrf (machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0];

  if (GET_CODE (op0) == SUBREG)
    op0 = SUBREG_REG (op0);

  /* We must support XFmode loads into general registers for stdarg/vararg,
     unprototyped calls, and a rare case where a long double is passed as
     an argument after a float HFA fills the FP registers.  We split them into
     DImode loads for convenience.  We also need to support XFmode stores
     for the last case.  This case does not happen for stdarg/vararg routines,
     because we do a block store to memory of unnamed arguments.  */

  if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
    {
      /* We're hoping to transform everything that deals with XFmode
	 quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Struct to register can just use TImode instead.  */
      if ((GET_CODE (operands[1]) == SUBREG
	   && GET_MODE (SUBREG_REG (operands[1])) == TImode)
	  || (GET_CODE (operands[1]) == REG
	      && GR_REGNO_P (REGNO (operands[1]))))
	{
	  rtx op1 = operands[1];

	  if (GET_CODE (op1) == SUBREG)
	    op1 = SUBREG_REG (op1);

	  op1 = gen_rtx_REG (TImode, REGNO (op1));

	  emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);

      if (GET_CODE (operands[1]) == CONST_DOUBLE)
	{
	  /* Don't word-swap when reading in the constant.  */
	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
			  operand_subword (operands[1], WORDS_BIG_ENDIAN,
					   0, mode));
	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
			  operand_subword (operands[1], !WORDS_BIG_ENDIAN,
					   0, mode));

      /* If the quantity is in a register not known to be GR, spill it.  */
      if (register_operand (operands[1], mode))
	operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);

      gcc_assert (GET_CODE (operands[1]) == MEM);

      /* Don't word-swap when reading in the value.  */
      out[0] = gen_rtx_REG (DImode, REGNO (op0));
      out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);

      emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
      emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));

  if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
    {
      /* We're hoping to transform everything that deals with XFmode
	 quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Op0 can't be a GR_REG here, as that case is handled above.
	 If op0 is a register, then we spill op1, so that we now have a
	 MEM operand.  This requires creating an XFmode subreg of a TImode reg
	 to force the spill.  */
      if (register_operand (operands[0], mode))
	{
	  rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
	  op1 = gen_rtx_SUBREG (mode, op1, 0);
	  operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
	}

      gcc_assert (GET_CODE (operands[0]) == MEM);

      /* Don't word-swap when writing out the value.  */
      in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
      in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);

      emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
      emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);

  if (!lra_in_progress && !reload_completed)
    {
      operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);

      if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
	{
	  rtx memt, memx, in = operands[1];
	  if (CONSTANT_P (in))
	    in = validize_mem (force_const_mem (mode, in));
	  if (GET_CODE (in) == MEM)
	    memt = adjust_address (in, TImode, 0);

	      memt = assign_stack_temp (TImode, 16);
	      memx = adjust_address (memt, mode, 0);
	      emit_move_insn (memx, in);

	  emit_move_insn (op0, memt);

      if (!ia64_move_ok (operands[0], operands[1]))
	operands[1] = force_reg (mode, operands[1]);
/* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
   with the expression that holds the compare result (in VOIDmode).  */

static GTY(()) rtx cmptf_libfunc;

ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
{
  enum rtx_code code = GET_CODE (*expr);

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (*op0) == BImode)
    gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);

  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
     magic number as its third argument, that indicates what to do.
     The return value is an integer to be compared against zero.  */
  else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
    {
      QCMP_INV = 1,	/* Raise FP_INVALID on NaNs as a side effect.  */

      enum rtx_code ncode;

      gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
      switch (code)
        {
          /* 1 = equal, 0 = not equal.  Equality operators do
             not raise FP_INVALID when given a NaN operand.  */
        case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
        case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
          /* isunordered() from C99.  */
        case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
        case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
          /* Relational operators raise FP_INVALID when given
             a NaN operand.  */
        case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
        case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
        case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
        case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
          /* Unordered relational operators do not raise FP_INVALID
             when given a NaN operand.  */
        case UNLT:      magic = QCMP_LT        |QCMP_UNORD; ncode = NE; break;
        case UNLE:      magic = QCMP_LT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
        case UNGT:      magic = QCMP_GT        |QCMP_UNORD; ncode = NE; break;
        case UNGE:      magic = QCMP_GT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
          /* Not supported.  */
        default: gcc_unreachable ();
        }

      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode,
                                     *op0, TFmode, *op1, TFmode,
                                     GEN_INT (magic), DImode);
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (ncode, BImode,
                                                   ret, const0_rtx)));

      rtx_insn *insns = get_insns ();

      emit_libcall_block (insns, cmp, cmp,
                          gen_rtx_fmt_ee (code, BImode, *op0, *op1));
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
    }

  *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
}
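/* A rough sketch of the HPUX TFmode path above: for a comparison such as
   a < b, the expander emits roughly the equivalent of

        ret = _U_Qfcmp (a, b, QCMP_LT | QCMP_INV);
        cmp = (ret != 0);

   and *EXPR becomes (NE cmp 0), so the caller can branch on the BImode
   predicate exactly as in the non-HPUX case.  */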
/* Generate an integral vector comparison.  Return true if the condition has
   been reversed, and so the sense of the comparison should be inverted.  */

ia64_expand_vecint_compare (enum rtx_code code, machine_mode mode,
                            rtx dest, rtx op0, rtx op1)
{
  bool negate = false;

  /* Canonicalize the comparison to EQ, GT, GTU.  */

      code = reverse_condition (code);

      code = reverse_condition (code);

      code = swap_condition (code);
      x = op0, op0 = op1, op1 = x;

  /* Unsigned parallel compare is not supported by the hardware.  Play some
     tricks to turn this into a signed comparison against 0.  */

          /* Subtract (-(INT MAX) - 1) from both operands to make
             them signed.  */
          mask = gen_int_mode (0x80000000, SImode);
          mask = gen_const_vec_duplicate (V2SImode, mask);
          mask = force_reg (mode, mask);
          t1 = gen_reg_rtx (mode);
          emit_insn (gen_subv2si3 (t1, op0, mask));
          t2 = gen_reg_rtx (mode);
          emit_insn (gen_subv2si3 (t2, op1, mask));

          /* Perform a parallel unsigned saturating subtraction.  */
          x = gen_reg_rtx (mode);
          emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, op0, op1)));

          op1 = CONST0_RTX (mode);

  x = gen_rtx_fmt_ee (code, mode, op0, op1);
  emit_insn (gen_rtx_SET (dest, x));

  return negate;
}
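/* Worked example of the unsigned-compare trick above (V2SImode, GTU):
   biasing both operands by 0x80000000 maps unsigned order onto signed
   order, e.g. 0xFFFFFFFF >u 0x00000001 becomes 0x7FFFFFFF >s 0x80000001,
   which the signed parallel compare can handle directly.  */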
/* Emit an integral vector conditional move.  */

ia64_expand_vecint_cmov (rtx operands[])
{
  machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[3]);

  cmp = gen_reg_rtx (mode);
  negate = ia64_expand_vecint_compare (code, mode, cmp,
                                       operands[4], operands[5]);

  ot = operands[1+negate];
  of = operands[2-negate];

  if (ot == CONST0_RTX (mode))
    {
      if (of == CONST0_RTX (mode))
        {
          emit_move_insn (operands[0], ot);
          return;
        }

      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, of);
      emit_insn (gen_rtx_SET (operands[0], x));
    }
  else if (of == CONST0_RTX (mode))
    {
      x = gen_rtx_AND (mode, cmp, ot);
      emit_insn (gen_rtx_SET (operands[0], x));
    }
  else
    {
      t = gen_reg_rtx (mode);
      x = gen_rtx_AND (mode, cmp, operands[1+negate]);
      emit_insn (gen_rtx_SET (t, x));

      f = gen_reg_rtx (mode);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, operands[2-negate]);
      emit_insn (gen_rtx_SET (f, x));

      x = gen_rtx_IOR (mode, t, f);
      emit_insn (gen_rtx_SET (operands[0], x));
    }
}
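/* The general case above implements the vector select with boolean
   arithmetic on the comparison mask, roughly:

        t    =  cmp & op_true
        f    = ~cmp & op_false
        dest = t | f

   which is the usual AND / NOT-AND / OR idiom when no native vector
   conditional move exists.  */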
/* Emit an integral vector min or max operation.  Return true if all done.  */

ia64_expand_vecint_minmax (enum rtx_code code, machine_mode mode,
                           rtx operands[])
{
  /* These four combinations are supported directly.  */
  if (mode == V8QImode && (code == UMIN || code == UMAX))
    return false;
  if (mode == V4HImode && (code == SMIN || code == SMAX))
    return false;

  /* This combination can be implemented with only saturating subtraction.  */
  if (mode == V4HImode && code == UMAX)
    {
      rtx x, tmp = gen_reg_rtx (mode);

      x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
      emit_insn (gen_rtx_SET (tmp, x));

      emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
      return true;
    }

  /* Everything else implemented via vector comparisons.  */
  xops[0] = operands[0];
  xops[4] = xops[1] = operands[1];
  xops[5] = xops[2] = operands[2];

  xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);

  ia64_expand_vecint_cmov (xops);
  return true;
}
/* The vectors LO and HI each contain N halves of a double-wide vector.
   Reassemble either the first N/2 or the second N/2 elements.  */

ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
{
  machine_mode vmode = GET_MODE (lo);
  unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
  struct expand_vec_perm_d d;

  d.target = gen_lowpart (vmode, out);
  d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
  d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);

  d.one_operand_p = false;
  d.testing_p = false;

  high = (highp ? nelt / 2 : 0);
  for (i = 0; i < nelt / 2; ++i)
    {
      d.perm[i * 2] = i + high;
      d.perm[i * 2 + 1] = i + high + nelt;
    }

  ok = ia64_expand_vec_perm_const_1 (&d);
}
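/* Example of the permutation built above: for V8QImode (nelt == 8) and
   highp == false, d.perm[] is { 0, 8, 1, 9, 2, 10, 3, 11 }, i.e. the low
   halves of the two inputs interleaved; with highp == true the indices
   start at nelt/2 instead (4, 12, 5, 13, ...).  */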
/* Return a vector of the sign-extension of VEC.  */

ia64_unpack_sign (rtx vec, bool unsignedp)
{
  machine_mode mode = GET_MODE (vec);
  rtx zero = CONST0_RTX (mode);

      rtx sign = gen_reg_rtx (mode);

      neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
}

/* Emit an integral vector unpack operation.  */

ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
{
  rtx sign = ia64_unpack_sign (operands[1], unsignedp);
  ia64_unpack_assemble (operands[0], operands[1], sign, highp);
}
/* Emit an integral vector widening sum operation.  */

ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
{
  sign = ia64_unpack_sign (operands[1], unsignedp);

  wmode = GET_MODE (operands[0]);
  l = gen_reg_rtx (wmode);
  h = gen_reg_rtx (wmode);

  ia64_unpack_assemble (l, operands[1], sign, false);
  ia64_unpack_assemble (h, operands[1], sign, true);

  t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
  t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
  if (t != operands[0])
    emit_move_insn (operands[0], t);
}
/* Emit the appropriate sequence for a call.  */

ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
                  int sibcall_p)
{
  addr = XEXP (addr, 0);
  addr = convert_memory_address (DImode, addr);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  /* ??? Should do this for functions known to bind local too.  */
  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
        insn = gen_sibcall_nogp (addr);
      else if (!retval)
        insn = gen_call_nogp (addr, b0);
      else
        insn = gen_call_value_nogp (retval, addr, b0);
      insn = emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
        insn = gen_sibcall_gp (addr);
      else if (!retval)
        insn = gen_call_gp (addr, b0);
      else
        insn = gen_call_value_gp (retval, addr, b0);
      insn = emit_call_insn (insn);

      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
    }

  if (sibcall_p)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);

  if (TARGET_ABI_OPEN_VMS)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
             gen_rtx_REG (DImode, GR_REG (25)));
}
reg_emitted (enum ia64_frame_regs r)
{
  if (emitted_frame_related_regs[r] == 0)
    emitted_frame_related_regs[r] = current_frame_info.r[r];
  else
    gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
}

get_reg (enum ia64_frame_regs r)
{
  return current_frame_info.r[r];
}

is_emitted (int regno)
{
  for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
    if (emitted_frame_related_regs[r] == regno)
      return true;
  return false;
}
ia64_reload_gp (void)
{
  if (current_frame_info.r[reg_save_gp])
    tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
  else
    {
      HOST_WIDE_INT offset;

      offset = (current_frame_info.spill_cfa_off
                + current_frame_info.spill_size);
      if (frame_pointer_needed)
        tmp = hard_frame_pointer_rtx;
      else
        {
          tmp = stack_pointer_rtx;
          offset = current_frame_info.total_size - offset;
        }

      offset_r = GEN_INT (offset);
      if (satisfies_constraint_I (offset_r))
        emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
      else
        {
          emit_move_insn (pic_offset_table_rtx, offset_r);
          emit_insn (gen_adddi3 (pic_offset_table_rtx,
                                 pic_offset_table_rtx, tmp));
        }

      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
    }

  emit_move_insn (pic_offset_table_rtx, tmp);
}
ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
                 rtx scratch_b, int noreturn_p, int sibcall_p)
{
  bool is_desc = false;

  /* If we find we're calling through a register, then we're actually
     calling through a descriptor, so load up the values.  */
  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
    {
      /* ??? We are currently constrained to *not* use peep2, because
         we can legitimately change the global lifetime of the GP
         (in the form of killing where previously live).  This is
         because a call through a descriptor doesn't use the previous
         value of the GP, while a direct call does, and we do not
         commit to either form until the split here.

         That said, this means that we lack precise life info for
         whether ADDR is dead after this call.  This is not terribly
         important, since we can fix things up essentially for free
         with the POST_DEC below, but it's nice to not use it when we
         can immediately tell it's not necessary.  */
      addr_dead_p = ((noreturn_p || sibcall_p
                      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
                                            REGNO (addr)))
                     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));

      /* Load the code address into scratch_b.  */
      tmp = gen_rtx_POST_INC (Pmode, addr);
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (scratch_r, tmp);
      emit_move_insn (scratch_b, scratch_r);

      /* Load the GP address.  If ADDR is not dead here, then we must
         revert the change made above via the POST_INCREMENT.  */
      if (!addr_dead_p)
        tmp = gen_rtx_POST_DEC (Pmode, addr);
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (pic_offset_table_rtx, tmp);
    }

  if (sibcall_p)
    insn = gen_sibcall_nogp (addr);
  else if (retval)
    insn = gen_call_value_nogp (retval, addr, retaddr);
  else
    insn = gen_call_nogp (addr, retaddr);
  emit_call_insn (insn);

  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
    ia64_reload_gp ();
}
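/* Sketch of the descriptor case handled above: an indirect call through a
   general register takes ADDR as the address of a function descriptor,
   whose first doubleword is the code entry point and whose second is the
   callee's gp.  The POST_INC/POST_DEC pair reads both words while leaving
   ADDR unchanged when it is still live afterwards.  */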
/* Expand an atomic operation.  We want to perform MEM <CODE>= VAL atomically.

   This differs from the generic code in that we know about the zero-extending
   properties of cmpxchg, and the zero-extending requirements of ar.ccv.  We
   also know that ld.acq+cmpxchg.rel equals a full barrier.

   The loop we want to generate looks like

        new_reg = cmp_reg op val;
        cmp_reg = compare-and-swap(mem, old_reg, new_reg)
        if (cmp_reg != old_reg)
          goto loop;

   Note that we only do the plain load from memory once.  Subsequent
   iterations use the value loaded by the compare-and-swap pattern.  */

ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
                       rtx old_dst, rtx new_dst, enum memmodel model)
{
  machine_mode mode = GET_MODE (mem);
  rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
  enum insn_code icode;

  /* Special case for using fetchadd.  */
  if ((mode == SImode || mode == DImode)
      && (code == PLUS || code == MINUS)
      && fetchadd_operand (val, mode))
    {
      if (code == MINUS)
        val = GEN_INT (-INTVAL (val));

      if (!old_dst)
        old_dst = gen_reg_rtx (mode);

      switch (model)
        {
        case MEMMODEL_ACQ_REL:
        case MEMMODEL_SEQ_CST:
        case MEMMODEL_SYNC_SEQ_CST:
          emit_insn (gen_memory_barrier ());
          /* FALLTHRU */
        case MEMMODEL_RELAXED:
        case MEMMODEL_ACQUIRE:
        case MEMMODEL_SYNC_ACQUIRE:
        case MEMMODEL_CONSUME:
          if (mode == SImode)
            icode = CODE_FOR_fetchadd_acq_si;
          else
            icode = CODE_FOR_fetchadd_acq_di;
          break;
        case MEMMODEL_RELEASE:
        case MEMMODEL_SYNC_RELEASE:
          if (mode == SImode)
            icode = CODE_FOR_fetchadd_rel_si;
          else
            icode = CODE_FOR_fetchadd_rel_di;
          break;
        }

      emit_insn (GEN_FCN (icode) (old_dst, mem, val));

      if (new_dst)
        {
          new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
                                         /* ... */);
          if (new_reg != new_dst)
            emit_move_insn (new_dst, new_reg);
        }
      return;
    }

  /* Because of the volatile mem read, we get an ld.acq, which is the
     front half of the full barrier.  The end half is the cmpxchg.rel.
     For relaxed and release memory models, we don't need this.  But we
     also don't bother trying to prevent it either.  */
  gcc_assert (is_mm_relaxed (model) || is_mm_release (model)
              || MEM_VOLATILE_P (mem));

  old_reg = gen_reg_rtx (DImode);
  cmp_reg = gen_reg_rtx (DImode);
  label = gen_label_rtx ();

  if (mode != DImode)
    {
      val = simplify_gen_subreg (DImode, val, mode, 0);
      emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
    }
  else
    emit_move_insn (cmp_reg, mem);

  ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
  emit_move_insn (old_reg, cmp_reg);
  emit_move_insn (ar_ccv, cmp_reg);

  if (old_dst)
    emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));

  if (code == NOT)
    {
      new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
                                     true, OPTAB_DIRECT);
      new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
    }
  else
    new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
                                   true, OPTAB_DIRECT);

  if (mode != DImode)
    new_reg = gen_lowpart (mode, new_reg);
  if (new_dst)
    emit_move_insn (new_dst, new_reg);

  switch (model)
    {
    case MEMMODEL_RELAXED:
    case MEMMODEL_ACQUIRE:
    case MEMMODEL_SYNC_ACQUIRE:
    case MEMMODEL_CONSUME:
      switch (mode)
        {
        case E_QImode: icode = CODE_FOR_cmpxchg_acq_qi; break;
        case E_HImode: icode = CODE_FOR_cmpxchg_acq_hi; break;
        case E_SImode: icode = CODE_FOR_cmpxchg_acq_si; break;
        case E_DImode: icode = CODE_FOR_cmpxchg_acq_di; break;
        }
      break;

    case MEMMODEL_RELEASE:
    case MEMMODEL_SYNC_RELEASE:
    case MEMMODEL_ACQ_REL:
    case MEMMODEL_SEQ_CST:
    case MEMMODEL_SYNC_SEQ_CST:
      switch (mode)
        {
        case E_QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
        case E_HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
        case E_SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
        case E_DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
        }
      break;
    }

  emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));

  emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
}
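/* A rough picture of the cmpxchg path above, for a 4-byte operation
   (illustrative pseudo-assembly, not emitted verbatim):

        ld4.acq    cmp_reg = [mem]          ; plain load, done once
     loop:
        mov        old_reg = cmp_reg
        mov        ar.ccv  = cmp_reg
        new_reg    = cmp_reg <op> val
        cmpxchg4.rel  cmp_reg = [mem], new_reg
        branch back to loop if cmp_reg != old_reg

   matching the loop described in the comment at the top of the
   function.  */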
/* Begin the assembly file.  */

ia64_file_start (void)
{
  default_file_start ();
  emit_safe_across_calls ();
}

emit_safe_across_calls (void)
{
  unsigned int rs, re;

      while (rs < 64 && call_used_or_fixed_reg_p (PR_REG (rs)))
        rs++;

      for (re = rs + 1;
           re < 64 && ! call_used_or_fixed_reg_p (PR_REG (re)); re++)
        continue;

        fputs ("\t.pred.safe_across_calls ", asm_out_file);

        fputc (',', asm_out_file);

      if (re == rs + 1)
        fprintf (asm_out_file, "p%u", rs);
      else
        fprintf (asm_out_file, "p%u-p%u", rs, re - 1);

    fputc ('\n', asm_out_file);
}
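/* The directive emitted above comes out as, for example,

        .pred.safe_across_calls p1-p5,p16-p63

   listing the ranges of predicate registers that are preserved across
   calls (the exact ranges depend on call_used_or_fixed_reg_p).  */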
/* Globalize a declaration.  */

ia64_globalize_decl_name (FILE * stream, tree decl)
{
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
  tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));

  if (version_attr)
    {
      tree v = TREE_VALUE (TREE_VALUE (version_attr));
      const char *p = TREE_STRING_POINTER (v);
      fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
    }

  targetm.asm_out.globalize_label (stream, name);
  if (TREE_CODE (decl) == FUNCTION_DECL)
    ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
}
/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

find_gr_spill (enum ia64_frame_regs r, int try_locals)
{
  if (emitted_frame_related_regs[r] != 0)
    {
      regno = emitted_frame_related_regs[r];
      if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
          && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
        current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
      else if (crtl->is_leaf
               && regno >= GR_REG (1) && regno <= GR_REG (31))
        current_frame_info.gr_used_mask |= 1 << regno;
    }

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */

      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
        if (! df_regs_ever_live_p (regno)
            && call_used_or_fixed_reg_p (regno)
            && ! fixed_regs[regno]
            && ! global_regs[regno]
            && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
            && ! is_emitted (regno))
          {
            current_frame_info.gr_used_mask |= 1 << regno;
            return regno;
          }

      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
         that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
         reg_name switching code in ia64_expand_prologue.  */
      while (regno < (80 - frame_pointer_needed))
        if (! is_emitted (LOC_REG (regno++)))
          {
            current_frame_info.n_local_regs = regno;
            return LOC_REG (regno - 1);
          }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}

/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

next_scratch_gr_reg (void)
{
  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_or_fixed_reg_p (regno)
          && ! fixed_regs[regno]
          && ! global_regs[regno]
          && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
        {
          last_scratch_gr_reg = regno;
          return regno;
        }
    }

  /* There must be _something_ available.  */
/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno = REGNO (reg);

  if (regno < 32)
    {
      unsigned int i, n = REG_NREGS (reg);
      for (i = 0; i < n; ++i)
        current_frame_info.gr_used_mask |= 1 << (regno + i);
    }
}
/* Returns the number of bytes offset between the frame pointer and the stack
   pointer for the current function.  SIZE is the number of bytes of space
   needed for local variables.  */

ia64_compute_frame_size (HOST_WIDE_INT size)
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;

  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Static stack checking uses r2 and r3.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
      || flag_stack_clash_protection)
    current_frame_info.gr_used_mask |= 0xc;

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (df_regs_ever_live_p (regno) && !is_emitted (regno))
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the caller.  */
  if (cfun->machine->n_varargs > 0
      || lookup_attribute ("syscall_linkage",
                           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    current_frame_info.n_input_regs = 8;
  else
    {
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
        if (df_regs_ever_live_p (regno))
          break;
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
    }

  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (df_regs_ever_live_p (regno))
      break;
  i = regno - OUT_REG (0) + 1;

#ifndef PROFILE_HOOK
  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
#endif
  current_frame_info.n_output_regs = i;

  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
      {
        SET_HARD_REG_BIT (mask, regno);
      }

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
      {
        SET_HARD_REG_BIT (mask, regno);
      }

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
      {
        SET_HARD_REG_BIT (mask, regno);
      }

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
    {
      current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
      /* If we did not get a register, then we take LOC79.  This is guaranteed
         to be free, even if regs_ever_live is already set, because this is
         HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
         as we don't count loc79 above.  */
      if (current_frame_info.r[reg_fp] == 0)
        {
          current_frame_info.r[reg_fp] = LOC_REG (79);
          current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
        }
    }

  if (! crtl->is_leaf)
    {
      /* Emit a save of BR0 if we call other functions.  Do this even
         if this function doesn't return, as EH depends on this to be
         able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));

      current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
      if (current_frame_info.r[reg_save_b0] == 0)
        extra_spill_size += 8;

      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
      if (current_frame_info.r[reg_save_ar_pfs] == 0)
        extra_spill_size += 8;

      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
         registers are clobbered, so we fall back to the stack.  */
      current_frame_info.r[reg_save_gp]
        = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
      if (current_frame_info.r[reg_save_gp] == 0)
        SET_HARD_REG_BIT (mask, GR_REG (1));
    }
  else
    {
      if (df_regs_ever_live_p (BR_REG (0))
          && ! call_used_or_fixed_reg_p (BR_REG (0)))
        {
          SET_HARD_REG_BIT (mask, BR_REG (0));
          extra_spill_size += 8;
        }

      if (df_regs_ever_live_p (AR_PFS_REGNUM))
        {
          SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
          current_frame_info.r[reg_save_ar_pfs]
            = find_gr_spill (reg_save_ar_pfs, 1);
          if (current_frame_info.r[reg_save_ar_pfs] == 0)
            extra_spill_size += 8;
        }
    }

  /* Unwind descriptor hackery: things are most efficient if we allocate
     consecutive GR save registers for RP, PFS, FP in that order.  However,
     it is absolutely critical that FP get the only hard register that's
     guaranteed to be free, so we allocated it first.  If all three did
     happen to be allocated hard regs, and are consecutive, rearrange them
     into the preferred order now.

     If we have already emitted code for any of those registers,
     then it's already too late to change.  */
  min_regno = MIN (current_frame_info.r[reg_fp],
                   MIN (current_frame_info.r[reg_save_b0],
                        current_frame_info.r[reg_save_ar_pfs]));
  max_regno = MAX (current_frame_info.r[reg_fp],
                   MAX (current_frame_info.r[reg_save_b0],
                        current_frame_info.r[reg_save_ar_pfs]));
  if (min_regno > 0
      && min_regno + 2 == max_regno
      && (current_frame_info.r[reg_fp] == min_regno + 1
          || current_frame_info.r[reg_save_b0] == min_regno + 1
          || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
      && (emitted_frame_related_regs[reg_save_b0] == 0
          || emitted_frame_related_regs[reg_save_b0] == min_regno)
      && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
          || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
      && (emitted_frame_related_regs[reg_fp] == 0
          || emitted_frame_related_regs[reg_fp] == min_regno + 2))
    {
      current_frame_info.r[reg_save_b0] = min_regno;
      current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
      current_frame_info.r[reg_fp] = min_regno + 2;
    }

  /* See if we need to store the predicate register block.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
      break;
  if (regno <= PR_REG (63))
    {
      SET_HARD_REG_BIT (mask, PR_REG (0));
      current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
      if (current_frame_info.r[reg_save_pr] == 0)
        extra_spill_size += 8;

      /* ??? Mark them all as used so that register renaming and such
         are free to use them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
        df_set_regs_ever_live (regno, true);
    }

  /* If we're forced to use st8.spill, we're forced to save and restore
     ar.unat as well.  The check for existing liveness allows inline asm
     to touch ar.unat.  */
  if (spilled_gr_p || cfun->machine->n_varargs
      || df_regs_ever_live_p (AR_UNAT_REGNUM))
    {
      df_set_regs_ever_live (AR_UNAT_REGNUM, true);
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
      current_frame_info.r[reg_save_ar_unat]
        = find_gr_spill (reg_save_ar_unat, spill_size == 0);
      if (current_frame_info.r[reg_save_ar_unat] == 0)
        extra_spill_size += 8;
    }

  if (df_regs_ever_live_p (AR_LC_REGNUM))
    {
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
      current_frame_info.r[reg_save_ar_lc]
        = find_gr_spill (reg_save_ar_lc, spill_size == 0);
      if (current_frame_info.r[reg_save_ar_lc] == 0)
        extra_spill_size += 8;
    }

  /* If we have an odd number of words of pretend arguments written to
     the stack, then the FR save area will be unaligned.  We round the
     size of this area up to keep things 16 byte aligned.  */
  if (spilled_fr_p)
    pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
  else
    pretend_args_size = crtl->args.pretend_args_size;

  total_size = (spill_size + extra_spill_size + size + pretend_args_size
                + crtl->outgoing_args_size);
  total_size = IA64_STACK_ALIGN (total_size);

  /* We always use the 16-byte scratch area provided by the caller, but
     if we are a leaf function, there's no one to which we need to provide
     a scratch area.  However, if the function allocates dynamic stack space,
     the dynamic offset is computed early and contains STACK_POINTER_OFFSET,
     so we need to cope.  */
  if (crtl->is_leaf && !cfun->calls_alloca)
    total_size = MAX (0, total_size - 16);

  current_frame_info.total_size = total_size;
  current_frame_info.spill_cfa_off = pretend_args_size - 16;
  current_frame_info.spill_size = spill_size;
  current_frame_info.extra_spill_size = extra_spill_size;
  current_frame_info.mask = mask;
  current_frame_info.n_spilled = n_spilled;
  current_frame_info.initialized = reload_completed;
}
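/* Worked example of the final size computation above (figures are
   illustrative only): with 48 bytes of spills, 8 bytes of extra spills,
   40 bytes of locals (SIZE), no pretend args and 32 bytes of outgoing
   args, total_size = 48 + 8 + 40 + 0 + 32 = 128, which IA64_STACK_ALIGN
   leaves at 128 since it is already a multiple of 16.  */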
/* Worker function for TARGET_CAN_ELIMINATE.  */

ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
{
  return (to == BR_REG (0) ? crtl->is_leaf : true);
}

/* Compute the initial difference between the specified pair of registers.  */

ia64_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT offset;

  ia64_compute_frame_size (get_frame_size ());
  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      switch (to)
        {
        case HARD_FRAME_POINTER_REGNUM:
          offset = -current_frame_info.total_size;
          if (!crtl->is_leaf || cfun->calls_alloca)
            offset += 16 + crtl->outgoing_args_size;
          break;

        case STACK_POINTER_REGNUM:
          if (!crtl->is_leaf || cfun->calls_alloca)
            offset += 16 + crtl->outgoing_args_size;
          break;
        }
      break;

    case ARG_POINTER_REGNUM:
      /* Arguments start above the 16 byte save area, unless stdarg
         in which case we store through the 16 byte save area.  */
      switch (to)
        {
        case HARD_FRAME_POINTER_REGNUM:
          offset = 16 - crtl->args.pretend_args_size;
          break;

        case STACK_POINTER_REGNUM:
          offset = (current_frame_info.total_size
                    + 16 - crtl->args.pretend_args_size);
          break;
        }
      break;
    }

  return offset;
}
/* If there are more than a trivial number of register spills, we use
   two interleaved iterators so that we can get two memory references
   per insn group.

   In order to simplify things in the prologue and epilogue expanders,
   we use helper functions to fix up the memory references after the
   fact with the appropriate offsets to a POST_MODIFY memory mode.
   The following data structure tracks the state of the two iterators
   while insns are being emitted.  */

struct spill_fill_data
{
  rtx_insn *init_after;		/* point at which to emit initializations */
  rtx init_reg[2];		/* initial base register */
  rtx iter_reg[2];		/* the iterator registers */
  rtx *prev_addr[2];		/* address of last memory use */
  rtx_insn *prev_insn[2];	/* the insn corresponding to prev_addr */
  HOST_WIDE_INT prev_off[2];	/* last offset */
  int n_iter;			/* number of iterators in use */
  int next_iter;		/* next iterator to use */
  unsigned int save_gr_used_mask;
};

static struct spill_fill_data spill_fill_data;
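/* Illustration of the two-iterator scheme: with n_iter == 2, successive
   spill_restore_mem calls alternate between iter_reg[0] and iter_reg[1],
   so two spill/fill style accesses can land in the same insn group, and
   each iterator is advanced by a POST_MODIFY that is patched in after
   the fact (see spill_restore_mem below).  */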
setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
{
  spill_fill_data.init_after = get_last_insn ();
  spill_fill_data.init_reg[0] = init_reg;
  spill_fill_data.init_reg[1] = init_reg;
  spill_fill_data.prev_addr[0] = NULL;
  spill_fill_data.prev_addr[1] = NULL;
  spill_fill_data.prev_insn[0] = NULL;
  spill_fill_data.prev_insn[1] = NULL;
  spill_fill_data.prev_off[0] = cfa_off;
  spill_fill_data.prev_off[1] = cfa_off;
  spill_fill_data.next_iter = 0;
  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;

  spill_fill_data.n_iter = 1 + (n_spills > 2);
  for (i = 0; i < spill_fill_data.n_iter; ++i)
    {
      int regno = next_scratch_gr_reg ();
      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
      current_frame_info.gr_used_mask |= 1 << regno;
    }
}

finish_spill_pointers (void)
{
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
}

spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
{
  int iter = spill_fill_data.next_iter;
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
  rtx disp_rtx = GEN_INT (disp);

  if (spill_fill_data.prev_addr[iter])
    {
      if (satisfies_constraint_N (disp_rtx))
        {
          *spill_fill_data.prev_addr[iter]
            = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
                                   gen_rtx_PLUS (DImode,
                                                 spill_fill_data.iter_reg[iter],
                                                 disp_rtx));
          add_reg_note (spill_fill_data.prev_insn[iter],
                        REG_INC, spill_fill_data.iter_reg[iter]);
        }
      else
        {
          /* ??? Could use register post_modify for loads.  */
          if (!satisfies_constraint_I (disp_rtx))
            {
              rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
              emit_move_insn (tmp, disp_rtx);
              disp_rtx = tmp;
            }
          emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
                                 spill_fill_data.iter_reg[iter], disp_rtx));
        }
    }
  /* Micro-optimization: if we've created a frame pointer, it's at
     CFA 0, which may allow the real iterator to be initialized lower,
     slightly increasing parallelism.  Also, if there are few saves
     it may eliminate the iterator entirely.  */
  else if (disp == 0
           && spill_fill_data.init_reg[iter] == stack_pointer_rtx
           && frame_pointer_needed)
    {
      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
      set_mem_alias_set (mem, get_varargs_alias_set ());
      return mem;
    }
  else
    {
      seq = gen_movdi (spill_fill_data.iter_reg[iter],
                       spill_fill_data.init_reg[iter]);

      if (!satisfies_constraint_I (disp_rtx))
        {
          rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
          emit_move_insn (tmp, disp_rtx);
          disp_rtx = tmp;
        }

      emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
                             spill_fill_data.init_reg[iter],
                             disp_rtx));

      /* Careful for being the first insn in a sequence.  */
      if (spill_fill_data.init_after)
        insn = emit_insn_after (seq, spill_fill_data.init_after);
      else
        {
          rtx_insn *first = get_insns ();
          if (first)
            insn = emit_insn_before (seq, first);
          else
            insn = emit_insn (seq);
        }
      spill_fill_data.init_after = insn;
    }

  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);

  /* ??? Not all of the spills are for varargs, but some of them are.
     The rest of the spills belong in an alias set of their own.  But
     it doesn't actually hurt to include them here.  */
  set_mem_alias_set (mem, get_varargs_alias_set ());

  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
  spill_fill_data.prev_off[iter] = cfa_off;

  if (++iter >= spill_fill_data.n_iter)
    iter = 0;
  spill_fill_data.next_iter = iter;

  return mem;
}
do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
          rtx frame_reg)
{
  int iter = spill_fill_data.next_iter;

  mem = spill_restore_mem (reg, cfa_off);
  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;

  if (frame_reg)
    {
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Don't even pretend that the unwind code can intuit its way
         through a pair of interleaved post_modify iterators.  Just
         provide the correct answer.  */

      if (frame_pointer_needed)
        base = hard_frame_pointer_rtx;
      else
        {
          base = stack_pointer_rtx;
          off = current_frame_info.total_size - cfa_off;
        }

      add_reg_note (insn, REG_CFA_OFFSET,
                    gen_rtx_SET (gen_rtx_MEM (GET_MODE (reg),
                                              plus_constant (Pmode,
                                                             base, off)),
                                 frame_reg));
    }
}

do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
{
  int iter = spill_fill_data.next_iter;

  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
                                GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;
}

/* Wrapper functions that discard the CONST_INT spill offset.  These
   exist so that we can give gr_spill/gr_fill the offset they need and
   use a consistent function interface.  */

gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_movdi (dest, src);
}

gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_fr_spill (dest, src);
}

gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_fr_restore (dest, src);
}
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

/* See Table 6.2 of the IA-64 Software Developer Manual, Volume 2.  */
#define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0)
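/* For example, a frame with 8 input and 40 local registers gives
   BACKING_STORE_SIZE (48) = (48 + 48/63 + 1) * 8 = 392 bytes: one RSE
   slot per stacked register plus room for the NaT collection words
   described in the manual section cited above.  */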
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  BS_SIZE
   is the size of the backing store.  ??? This clobbers r2 and r3.  */

ia64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
                             HOST_WIDE_INT bs_size)
{
  rtx r2 = gen_rtx_REG (Pmode, GR_REG (2));
  rtx r3 = gen_rtx_REG (Pmode, GR_REG (3));
  rtx p6 = gen_rtx_REG (BImode, PR_REG (6));

  /* On the IA-64 there is a second stack in memory, namely the Backing Store
     of the Register Stack Engine.  We also need to probe it after checking
     that the 2 stacks don't overlap.  */
  emit_insn (gen_bsp_value (r3));
  emit_move_insn (r2, GEN_INT (-(first + size)));

  /* Compare current value of BSP and SP registers.  */
  emit_insn (gen_rtx_SET (p6, gen_rtx_fmt_ee (LTU, BImode,
                                              r3, stack_pointer_rtx)));

  /* Compute the address of the probe for the Backing Store (which grows
     towards higher addresses).  We probe only at the first offset of
     the next page because some OS (eg Linux/ia64) only extend the
     backing store when this specific address is hit (but generate a SEGV
     on other address).  Page size is the worst case (4KB).  The reserve
     size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough.
     Also compute the address of the last probe for the memory stack
     (which grows towards lower addresses).  */
  emit_insn (gen_rtx_SET (r3, plus_constant (Pmode, r3, 4095)));
  emit_insn (gen_rtx_SET (r2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));

  /* Compare them and raise SEGV if the former has topped the latter.  */
  emit_insn (gen_rtx_COND_EXEC (VOIDmode,
                                gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
                                gen_rtx_SET (p6, gen_rtx_fmt_ee (GEU, BImode,
                                                                 r3, r2))));
  emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, r3, GEN_INT (12),
                                                /* ... */)));
  emit_insn (gen_rtx_COND_EXEC (VOIDmode,
                                gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
                                gen_rtx_TRAP_IF (VOIDmode, const1_rtx,
                                                 /* ... */)));

  /* Probe the Backing Store if necessary.  */
  if (bs_size > 0)
    emit_stack_probe (r3);

  /* Probe the memory stack if necessary.  */
  if (size == 0)
    ;

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  else if (size <= PROBE_INTERVAL)
    emit_stack_probe (r2);

  /* The run-time loop is made up of 9 insns in the generic case while this
     compile-time loop is made up of 5+2*(n-2) insns for n # of intervals.  */
  else if (size <= 4 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i;

      emit_move_insn (r2, GEN_INT (-(first + PROBE_INTERVAL)));
      emit_insn (gen_rtx_SET (r2,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
      emit_stack_probe (r2);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
         it exceeds SIZE.  If only two probes are needed, this will not
         generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
        {
          emit_insn (gen_rtx_SET (r2,
                                  plus_constant (Pmode, r2, -PROBE_INTERVAL)));
          emit_stack_probe (r2);
        }

      emit_insn (gen_rtx_SET (r2,
                              plus_constant (Pmode, r2,
                                             (i - PROBE_INTERVAL) - size)));
      emit_stack_probe (r2);
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;

      emit_move_insn (r2, GEN_INT (-first));

      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;

      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_insn (gen_rtx_SET (r2,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      if (rounded_size > (1 << 21))
        {
          emit_move_insn (r3, GEN_INT (-rounded_size));
          emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2, r3)));
        }
      else
        emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2,
                                                  GEN_INT (-rounded_size))));

      /* Step 3: the loop

           do
             {
               TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
               probe at TEST_ADDR
             }
           while (TEST_ADDR != LAST_ADDR)

         probes at FIRST + N * PROBE_INTERVAL for values of N from 1
         until it is equal to ROUNDED_SIZE.  */

      emit_insn (gen_probe_stack_range (r2, r2, r3));

      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
         that SIZE is equal to ROUNDED_SIZE.  */

      /* TEMP = SIZE - ROUNDED_SIZE.  */
      if (size != rounded_size)
        {
          emit_insn (gen_rtx_SET (r2, plus_constant (Pmode, r2,
                                                     rounded_size - size)));
          emit_stack_probe (r2);
        }
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  */

output_probe_stack_range (rtx reg1, rtx reg2)
{
  static int labelno = 0;

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg1;
  xops[1] = GEN_INT (-PROBE_INTERVAL);
  output_asm_insn ("addl %0 = %1, %0", xops);
  fputs ("\t;;\n", asm_out_file);

  /* Probe at TEST_ADDR.  */
  output_asm_insn ("probe.w.fault %0, 0", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[1] = reg2;
  xops[2] = gen_rtx_REG (BImode, PR_REG (6));
  output_asm_insn ("cmp.eq %2, %I2 = %0, %1", xops);

  /* Branch.  */
  fprintf (asm_out_file, "\t(%s) br.cond.dpnt ", reg_names [PR_REG (7)]);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
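/* The loop printed above comes out roughly as

     .LPSRL0:
        addl r2 = -4096, r2
        ;;
        probe.w.fault r2, 0
        cmp.eq p6, p7 = r2, r3
        (p7) br.cond.dpnt .LPSRL0

   assuming PROBE_INTERVAL is 4096 and REG1/REG2 happen to be r2/r3; the
   actual operands and label name depend on the registers passed in and
   on the target's local label conventions.  */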
/* Called after register allocation to add any instructions needed for the
   prologue.  Using a prologue insn is favored compared to putting all of the
   instructions in output_function_prologue(), since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.

   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
   so that the debug info generation code can handle them properly.

   The register save area is laid out like so:

        [ varargs spill area ]
        [ fr register spill area ]
        [ br register spill area ]
        [ ar register spill area ]
        [ pr register spill area ]
        [ gr register spill area ]  */

/* ??? Get inefficient code when the frame size is larger than can fit in an
   adds instruction.  */

ia64_expand_prologue (void)
{
  rtx ar_pfs_save_reg, ar_unat_save_reg;
  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;

  ia64_compute_frame_size (get_frame_size ());
  last_scratch_gr_reg = 15;

  if (flag_stack_usage_info)
    current_function_static_stack_size = current_frame_info.total_size;

  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
      || flag_stack_clash_protection)
    {
      HOST_WIDE_INT size = current_frame_info.total_size;
      int bs_size = BACKING_STORE_SIZE (current_frame_info.n_input_regs
                                        + current_frame_info.n_local_regs);

      if (crtl->is_leaf && !cfun->calls_alloca)
        {
          if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
            ia64_emit_probe_stack_range (get_stack_check_protect (),
                                         size - get_stack_check_protect (),
                                         bs_size);
          else if (size + bs_size > get_stack_check_protect ())
            ia64_emit_probe_stack_range (get_stack_check_protect (),
                                         0, bs_size);
        }
      else if (size + bs_size > 0)
        ia64_emit_probe_stack_range (get_stack_check_protect (), size, bs_size);
    }

  if (dump_file)
    {
      fprintf (dump_file, "ia64 frame related registers "
               "recorded in current_frame_info.r[]:\n");
#define PRINTREG(a) if (current_frame_info.r[a]) \
        fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])

      PRINTREG(reg_save_b0);
      PRINTREG(reg_save_pr);
      PRINTREG(reg_save_ar_pfs);
      PRINTREG(reg_save_ar_unat);
      PRINTREG(reg_save_ar_lc);
      PRINTREG(reg_save_gp);
    }

  /* If there is no epilogue, then we don't need some prologue insns.
     We need to avoid emitting the dead prologue insns, because flow
     will complain about them.  */
  if (optimize)
    {
      edge e;
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
        if ((e->flags & EDGE_FAKE) == 0
            && (e->flags & EDGE_FALLTHRU) != 0)
          break;
      epilogue_p = (e != NULL);
    }
  else
    epilogue_p = 1;

  /* Set the local, input, and output register names.  We need to do this
     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
     half.  If we use in/loc/out register names, then we get assembler errors
     in crtn.S because there is no alloc insn or regstk directive in there.  */
  if (! TARGET_REG_NAMES)
    {
      int inputs = current_frame_info.n_input_regs;
      int locals = current_frame_info.n_local_regs;
      int outputs = current_frame_info.n_output_regs;

      for (i = 0; i < inputs; i++)
        reg_names[IN_REG (i)] = ia64_reg_numbers[i];
      for (i = 0; i < locals; i++)
        reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
      for (i = 0; i < outputs; i++)
        reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
    }

  /* Set the frame pointer register name.  The regnum is logically loc79,
     but of course we'll not have allocated that many locals.  Rather than
     worrying about renumbering the existing rtxs, we adjust the name.  */
  /* ??? This code means that we can never use one local register when
     there is a frame pointer.  loc79 gets wasted in this case, as it is
     renamed to a register that will never be used.  See also the try_locals
     code in find_gr_spill.  */
  if (current_frame_info.r[reg_fp])
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
        = reg_names[current_frame_info.r[reg_fp]];
      reg_names[current_frame_info.r[reg_fp]] = tmp;
    }

  /* We don't need an alloc instruction if we've used no outputs or locals.  */
  if (current_frame_info.n_local_regs == 0
      && current_frame_info.n_output_regs == 0
      && current_frame_info.n_input_regs <= crtl->args.info.int_regs
      && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
    {
      /* If there is no alloc, but there are input registers used, then we
         need a .regstk directive.  */
      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
      ar_pfs_save_reg = NULL_RTX;
    }
  else
    {
      current_frame_info.need_regstk = 0;

      if (current_frame_info.r[reg_save_ar_pfs])
        {
          regno = current_frame_info.r[reg_save_ar_pfs];
          reg_emitted (reg_save_ar_pfs);
        }
      else
        regno = next_scratch_gr_reg ();
      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);

      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
                                   GEN_INT (current_frame_info.n_input_regs),
                                   GEN_INT (current_frame_info.n_local_regs),
                                   GEN_INT (current_frame_info.n_output_regs),
                                   GEN_INT (current_frame_info.n_rotate_regs)));
      if (current_frame_info.r[reg_save_ar_pfs])
        {
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_REGISTER,
                        gen_rtx_SET (ar_pfs_save_reg,
                                     gen_rtx_REG (DImode, AR_PFS_REGNUM)));
        }
    }

  /* Set up frame pointer, stack pointer, and spill iterators.  */

  n_varargs = cfun->machine->n_varargs;
  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
                        stack_pointer_rtx, 0);

  if (frame_pointer_needed)
    {
      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Force the unwind info to recognize this as defining a new CFA,
         rather than some temp register setup.  */
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
    }

  if (current_frame_info.total_size != 0)
    {
      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);

      if (satisfies_constraint_I (frame_size_rtx))
        offset = frame_size_rtx;
      else
        {
          regno = next_scratch_gr_reg ();
          offset = gen_rtx_REG (DImode, regno);
          emit_move_insn (offset, frame_size_rtx);
        }

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
                                    stack_pointer_rtx, offset));

      if (! frame_pointer_needed)
        {
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_ADJUST_CFA,
                        gen_rtx_SET (stack_pointer_rtx,
                                     gen_rtx_PLUS (DImode,
                                                   stack_pointer_rtx,
                                                   frame_size_rtx)));
        }

      /* ??? At this point we must generate a magic insn that appears to
         modify the stack pointer, the frame pointer, and all spill
         iterators.  This would allow the most scheduling freedom.  For
         now, just hard stop.  */
      emit_insn (gen_blockage ());
    }

  /* Must copy out ar.unat before doing any integer spills.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.r[reg_save_ar_unat])
        {
          ar_unat_save_reg
            = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
          reg_emitted (reg_save_ar_unat);
        }
      else
        {
          alt_regno = next_scratch_gr_reg ();
          ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
          current_frame_info.gr_used_mask |= 1 << alt_regno;
        }

      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      insn = emit_move_insn (ar_unat_save_reg, reg);
      if (current_frame_info.r[reg_save_ar_unat])
        {
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
        }

      /* Even if we're not going to generate an epilogue, we still
         need to save the register so that EH works.  */
      if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
        emit_insn (gen_prologue_use (ar_unat_save_reg));
    }
  else
    ar_unat_save_reg = NULL_RTX;

  /* Spill all varargs registers.  Do this before spilling any GR registers,
     since we want the UNAT bits for the GR registers to override the UNAT
     bits from varargs, which we don't care about.  */

  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
    {
      reg = gen_rtx_REG (DImode, regno);
      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
             + current_frame_info.spill_size
             + current_frame_info.extra_spill_size);

  /* Save the predicate register block either in a register or in memory.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, PR_REG (0));
      if (current_frame_info.r[reg_save_pr] != 0)
        {
          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
          reg_emitted (reg_save_pr);
          insn = emit_move_insn (alt_reg, reg);

          /* ??? Denote pr spill/fill by a DImode move that modifies all
             64 hard registers.  */
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);

          /* Even if we're not going to generate an epilogue, we still
             need to save the register so that EH works.  */
          if (! epilogue_p)
            emit_insn (gen_prologue_use (alt_reg));
        }
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          insn = emit_move_insn (alt_reg, reg);
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
        }
    }

  /* Handle AR regs in numerical order.  All of them get special handling.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
      && current_frame_info.r[reg_save_ar_unat] == 0)
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
    }

  /* The alloc insn already copied ar.pfs into a general register.  The
     only thing we have to do now is copy that register to a stack slot
     if we'd not allocated a local register for the job.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
      && current_frame_info.r[reg_save_ar_pfs] == 0)
    {
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      if (current_frame_info.r[reg_save_ar_lc] != 0)
        {
          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
          reg_emitted (reg_save_ar_lc);
          insn = emit_move_insn (alt_reg, reg);
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);

          /* Even if we're not going to generate an epilogue, we still
             need to save the register so that EH works.  */
          if (! epilogue_p)
            emit_insn (gen_prologue_use (alt_reg));
        }
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          emit_move_insn (alt_reg, reg);
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
        }
    }

  /* Save the return pointer.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, BR_REG (0));
      if (current_frame_info.r[reg_save_b0] != 0)
        {
          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
          reg_emitted (reg_save_b0);
          insn = emit_move_insn (alt_reg, reg);
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (alt_reg, pc_rtx));

          /* Even if we're not going to generate an epilogue, we still
             need to save the register so that EH works.  */
          if (! epilogue_p)
            emit_insn (gen_prologue_use (alt_reg));
        }
      else
        {
          alt_regno = next_scratch_gr_reg ();
          alt_reg = gen_rtx_REG (DImode, alt_regno);
          emit_move_insn (alt_reg, reg);
          do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
        }
    }

  if (current_frame_info.r[reg_save_gp])
    {
      reg_emitted (reg_save_gp);
      insn = emit_move_insn (gen_rtx_REG (DImode,
                                          current_frame_info.r[reg_save_gp]),
                             pic_offset_table_rtx);
    }

  /* We should now be at the base of the gr/br/fr spill area.  */
  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
                          + current_frame_info.spill_size));

  /* Spill all general registers.  */
  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
        reg = gen_rtx_REG (DImode, regno);
        do_spill (gen_gr_spill, reg, cfa_off, reg);
      }

  /* Spill the rest of the BR registers.  */
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
        alt_regno = next_scratch_gr_reg ();
        alt_reg = gen_rtx_REG (DImode, alt_regno);
        reg = gen_rtx_REG (DImode, regno);
        emit_move_insn (alt_reg, reg);
        do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
      }

  /* Align the frame and spill all FR registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
        gcc_assert (!(cfa_off & 15));
        reg = gen_rtx_REG (XFmode, regno);
        do_spill (gen_fr_spill_x, reg, cfa_off, reg);
      }

  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);

  finish_spill_pointers ();
}
/* Output the textual info surrounding the prologue.  */

ia64_start_function (FILE *file, const char *fnname,
                     tree decl ATTRIBUTE_UNUSED)
{
#if TARGET_ABI_OPEN_VMS
  vms_start_function (fnname);
#endif

  fputs ("\t.proc ", file);
  assemble_name (file, fnname);
  fputc ('\n', file);
  ASM_OUTPUT_FUNCTION_LABEL (file, fnname, decl);
}
3906 /* Called after register allocation to add any instructions needed for the
3907 epilogue. Using an epilogue insn is favored compared to putting all of the
3908 instructions in output_function_prologue(), since it allows the scheduler
3909 to intermix instructions with the saves of the caller saved registers. In
3910 some cases, it might be necessary to emit a barrier instruction as the last
3911 insn to prevent such scheduling. */
3914 ia64_expand_epilogue (int sibcall_p
)
3917 rtx reg
, alt_reg
, ar_unat_save_reg
;
3918 int regno
, alt_regno
, cfa_off
;
3920 ia64_compute_frame_size (get_frame_size ());
3922 /* If there is a frame pointer, then we use it instead of the stack
3923 pointer, so that the stack pointer does not need to be valid when
3924 the epilogue starts. See EXIT_IGNORE_STACK. */
3925 if (frame_pointer_needed
)
3926 setup_spill_pointers (current_frame_info
.n_spilled
,
3927 hard_frame_pointer_rtx
, 0);
3929 setup_spill_pointers (current_frame_info
.n_spilled
, stack_pointer_rtx
,
3930 current_frame_info
.total_size
);
3932 if (current_frame_info
.total_size
!= 0)
3934 /* ??? At this point we must generate a magic insn that appears to
3935 modify the spill iterators and the frame pointer. This would
3936 allow the most scheduling freedom. For now, just hard stop. */
3937 emit_insn (gen_blockage ());
3940 /* Locate the bottom of the register save area. */
3941 cfa_off
= (current_frame_info
.spill_cfa_off
3942 + current_frame_info
.spill_size
3943 + current_frame_info
.extra_spill_size
);
3945 /* Restore the predicate registers. */
3946 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, PR_REG (0)))
3948 if (current_frame_info
.r
[reg_save_pr
] != 0)
3950 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_pr
]);
3951 reg_emitted (reg_save_pr
);
3955 alt_regno
= next_scratch_gr_reg ();
3956 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3957 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
3960 reg
= gen_rtx_REG (DImode
, PR_REG (0));
3961 emit_move_insn (reg
, alt_reg
);
3964 /* Restore the application registers. */
3966 /* Load the saved unat from the stack, but do not restore it until
3967 after the GRs have been restored. */
3968 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_UNAT_REGNUM
))
3970 if (current_frame_info
.r
[reg_save_ar_unat
] != 0)
3973 = gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_ar_unat
]);
3974 reg_emitted (reg_save_ar_unat
);
3978 alt_regno
= next_scratch_gr_reg ();
3979 ar_unat_save_reg
= gen_rtx_REG (DImode
, alt_regno
);
3980 current_frame_info
.gr_used_mask
|= 1 << alt_regno
;
3981 do_restore (gen_movdi_x
, ar_unat_save_reg
, cfa_off
);
3986 ar_unat_save_reg
= NULL_RTX
;
3988 if (current_frame_info
.r
[reg_save_ar_pfs
] != 0)
3990 reg_emitted (reg_save_ar_pfs
);
3991 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_ar_pfs
]);
3992 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
3993 emit_move_insn (reg
, alt_reg
);
3995 else if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_PFS_REGNUM
))
3997 alt_regno
= next_scratch_gr_reg ();
3998 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
3999 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
4001 reg
= gen_rtx_REG (DImode
, AR_PFS_REGNUM
);
4002 emit_move_insn (reg
, alt_reg
);
4005 if (TEST_HARD_REG_BIT (current_frame_info
.mask
, AR_LC_REGNUM
))
4007 if (current_frame_info
.r
[reg_save_ar_lc
] != 0)
4009 alt_reg
= gen_rtx_REG (DImode
, current_frame_info
.r
[reg_save_ar_lc
]);
4010 reg_emitted (reg_save_ar_lc
);
4014 alt_regno
= next_scratch_gr_reg ();
4015 alt_reg
= gen_rtx_REG (DImode
, alt_regno
);
4016 do_restore (gen_movdi_x
, alt_reg
, cfa_off
);
4019 reg
= gen_rtx_REG (DImode
, AR_LC_REGNUM
);
4020 emit_move_insn (reg
, alt_reg
);
4023 /* Restore the return pointer. */
4024 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4026 if (current_frame_info.r[reg_save_b0] != 0)
4028 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4029 reg_emitted (reg_save_b0);
4033 alt_regno = next_scratch_gr_reg ();
4034 alt_reg = gen_rtx_REG (DImode, alt_regno);
4035 do_restore (gen_movdi_x, alt_reg, cfa_off);
4038 reg = gen_rtx_REG (DImode, BR_REG (0));
4039 emit_move_insn (reg, alt_reg);
4042 /* We should now be at the base of the gr/br/fr spill area. */
4043 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
4044 + current_frame_info.spill_size));
4046 /* The GP may be stored on the stack in the prologue, but it's
4047 never restored in the epilogue. Skip the stack slot. */
4048 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
4051 /* Restore all general registers. */
4052 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
4053 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4055 reg = gen_rtx_REG (DImode, regno);
4056 do_restore (gen_gr_restore, reg, cfa_off);
4060 /* Restore the branch registers. */
4061 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
4062 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4064 alt_regno = next_scratch_gr_reg ();
4065 alt_reg = gen_rtx_REG (DImode, alt_regno);
4066 do_restore (gen_movdi_x, alt_reg, cfa_off);
4068 reg = gen_rtx_REG (DImode, regno);
4069 emit_move_insn (reg, alt_reg);
4072 /* Restore floating point registers. */
4073 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
4074 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4076 gcc_assert (!(cfa_off & 15));
4077 reg = gen_rtx_REG (XFmode, regno);
4078 do_restore (gen_fr_restore_x, reg, cfa_off);
4082 /* Restore ar.unat for real. */
4083 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
4085 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
4086 emit_move_insn (reg, ar_unat_save_reg);
4089 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
4091 finish_spill_pointers ();
4093 if (current_frame_info
.total_size
4094 || cfun
->machine
->ia64_eh_epilogue_sp
4095 || frame_pointer_needed
)
4097 /* ??? At this point we must generate a magic insn that appears to
4098 modify the spill iterators, the stack pointer, and the frame
4099 pointer. This would allow the most scheduling freedom. For now,
4101 emit_insn (gen_blockage ());
4104 if (cfun
->machine
->ia64_eh_epilogue_sp
)
4105 emit_move_insn (stack_pointer_rtx
, cfun
->machine
->ia64_eh_epilogue_sp
);
4106 else if (frame_pointer_needed
)
4108 insn
= emit_move_insn (stack_pointer_rtx
, hard_frame_pointer_rtx
);
4109 RTX_FRAME_RELATED_P (insn
) = 1;
4110 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL
);
4112 else if (current_frame_info
.total_size
)
4114 rtx offset
, frame_size_rtx
;
4116 frame_size_rtx
= GEN_INT (current_frame_info
.total_size
);
4117 if (satisfies_constraint_I (frame_size_rtx
))
4118 offset
= frame_size_rtx
;
4121 regno
= next_scratch_gr_reg ();
4122 offset
= gen_rtx_REG (DImode
, regno
);
4123 emit_move_insn (offset
, frame_size_rtx
);
4126 insn
= emit_insn (gen_adddi3 (stack_pointer_rtx
, stack_pointer_rtx
,
4129 RTX_FRAME_RELATED_P (insn
) = 1;
4130 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
4131 gen_rtx_SET (stack_pointer_rtx
,
4132 gen_rtx_PLUS (DImode
,
4137 if (cfun
->machine
->ia64_eh_epilogue_bsp
)
4138 emit_insn (gen_set_bsp (cfun
->machine
->ia64_eh_epilogue_bsp
));
4141 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode
, BR_REG (0))));
4144 int fp
= GR_REG (2);
4145 /* We need a throw away register here, r0 and r1 are reserved,
4146 so r2 is the first available call clobbered register. If
4147 there was a frame_pointer register, we may have swapped the
4148 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
4149 sure we're using the string "r2" when emitting the register
4150 name for the assembler. */
4151 if (current_frame_info
.r
[reg_fp
]
4152 && current_frame_info
.r
[reg_fp
] == GR_REG (2))
4153 fp
= HARD_FRAME_POINTER_REGNUM
;
4155 /* We must emit an alloc to force the input registers to become output
4156 registers. Otherwise, if the callee tries to pass its parameters
4157 through to another call without an intervening alloc, then these
4159 /* ??? We don't need to preserve all input registers. We only need to
4160 preserve those input registers used as arguments to the sibling call.
4161 It is unclear how to compute that number here. */
4162 if (current_frame_info
.n_input_regs
!= 0)
4164 rtx n_inputs
= GEN_INT (current_frame_info
.n_input_regs
);
4166 insn
= emit_insn (gen_alloc (gen_rtx_REG (DImode
, fp
),
4167 const0_rtx
, const0_rtx
,
4168 n_inputs
, const0_rtx
));
4169 RTX_FRAME_RELATED_P (insn
) = 1;
4171 /* ??? We need to mark the alloc as frame-related so that it gets
4172 passed into ia64_asm_unwind_emit for ia64-specific unwinding.
4173 But there's nothing dwarf2 related to be done wrt the register
4174 windows. If we do nothing, dwarf2out will abort on the UNSPEC;
4175 the empty parallel means dwarf2out will not see anything. */
4176 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
4177 gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (0)));
4182 /* Return 1 if br.ret can do all the work required to return from a function. */
4186 ia64_direct_return (void)
4188 if (reload_completed && ! frame_pointer_needed)
4190 ia64_compute_frame_size (get_frame_size ());
4192 return (current_frame_info.total_size == 0
4193 && current_frame_info.n_spilled == 0
4194 && current_frame_info.r[reg_save_b0] == 0
4195 && current_frame_info.r[reg_save_pr] == 0
4196 && current_frame_info.r[reg_save_ar_pfs] == 0
4197 && current_frame_info.r[reg_save_ar_unat] == 0
4198 && current_frame_info.r[reg_save_ar_lc] == 0);
4203 /* Return the magic cookie that we use to hold the return address
4204 during early compilation. */
4207 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
4211 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
4214 /* Split this value after reload, now that we know where the return
4215 address is saved. */
4218 ia64_split_return_addr_rtx (rtx dest)
4222 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4224 if (current_frame_info.r[reg_save_b0] != 0)
4226 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4227 reg_emitted (reg_save_b0);
4235 /* Compute offset from CFA for BR0. */
4236 /* ??? Must be kept in sync with ia64_expand_prologue. */
4237 off = (current_frame_info.spill_cfa_off
4238 + current_frame_info.spill_size);
4239 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
4240 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4243 /* Convert CFA offset to a register based offset. */
4244 if (frame_pointer_needed)
4245 src = hard_frame_pointer_rtx;
4248 src = stack_pointer_rtx;
4249 off += current_frame_info.total_size;
4252 /* Load address into scratch register. */
4253 off_r = GEN_INT (off);
4254 if (satisfies_constraint_I (off_r))
4255 emit_insn (gen_adddi3 (dest, src, off_r));
4258 emit_move_insn (dest, off_r);
4259 emit_insn (gen_adddi3 (dest, src, dest));
4262 src = gen_rtx_MEM (Pmode, dest);
4266 src = gen_rtx_REG (DImode, BR_REG (0));
4268 emit_move_insn (dest, src);
4272 ia64_hard_regno_rename_ok (int from, int to)
4274 /* Don't clobber any of the registers we reserved for the prologue. */
4277 for (r = reg_fp; r <= reg_save_ar_lc; r++)
4278 if (to == current_frame_info.r[r]
4279 || from == current_frame_info.r[r]
4280 || to == emitted_frame_related_regs[r]
4281 || from == emitted_frame_related_regs[r])
4284 /* Don't use output registers outside the register frame. */
4285 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
4288 /* Retain even/oddness on predicate register pairs. */
4289 if (PR_REGNO_P (from) && PR_REGNO_P (to))
4290 return (from & 1) == (to & 1);
4295 /* Implement TARGET_HARD_REGNO_NREGS.
4297 ??? We say that BImode PR values require two registers. This allows us to
4298 easily store the normal and inverted values. We use CCImode to indicate
4299 a single predicate register. */
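/* Illustrative sketch of the rule above (register numbers and modes chosen
   only for the example, assuming UNITS_PER_WORD == 8):

     ia64_hard_regno_nregs (PR_REG (6), BImode)   -> 2   (normal + inverted value)
     ia64_hard_regno_nregs (PR_REG (6), CCImode)  -> 1   (single predicate)
     ia64_hard_regno_nregs (GR_REG (4), DImode)   -> CEIL (8, 8)  == 1
     ia64_hard_regno_nregs (GR_REG (4), TImode)   -> CEIL (16, 8) == 2  */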
4302 ia64_hard_regno_nregs (unsigned int regno, machine_mode mode)
4304 if (regno == PR_REG (0) && mode == DImode)
4306 if (PR_REGNO_P (regno) && (mode) == BImode)
4308 if ((PR_REGNO_P (regno) || GR_REGNO_P (regno)) && mode == CCImode)
4310 if (FR_REGNO_P (regno) && mode == XFmode)
4312 if (FR_REGNO_P (regno) && mode == RFmode)
4314 if (FR_REGNO_P (regno) && mode == XCmode)
4316 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
4319 /* Implement TARGET_HARD_REGNO_MODE_OK. */
4322 ia64_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
4324 if (FR_REGNO_P (regno))
4325 return (GET_MODE_CLASS (mode) != MODE_CC
4329 if (PR_REGNO_P (regno))
4330 return mode == BImode || GET_MODE_CLASS (mode) == MODE_CC;
4332 if (GR_REGNO_P (regno))
4333 return mode != XFmode && mode != XCmode && mode != RFmode;
4335 if (AR_REGNO_P (regno))
4336 return mode == DImode;
4338 if (BR_REGNO_P (regno))
4339 return mode == DImode;
4344 /* Implement TARGET_MODES_TIEABLE_P.
4346 Don't tie integer and FP modes, as that causes us to get integer registers
4347 allocated for FP instructions. XFmode only supported in FP registers so
4348 we can't tie it with any other modes. */
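/* For example, under the check implemented below SFmode and DFmode may be
   tied (same class, neither in the XF/XC/RF group, neither BImode), DFmode
   and XFmode may not, and BImode ties only with itself.  Illustrative only:

     ia64_modes_tieable_p (SFmode, DFmode)  -> true
     ia64_modes_tieable_p (DFmode, XFmode)  -> false
     ia64_modes_tieable_p (BImode, QImode)  -> false  */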
4351 ia64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
4353 return (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)
4354 && ((mode1 == XFmode || mode1 == XCmode || mode1 == RFmode)
4355 == (mode2 == XFmode || mode2 == XCmode || mode2 == RFmode))
4356 && (mode1 == BImode) == (mode2 == BImode));
4359 /* Target hook for assembling integer objects. Handle word-sized
4360 aligned objects and detect the cases when @fptr is needed. */
4363 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
4365 if (size == POINTER_SIZE / BITS_PER_UNIT
4366 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
4367 && GET_CODE (x) == SYMBOL_REF
4368 && SYMBOL_REF_FUNCTION_P (x))
4370 static const char * const directive[2][2] = {
4371 /* 64-bit pointer */ /* 32-bit pointer */
4372 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
4373 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
4375 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
4376 output_addr_const (asm_out_file, x);
4377 fputs (")\n", asm_out_file);
4380 return default_assemble_integer (x, size, aligned_p);
4383 /* Emit the function prologue. */
4386 ia64_output_function_prologue (FILE *file
)
4388 int mask
, grsave
, grsave_prev
;
4390 if (current_frame_info
.need_regstk
)
4391 fprintf (file
, "\t.regstk %d, %d, %d, %d\n",
4392 current_frame_info
.n_input_regs
,
4393 current_frame_info
.n_local_regs
,
4394 current_frame_info
.n_output_regs
,
4395 current_frame_info
.n_rotate_regs
);
4397 if (ia64_except_unwind_info (&global_options
) != UI_TARGET
)
4400 /* Emit the .prologue directive. */
4403 grsave
= grsave_prev
= 0;
4404 if (current_frame_info
.r
[reg_save_b0
] != 0)
4407 grsave
= grsave_prev
= current_frame_info
.r
[reg_save_b0
];
4409 if (current_frame_info
.r
[reg_save_ar_pfs
] != 0
4410 && (grsave_prev
== 0
4411 || current_frame_info
.r
[reg_save_ar_pfs
] == grsave_prev
+ 1))
4414 if (grsave_prev
== 0)
4415 grsave
= current_frame_info
.r
[reg_save_ar_pfs
];
4416 grsave_prev
= current_frame_info
.r
[reg_save_ar_pfs
];
4418 if (current_frame_info
.r
[reg_fp
] != 0
4419 && (grsave_prev
== 0
4420 || current_frame_info
.r
[reg_fp
] == grsave_prev
+ 1))
4423 if (grsave_prev
== 0)
4424 grsave
= HARD_FRAME_POINTER_REGNUM
;
4425 grsave_prev
= current_frame_info
.r
[reg_fp
];
4427 if (current_frame_info
.r
[reg_save_pr
] != 0
4428 && (grsave_prev
== 0
4429 || current_frame_info
.r
[reg_save_pr
] == grsave_prev
+ 1))
4432 if (grsave_prev
== 0)
4433 grsave
= current_frame_info
.r
[reg_save_pr
];
4436 if (mask
&& TARGET_GNU_AS
)
4437 fprintf (file
, "\t.prologue %d, %d\n", mask
,
4438 ia64_debugger_regno (grsave
));
4440 fputs ("\t.prologue\n", file
);
4442 /* Emit a .spill directive, if necessary, to relocate the base of
4443 the register spill area. */
4444 if (current_frame_info.spill_cfa_off != -16)
4445 fprintf (file, "\t.spill %ld\n",
4446 (long) (current_frame_info.spill_cfa_off
4447 + current_frame_info.spill_size));
4450 /* Emit the .body directive at the scheduled end of the prologue. */
4453 ia64_output_function_end_prologue (FILE *file)
4455 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4458 fputs ("\t.body\n", file);
4461 /* Emit the function epilogue. */
4464 ia64_output_function_epilogue (FILE *)
4468 if (current_frame_info.r[reg_fp])
4470 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
4471 reg_names[HARD_FRAME_POINTER_REGNUM]
4472 = reg_names[current_frame_info.r[reg_fp]];
4473 reg_names[current_frame_info.r[reg_fp]] = tmp;
4474 reg_emitted (reg_fp);
4476 if (! TARGET_REG_NAMES)
4478 for (i = 0; i < current_frame_info.n_input_regs; i++)
4479 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
4480 for (i = 0; i < current_frame_info.n_local_regs; i++)
4481 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
4482 for (i = 0; i < current_frame_info.n_output_regs; i++)
4483 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
4486 current_frame_info.initialized = 0;
4490 ia64_debugger_regno (int regno)
4492 /* In ia64_expand_prologue we quite literally renamed the frame pointer
4493 from its home at loc79 to something inside the register frame. We
4494 must perform the same renumbering here for the debug info. */
4495 if (current_frame_info.r[reg_fp])
4497 if (regno == HARD_FRAME_POINTER_REGNUM)
4498 regno = current_frame_info.r[reg_fp];
4499 else if (regno == current_frame_info.r[reg_fp])
4500 regno = HARD_FRAME_POINTER_REGNUM;
4503 if (IN_REGNO_P (regno))
4504 return 32 + regno - IN_REG (0);
4505 else if (LOC_REGNO_P (regno))
4506 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
4507 else if (OUT_REGNO_P (regno))
4508 return (32 + current_frame_info.n_input_regs
4509 + current_frame_info.n_local_regs + regno - OUT_REG (0));
4514 /* Implement TARGET_TRAMPOLINE_INIT.
4516 The trampoline should set the static chain pointer to value placed
4517 into the trampoline and should branch to the specified routine.
4518 To make the normal indirect-subroutine calling convention work,
4519 the trampoline must look like a function descriptor; the first
4520 word being the target address and the second being the target's global pointer.
4523 We abuse the concept of a global pointer by arranging for it
4524 to point to the data we need to load. The complete trampoline
4525 has the following form:
4527 +-------------------+ \
4528 TRAMP: | __ia64_trampoline | |
4529 +-------------------+ > fake function descriptor
4531 +-------------------+ /
4532 | target descriptor |
4533 +-------------------+
4535 +-------------------+
4539 ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
4541 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4542 rtx addr, addr_reg, tramp, eight = GEN_INT (8);
4544 /* The Intel assembler requires that the global __ia64_trampoline symbol
4545 be declared explicitly */
4548 static bool declared_ia64_trampoline = false;
4550 if (!declared_ia64_trampoline)
4552 declared_ia64_trampoline = true;
4553 (*targetm.asm_out.globalize_label) (asm_out_file,
4554 "__ia64_trampoline");
4558 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
4559 addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
4560 fnaddr = convert_memory_address (Pmode, fnaddr);
4561 static_chain = convert_memory_address (Pmode, static_chain);
4563 /* Load up our iterator. */
4564 addr_reg = copy_to_reg (addr);
4565 m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
4567 /* The first two words are the fake descriptor:
4568 __ia64_trampoline, ADDR+16. */
4569 tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4570 if (TARGET_ABI_OPEN_VMS)
4572 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4573 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4574 relocation against function symbols to make it identical to the
4575 LTOFF_FPTR22 relocation. Emit the latter directly to stay within
4576 strict ELF and dereference to get the bare code address. */
4577 rtx reg = gen_reg_rtx (Pmode);
4578 SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4579 emit_move_insn (reg, tramp);
4580 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4583 emit_move_insn (m_tramp, tramp);
4584 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4585 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4587 emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (Pmode, addr, 16)));
4588 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4589 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4591 /* The third word is the target descriptor. */
4592 emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
4593 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4594 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4596 /* The fourth word is the static chain. */
4597 emit_move_insn (m_tramp, static_chain);
4600 /* Do any needed setup for a variadic function. CUM has not been updated
4601 for the last named argument, which is given by ARG.
4603 We generate the actual spill instructions during prologue generation. */
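/* Illustrative sketch (assuming MAX_ARGUMENT_SLOTS == 8 and
   UNITS_PER_WORD == 8): for

     int f (int a, ...);

   the single named argument consumes one slot, so n = 8 - 1 = 7 and
   *pretend_size becomes 7 * 8 = 56 bytes of register save area for the
   anonymous register arguments.  */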
4606 ia64_setup_incoming_varargs (cumulative_args_t cum,
4607 const function_arg_info &arg,
4609 int second_time ATTRIBUTE_UNUSED)
4611 CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum);
4613 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl)))
4614 /* Skip the current argument. */
4615 ia64_function_arg_advance (pack_cumulative_args (&next_cum), arg);
4617 if (next_cum.words < MAX_ARGUMENT_SLOTS)
4619 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
4620 *pretend_size = n * UNITS_PER_WORD;
4621 cfun->machine->n_varargs = n;
4625 /* Check whether TYPE is a homogeneous floating point aggregate. If
4626 it is, return the mode of the floating point type that appears
4627 in all leafs. If it is not, return VOIDmode.
4629 An aggregate is a homogeneous floating point aggregate if all
4630 fields/elements in it have the same floating point type (e.g.,
4631 SFmode). 128-bit quad-precision floats are excluded.
4633 Variable sized aggregates should never arrive here, since we should
4634 have already decided to pass them by reference. Top-level zero-sized
4635 aggregates are excluded because our parallels crash the middle-end. */
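/* Illustrative examples (hypothetical types, not from the sources):

     struct hfa2  { double x, y; };      -- HFA, element mode DFmode
     struct hfa4  { float v[4]; };       -- HFA, element mode SFmode
     struct mixed { double x; int n; };  -- not an HFA: mixed member types

   hfa_element_mode returns the common element mode for the first two and
   VOIDmode for the third; a 128-bit quad-precision member likewise
   disqualifies the aggregate, as noted above.  */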
4638 hfa_element_mode (const_tree type, bool nested)
4640 machine_mode element_mode = VOIDmode;
4642 enum tree_code code = TREE_CODE (type);
4643 int know_element_mode = 0;
4646 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4651 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
4652 case BOOLEAN_TYPE: case POINTER_TYPE:
4653 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
4654 case LANG_TYPE: case FUNCTION_TYPE:
4657 /* Fortran complex types are supposed to be HFAs, so we need to handle
4658 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
4661 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
4662 && TYPE_MODE (type) != TCmode)
4663 return GET_MODE_INNER (TYPE_MODE (type));
4668 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4669 mode if this is contained within an aggregate. */
4670 if (nested && TYPE_MODE (type) != TFmode)
4671 return TYPE_MODE (type);
4676 return hfa_element_mode (TREE_TYPE (type), 1);
4680 case QUAL_UNION_TYPE:
4681 for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
4683 if (TREE_CODE (t) != FIELD_DECL || DECL_FIELD_ABI_IGNORED (t))
4686 mode = hfa_element_mode (TREE_TYPE (t), 1);
4687 if (know_element_mode)
4689 if (mode != element_mode)
4692 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4696 know_element_mode = 1;
4697 element_mode = mode;
4700 return element_mode;
4703 /* If we reach here, we probably have some front-end specific type
4704 that the backend doesn't know about. This can happen via the
4705 aggregate_value_p call in init_function_start. All we can do is
4706 ignore unknown tree types. */
4713 /* Return the number of words required to hold a quantity of TYPE and MODE
4714 when passed as an argument. */
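/* For example (assuming UNITS_PER_WORD == 8), a 20-byte BLKmode aggregate
   occupies (20 + 8 - 1) / 8 == 3 argument words.  */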
4716 ia64_function_arg_words (const_tree type, machine_mode mode)
4720 if (mode == BLKmode)
4721 words = int_size_in_bytes (type);
4723 words = GET_MODE_SIZE (mode);
4725 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
4728 /* Return the number of registers that should be skipped so the current
4729 argument (described by TYPE and WORDS) will be properly aligned.
4731 Integer and float arguments larger than 8 bytes start at the next
4732 even boundary. Aggregates larger than 8 bytes start at the next
4733 even boundary if the aggregate has 16 byte alignment. Note that
4734 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4735 but are still to be aligned in registers.
4737 ??? The ABI does not specify how to handle aggregates with
4738 alignment from 9 to 15 bytes, or greater than 16. We handle them
4739 all as if they had 16 byte alignment. Such aggregates can occur
4740 only if gcc extensions are used. */
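/* Worked example (illustrative): with cum->words == 3 (odd) and a 16-byte
   aligned aggregate, one slot is skipped and the argument starts at slot 4;
   with an even cum->words, or on VMS, no slot is skipped.  */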
4742 ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
4743 const_tree type, int words)
4745 /* No registers are skipped on VMS. */
4746 if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
4750 && TREE_CODE (type) != INTEGER_TYPE
4751 && TREE_CODE (type) != REAL_TYPE)
4752 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4757 /* Return rtx for register where argument is passed, or zero if it is passed
4759 /* ??? 128-bit quad-precision floats are always passed in general
4763 ia64_function_arg_1 (cumulative_args_t cum_v
, const function_arg_info
&arg
,
4766 const CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
4768 int basereg
= (incoming
? GR_ARG_FIRST
: AR_ARG_FIRST
);
4769 int words
= ia64_function_arg_words (arg
.type
, arg
.mode
);
4770 int offset
= ia64_function_arg_offset (cum
, arg
.type
, words
);
4771 machine_mode hfa_mode
= VOIDmode
;
4773 /* For OPEN VMS, emit the instruction setting up the argument register here,
4774 when we know this will be together with the other arguments setup related
4775 insns. This is not the conceptually best place to do this, but this is
4776 the easiest as we have convenient access to cumulative args info. */
4778 if (TARGET_ABI_OPEN_VMS
&& arg
.end_marker_p ())
4780 unsigned HOST_WIDE_INT regval
= cum
->words
;
4783 for (i
= 0; i
< 8; i
++)
4784 regval
|= ((int) cum
->atypes
[i
]) << (i
* 3 + 8);
4786 emit_move_insn (gen_rtx_REG (DImode
, GR_REG (25)),
4790 /* If all argument slots are used, then it must go on the stack. */
4791 if (cum
->words
+ offset
>= MAX_ARGUMENT_SLOTS
)
4794 /* On OpenVMS argument is either in Rn or Fn. */
4795 if (TARGET_ABI_OPEN_VMS
)
4797 if (FLOAT_MODE_P (arg
.mode
))
4798 return gen_rtx_REG (arg
.mode
, FR_ARG_FIRST
+ cum
->words
);
4800 return gen_rtx_REG (arg
.mode
, basereg
+ cum
->words
);
4803 /* Check for and handle homogeneous FP aggregates. */
4805 hfa_mode
= hfa_element_mode (arg
.type
, 0);
4807 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4808 and unprototyped hfas are passed specially. */
4809 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| arg
.named
))
4813 int fp_regs
= cum
->fp_regs
;
4814 int int_regs
= cum
->words
+ offset
;
4815 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
4819 /* If prototyped, pass it in FR regs then GR regs.
4820 If not prototyped, pass it in both FR and GR regs.
4822 If this is an SFmode aggregate, then it is possible to run out of
4823 FR regs while GR regs are still left. In that case, we pass the
4824 remaining part in the GR regs. */
4826 /* Fill the FP regs. We do this always. We stop if we reach the end
4827 of the argument, the last FP register, or the last argument slot. */
4829 byte_size
= arg
.promoted_size_in_bytes ();
4830 args_byte_size
= int_regs
* UNITS_PER_WORD
;
4832 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
4833 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
)); i
++)
4835 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
4836 gen_rtx_REG (hfa_mode
, (FR_ARG_FIRST
4840 args_byte_size
+= hfa_size
;
4844 /* If no prototype, then the whole thing must go in GR regs. */
4845 if (! cum
->prototype
)
4847 /* If this is an SFmode aggregate, then we might have some left over
4848 that needs to go in GR regs. */
4849 else if (byte_size
!= offset
)
4850 int_regs
+= offset
/ UNITS_PER_WORD
;
4852 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4854 for (; offset
< byte_size
&& int_regs
< MAX_ARGUMENT_SLOTS
; i
++)
4856 machine_mode gr_mode
= DImode
;
4857 unsigned int gr_size
;
4859 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4860 then this goes in a GR reg left adjusted/little endian, right
4861 adjusted/big endian. */
4862 /* ??? Currently this is handled wrong, because 4-byte hunks are
4863 always right adjusted/little endian. */
4866 /* If we have an even 4 byte hunk because the aggregate is a
4867 multiple of 4 bytes in size, then this goes in a GR reg right
4868 adjusted/little endian. */
4869 else if (byte_size
- offset
== 4)
4872 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
4873 gen_rtx_REG (gr_mode
, (basereg
4877 gr_size
= GET_MODE_SIZE (gr_mode
);
4879 if (gr_size
== UNITS_PER_WORD
4880 || (gr_size
< UNITS_PER_WORD
&& offset
% UNITS_PER_WORD
== 0))
4882 else if (gr_size
> UNITS_PER_WORD
)
4883 int_regs
+= gr_size
/ UNITS_PER_WORD
;
4885 return gen_rtx_PARALLEL (arg
.mode
, gen_rtvec_v (i
, loc
));
4888 /* Integral and aggregates go in general registers. If we have run out of
4889 FR registers, then FP values must also go in general registers. This can
4890 happen when we have a SFmode HFA. */
4891 else if (arg
.mode
== TFmode
|| arg
.mode
== TCmode
4892 || !FLOAT_MODE_P (arg
.mode
)
4893 || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
)
4895 int byte_size
= arg
.promoted_size_in_bytes ();
4896 if (BYTES_BIG_ENDIAN
4897 && (arg
.mode
== BLKmode
|| arg
.aggregate_type_p ())
4898 && byte_size
< UNITS_PER_WORD
4901 rtx gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
4902 gen_rtx_REG (DImode
,
4903 (basereg
+ cum
->words
4906 return gen_rtx_PARALLEL (arg
.mode
, gen_rtvec (1, gr_reg
));
4909 return gen_rtx_REG (arg
.mode
, basereg
+ cum
->words
+ offset
);
4913 /* If there is a prototype, then FP values go in a FR register when
4914 named, and in a GR register when unnamed. */
4915 else if (cum
->prototype
)
4918 return gen_rtx_REG (arg
.mode
, FR_ARG_FIRST
+ cum
->fp_regs
);
4919 /* In big-endian mode, an anonymous SFmode value must be represented
4920 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4921 the value into the high half of the general register. */
4922 else if (BYTES_BIG_ENDIAN
&& arg
.mode
== SFmode
)
4923 return gen_rtx_PARALLEL (arg
.mode
,
4925 gen_rtx_EXPR_LIST (VOIDmode
,
4926 gen_rtx_REG (DImode
, basereg
+ cum
->words
+ offset
),
4929 return gen_rtx_REG (arg
.mode
, basereg
+ cum
->words
+ offset
);
4931 /* If there is no prototype, then FP values go in both FR and GR
4935 /* See comment above. */
4936 machine_mode inner_mode
=
4937 (BYTES_BIG_ENDIAN
&& arg
.mode
== SFmode
) ? DImode
: arg
.mode
;
4939 rtx fp_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
4940 gen_rtx_REG (arg
.mode
, (FR_ARG_FIRST
4943 rtx gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
4944 gen_rtx_REG (inner_mode
,
4945 (basereg
+ cum
->words
4949 return gen_rtx_PARALLEL (arg
.mode
, gen_rtvec (2, fp_reg
, gr_reg
));
4953 /* Implement TARGET_FUNCTION_ARG target hook. */
4956 ia64_function_arg (cumulative_args_t cum, const function_arg_info &arg)
4958 return ia64_function_arg_1 (cum, arg, false);
4961 /* Implement TARGET_FUNCTION_INCOMING_ARG target hook. */
4964 ia64_function_incoming_arg (cumulative_args_t cum,
4965 const function_arg_info &arg)
4967 return ia64_function_arg_1 (cum, arg, true);
4970 /* Return number of bytes, at the beginning of the argument, that must be
4971 put in registers. 0 if the argument is entirely in registers or entirely on the stack. */
4975 ia64_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
4977 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4979 int words = ia64_function_arg_words (arg.type, arg.mode);
4980 int offset = ia64_function_arg_offset (cum, arg.type, words);
4982 /* If all argument slots are used, then it must go on the stack. */
4983 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4986 /* It doesn't matter whether the argument goes in FR or GR regs. If
4987 it fits within the 8 argument slots, then it goes entirely in
4988 registers. If it extends past the last argument slot, then the rest
4989 goes on the stack. */
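/* Worked example (illustrative): with cum->words == 6, offset == 0 and a
   3-word argument, 8 - 6 == 2 words fit in the argument registers, so the
   hook returns 2 * UNITS_PER_WORD == 16 bytes; the remaining word goes on
   the stack.  */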
4991 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4994 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
4997 /* Return ivms_arg_type based on machine_mode. */
4999 static enum ivms_arg_type
5000 ia64_arg_type (machine_mode mode
)
5013 /* Update CUM to point after this argument. This is patterned after
5014 ia64_function_arg. */
5017 ia64_function_arg_advance (cumulative_args_t cum_v
,
5018 const function_arg_info
&arg
)
5020 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
5021 int words
= ia64_function_arg_words (arg
.type
, arg
.mode
);
5022 int offset
= ia64_function_arg_offset (cum
, arg
.type
, words
);
5023 machine_mode hfa_mode
= VOIDmode
;
5025 /* If all arg slots are already full, then there is nothing to do. */
5026 if (cum
->words
>= MAX_ARGUMENT_SLOTS
)
5028 cum
->words
+= words
+ offset
;
5032 cum
->atypes
[cum
->words
] = ia64_arg_type (arg
.mode
);
5033 cum
->words
+= words
+ offset
;
5035 /* On OpenVMS argument is either in Rn or Fn. */
5036 if (TARGET_ABI_OPEN_VMS
)
5038 cum
->int_regs
= cum
->words
;
5039 cum
->fp_regs
= cum
->words
;
5043 /* Check for and handle homogeneous FP aggregates. */
5045 hfa_mode
= hfa_element_mode (arg
.type
, 0);
5047 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
5048 and unprototyped hfas are passed specially. */
5049 if (hfa_mode
!= VOIDmode
&& (! cum
->prototype
|| arg
.named
))
5051 int fp_regs
= cum
->fp_regs
;
5052 /* This is the original value of cum->words + offset. */
5053 int int_regs
= cum
->words
- words
;
5054 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
5058 /* If prototyped, pass it in FR regs then GR regs.
5059 If not prototyped, pass it in both FR and GR regs.
5061 If this is an SFmode aggregate, then it is possible to run out of
5062 FR regs while GR regs are still left. In that case, we pass the
5063 remaining part in the GR regs. */
5065 /* Fill the FP regs. We do this always. We stop if we reach the end
5066 of the argument, the last FP register, or the last argument slot. */
5068 byte_size
= arg
.promoted_size_in_bytes ();
5069 args_byte_size
= int_regs
* UNITS_PER_WORD
;
5071 for (; (offset
< byte_size
&& fp_regs
< MAX_ARGUMENT_SLOTS
5072 && args_byte_size
< (MAX_ARGUMENT_SLOTS
* UNITS_PER_WORD
));)
5075 args_byte_size
+= hfa_size
;
5079 cum
->fp_regs
= fp_regs
;
5082 /* Integral and aggregates go in general registers. So do TFmode FP values.
5083 If we have run out of FR registers, then other FP values must also go in
5084 general registers. This can happen when we have a SFmode HFA. */
5085 else if (arg
.mode
== TFmode
|| arg
.mode
== TCmode
5086 || !FLOAT_MODE_P (arg
.mode
)
5087 || cum
->fp_regs
== MAX_ARGUMENT_SLOTS
)
5088 cum
->int_regs
= cum
->words
;
5090 /* If there is a prototype, then FP values go in a FR register when
5091 named, and in a GR register when unnamed. */
5092 else if (cum
->prototype
)
5095 cum
->int_regs
= cum
->words
;
5097 /* ??? Complex types should not reach here. */
5099 += (GET_MODE_CLASS (arg
.mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
5101 /* If there is no prototype, then FP values go in both FR and GR
5105 /* ??? Complex types should not reach here. */
5107 += (GET_MODE_CLASS (arg
.mode
) == MODE_COMPLEX_FLOAT
? 2 : 1);
5108 cum
->int_regs
= cum
->words
;
5112 /* Arguments with alignment larger than 8 bytes start at the next even
5113 boundary. On ILP32 HPUX, TFmode arguments start on next even boundary
5114 even though their normal alignment is 8 bytes. See ia64_function_arg. */
5117 ia64_function_arg_boundary (machine_mode mode, const_tree type)
5119 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
5120 return PARM_BOUNDARY * 2;
5124 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
5125 return PARM_BOUNDARY * 2;
5127 return PARM_BOUNDARY;
5130 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
5131 return PARM_BOUNDARY * 2;
5133 return PARM_BOUNDARY;
5136 /* True if it is OK to do sibling call optimization for the specified
5137 call expression EXP. DECL will be the called function, or NULL if
5138 this is an indirect call. */
5140 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5142 /* We can't perform a sibcall if the current function has the syscall_linkage attribute. */
5144 if (lookup_attribute ("syscall_linkage",
5145 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
5148 /* We must always return with our current GP. This means we can
5149 only sibcall to functions defined in the current module unless
5150 TARGET_CONST_GP is set to true. */
5151 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
5155 /* Implement va_arg. */
5158 ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
5161 /* Variable sized types are passed by reference. */
5162 if (pass_va_arg_by_reference (type))
5164 tree ptrtype = build_pointer_type (type);
5165 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
5166 return build_va_arg_indirect_ref (addr);
5169 /* Aggregate arguments with alignment larger than 8 bytes start at
5170 the next even boundary. Integer and floating point arguments
5171 do so if they are larger than 8 bytes, whether or not they are
5172 also aligned larger than 8 bytes. */
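/* The tree expressions built below implement the usual align-up idiom; in
   plain C (assuming UNITS_PER_WORD == 8) it would read roughly

     valist = (valist + 2 * 8 - 1) & -(2 * 8);

   which rounds valist up to the next 16-byte boundary.  Illustrative
   sketch only.  */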
5173 if ((SCALAR_FLOAT_TYPE_P (type) || TREE_CODE (type) == INTEGER_TYPE)
5174 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
5176 tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1);
5177 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5178 build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD));
5179 gimplify_assign (unshare_expr (valist), t, pre_p);
5182 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5185 /* Return 1 if function return value returned in memory. Return 0 if it is in a register. */
5189 ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
5192 machine_mode hfa_mode
;
5193 HOST_WIDE_INT byte_size
;
5195 mode
= TYPE_MODE (valtype
);
5196 byte_size
= GET_MODE_SIZE (mode
);
5197 if (mode
== BLKmode
)
5199 byte_size
= int_size_in_bytes (valtype
);
5204 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
5206 hfa_mode
= hfa_element_mode (valtype
, 0);
5207 if (hfa_mode
!= VOIDmode
)
5209 int hfa_size
= GET_MODE_SIZE (hfa_mode
);
5211 if (byte_size
/ hfa_size
> MAX_ARGUMENT_SLOTS
)
5216 else if (byte_size
> UNITS_PER_WORD
* MAX_INT_RETURN_SLOTS
)
5222 /* Return rtx for register that holds the function return value. */
5225 ia64_function_value (const_tree valtype
,
5226 const_tree fn_decl_or_type
,
5227 bool outgoing ATTRIBUTE_UNUSED
)
5230 machine_mode hfa_mode
;
5232 const_tree func
= fn_decl_or_type
;
5235 && !DECL_P (fn_decl_or_type
))
5238 mode
= TYPE_MODE (valtype
);
5239 hfa_mode
= hfa_element_mode (valtype
, 0);
5241 if (hfa_mode
!= VOIDmode
)
5249 hfa_size
= GET_MODE_SIZE (hfa_mode
);
5250 byte_size
= ((mode
== BLKmode
)
5251 ? int_size_in_bytes (valtype
) : GET_MODE_SIZE (mode
));
5253 for (i
= 0; offset
< byte_size
; i
++)
5255 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
5256 gen_rtx_REG (hfa_mode
, FR_ARG_FIRST
+ i
),
5260 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
5262 else if (FLOAT_TYPE_P (valtype
) && mode
!= TFmode
&& mode
!= TCmode
)
5263 return gen_rtx_REG (mode
, FR_ARG_FIRST
);
5266 bool need_parallel
= false;
5268 /* In big-endian mode, we need to manage the layout of aggregates
5269 in the registers so that we get the bits properly aligned in
5270 the highpart of the registers. */
5271 if (BYTES_BIG_ENDIAN
5272 && (mode
== BLKmode
|| (valtype
&& AGGREGATE_TYPE_P (valtype
))))
5273 need_parallel
= true;
5275 /* Something like struct S { long double x; char a[0] } is not an
5276 HFA structure, and therefore doesn't go in fp registers. But
5277 the middle-end will give it XFmode anyway, and XFmode values
5278 don't normally fit in integer registers. So we need to smuggle
5279 the value inside a parallel. */
5280 else if (mode
== XFmode
|| mode
== XCmode
|| mode
== RFmode
)
5281 need_parallel
= true;
5291 bytesize
= int_size_in_bytes (valtype
);
5292 /* An empty PARALLEL is invalid here, but the return value
5293 doesn't matter for empty structs. */
5295 return gen_rtx_REG (mode
, GR_RET_FIRST
);
5296 for (i
= 0; offset
< bytesize
; i
++)
5298 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
5299 gen_rtx_REG (DImode
,
5302 offset
+= UNITS_PER_WORD
;
5304 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (i
, loc
));
5307 mode
= promote_function_mode (valtype
, mode
, &unsignedp
,
5308 func
? TREE_TYPE (func
) : NULL_TREE
,
5311 return gen_rtx_REG (mode
, GR_RET_FIRST
);
5315 /* Worker function for TARGET_LIBCALL_VALUE. */
5318 ia64_libcall_value (machine_mode mode
,
5319 const_rtx fun ATTRIBUTE_UNUSED
)
5321 return gen_rtx_REG (mode
,
5322 (((GET_MODE_CLASS (mode
) == MODE_FLOAT
5323 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
5324 && (mode
) != TFmode
)
5325 ? FR_RET_FIRST
: GR_RET_FIRST
));
5328 /* Worker function for FUNCTION_VALUE_REGNO_P. */
5331 ia64_function_value_regno_p (const unsigned int regno
)
5333 return ((regno
>= GR_RET_FIRST
&& regno
<= GR_RET_LAST
)
5334 || (regno
>= FR_RET_FIRST
&& regno
<= FR_RET_LAST
));
5337 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
5338 We need to emit DTP-relative relocations. */
5341 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
5343 gcc_assert (size == 4 || size == 8);
5345 fputs ("\tdata4.ua\t@dtprel(", file);
5347 fputs ("\tdata8.ua\t@dtprel(", file);
5348 output_addr_const (file, x);
5352 /* Print a memory address as an operand to reference that memory location. */
5354 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
5355 also call this from ia64_print_operand for memory addresses. */
5358 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
5359 machine_mode /*mode*/,
5360 rtx address ATTRIBUTE_UNUSED)
5364 /* Print an operand to an assembler instruction.
5365 C Swap and print a comparison operator.
5366 D Print an FP comparison operator.
5367 E Print 32 - constant, for SImode shifts as extract.
5368 e Print 64 - constant, for DImode rotates.
5369 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
5370 a floating point register emitted normally.
5371 G A floating point constant.
5372 I Invert a predicate register by adding 1.
5373 J Select the proper predicate register for a condition.
5374 j Select the inverse predicate register for a condition.
5375 O Append .acq for volatile load.
5376 P Postincrement of a MEM.
5377 Q Append .rel for volatile store.
5378 R Print .s .d or nothing for a single, double or no truncation.
5379 S Shift amount for shladd instruction.
5380 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
5381 for Intel assembler.
5382 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
5383 for Intel assembler.
5384 X A pair of floating point registers.
5385 r Print register name, or constant 0 as r0. HP compatibility for
5387 v Print vector constant value as an 8-byte integer value. */
5390 ia64_print_operand (FILE * file
, rtx x
, int code
)
5397 /* Handled below. */
5402 enum rtx_code c
= swap_condition (GET_CODE (x
));
5403 fputs (GET_RTX_NAME (c
), file
);
5408 switch (GET_CODE (x
))
5435 str
= GET_RTX_NAME (GET_CODE (x
));
5442 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 32 - INTVAL (x
));
5446 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 64 - INTVAL (x
));
5450 if (x
== CONST0_RTX (GET_MODE (x
)))
5451 str
= reg_names
[FR_REG (0)];
5452 else if (x
== CONST1_RTX (GET_MODE (x
)))
5453 str
= reg_names
[FR_REG (1)];
5456 gcc_assert (GET_CODE (x
) == REG
);
5457 str
= reg_names
[REGNO (x
)];
5465 real_to_target (val
, CONST_DOUBLE_REAL_VALUE (x
), GET_MODE (x
));
5466 if (GET_MODE (x
) == SFmode
)
5467 fprintf (file
, "0x%08lx", val
[0] & 0xffffffff);
5468 else if (GET_MODE (x
) == DFmode
)
5469 fprintf (file
, "0x%08lx%08lx", (WORDS_BIG_ENDIAN
? val
[0] : val
[1])
5471 (WORDS_BIG_ENDIAN
? val
[1] : val
[0])
5474 output_operand_lossage ("invalid %%G mode");
5479 fputs (reg_names
[REGNO (x
) + 1], file
);
5485 unsigned int regno
= REGNO (XEXP (x
, 0));
5486 if (GET_CODE (x
) == EQ
)
5490 fputs (reg_names
[regno
], file
);
5495 if (MEM_VOLATILE_P (x
))
5496 fputs(".acq", file
);
5501 HOST_WIDE_INT value
;
5503 switch (GET_CODE (XEXP (x
, 0)))
5509 x
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
5510 if (GET_CODE (x
) == CONST_INT
)
5514 gcc_assert (GET_CODE (x
) == REG
);
5515 fprintf (file
, ", %s", reg_names
[REGNO (x
)]);
5521 value
= GET_MODE_SIZE (GET_MODE (x
));
5525 value
= - (HOST_WIDE_INT
) GET_MODE_SIZE (GET_MODE (x
));
5529 fprintf (file
, ", " HOST_WIDE_INT_PRINT_DEC
, value
);
5534 if (MEM_VOLATILE_P (x
))
5535 fputs(".rel", file
);
5539 if (x
== CONST0_RTX (GET_MODE (x
)))
5541 else if (x
== CONST1_RTX (GET_MODE (x
)))
5543 else if (x
== CONST2_RTX (GET_MODE (x
)))
5546 output_operand_lossage ("invalid %%R value");
5550 fprintf (file
, "%d", exact_log2 (INTVAL (x
)));
5554 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
5556 fprintf (file
, "0x%x", (int) INTVAL (x
) & 0xffffffff);
5562 if (! TARGET_GNU_AS
&& GET_CODE (x
) == CONST_INT
)
5564 const char *prefix
= "0x";
5565 if (INTVAL (x
) & 0x80000000)
5567 fprintf (file
, "0xffffffff");
5570 fprintf (file
, "%s%x", prefix
, (int) INTVAL (x
) & 0xffffffff);
5577 unsigned int regno
= REGNO (x
);
5578 fprintf (file
, "%s, %s", reg_names
[regno
], reg_names
[regno
+ 1]);
5583 /* If this operand is the constant zero, write it as register zero.
5584 Any register, zero, or CONST_INT value is OK here. */
5585 if (GET_CODE (x
) == REG
)
5586 fputs (reg_names
[REGNO (x
)], file
);
5587 else if (x
== CONST0_RTX (GET_MODE (x
)))
5589 else if (GET_CODE (x
) == CONST_INT
)
5590 output_addr_const (file
, x
);
5592 output_operand_lossage ("invalid %%r value");
5596 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
5597 x
= simplify_subreg (DImode
, x
, GET_MODE (x
), 0);
5604 /* For conditional branches, returns or calls, substitute
5605 sptk, dptk, dpnt, or spnt for %s. */
5606 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
5609 int pred_val
= profile_probability::from_reg_br_prob_note
5610 (XINT (x
, 0)).to_reg_br_prob_base ();
5612 /* Guess top and bottom 10% statically predicted. */
5613 if (pred_val
< REG_BR_PROB_BASE
/ 50
5614 && br_prob_note_reliable_p (x
))
5616 else if (pred_val
< REG_BR_PROB_BASE
/ 2)
5618 else if (pred_val
< REG_BR_PROB_BASE
/ 100 * 98
5619 || !br_prob_note_reliable_p (x
))
5624 else if (CALL_P (current_output_insn
))
5629 fputs (which
, file
);
5634 x
= current_insn_predicate
;
5637 unsigned int regno
= REGNO (XEXP (x
, 0));
5638 if (GET_CODE (x
) == EQ
)
5640 fprintf (file
, "(%s) ", reg_names
[regno
]);
5645 output_operand_lossage ("ia64_print_operand: unknown code");
5649 switch (GET_CODE (x
))
5651 /* This happens for the spill/restore instructions. */
5659 fputs (reg_names
[REGNO (x
)], file
);
5664 rtx addr
= XEXP (x
, 0);
5665 if (GET_RTX_CLASS (GET_CODE (addr
)) == RTX_AUTOINC
)
5666 addr
= XEXP (addr
, 0);
5667 fprintf (file
, "[%s]", reg_names
[REGNO (addr
)]);
5672 output_addr_const (file
, x
);
5679 /* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
5682 ia64_print_operand_punct_valid_p (unsigned char code)
5684 return (code == '+' || code == ',');
5687 /* Compute a (partial) cost for rtx X. Return true if the complete
5688 cost has been computed, and false if subexpressions should be
5689 scanned. In either case, *TOTAL contains the cost result. */
5690 /* ??? This is incomplete. */
5693 ia64_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
5694 int opno ATTRIBUTE_UNUSED
,
5695 int *total
, bool speed ATTRIBUTE_UNUSED
)
5697 int code
= GET_CODE (x
);
5705 *total
= satisfies_constraint_J (x
) ? 0 : COSTS_N_INSNS (1);
5708 if (satisfies_constraint_I (x
))
5710 else if (satisfies_constraint_J (x
))
5713 *total
= COSTS_N_INSNS (1);
5716 if (satisfies_constraint_K (x
) || satisfies_constraint_L (x
))
5719 *total
= COSTS_N_INSNS (1);
5724 *total
= COSTS_N_INSNS (1);
5730 *total
= COSTS_N_INSNS (3);
5734 *total
= COSTS_N_INSNS (4);
5738 /* For multiplies wider than HImode, we have to go to the FPU,
5739 which normally involves copies. Plus there's the latency
5740 of the multiply itself, and the latency of the instructions to
5741 transfer integer regs to FP regs. */
5742 if (FLOAT_MODE_P (mode
))
5743 *total
= COSTS_N_INSNS (4);
5744 else if (GET_MODE_SIZE (mode
) > 2)
5745 *total
= COSTS_N_INSNS (10);
5747 *total
= COSTS_N_INSNS (2);
5752 if (FLOAT_MODE_P (mode
))
5754 *total
= COSTS_N_INSNS (4);
5762 *total
= COSTS_N_INSNS (1);
5769 /* We make divide expensive, so that divide-by-constant will be
5770 optimized to a multiply. */
5771 *total
= COSTS_N_INSNS (60);
5779 /* Calculate the cost of moving data from a register in class FROM to
5780 one in class TO, using MODE. */
5783 ia64_register_move_cost (machine_mode mode
, reg_class_t from
,
5786 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
5787 if (to
== ADDL_REGS
)
5789 if (from
== ADDL_REGS
)
5792 /* All costs are symmetric, so reduce cases by putting the
5793 lower number class as the destination. */
5796 reg_class_t tmp
= to
;
5797 to
= from
, from
= tmp
;
5800 /* Moving from FR<->GR in XFmode must be more expensive than 2,
5801 so that we get secondary memory reloads. Between FR_REGS,
5802 we have to make this at least as expensive as memory_move_cost
5803 to avoid spectacularly poor register class preferencing. */
5804 if (mode
== XFmode
|| mode
== RFmode
)
5806 if (to
!= GR_REGS
|| from
!= GR_REGS
)
5807 return memory_move_cost (mode
, to
, false);
5815 /* Moving between PR registers takes two insns. */
5816 if (from
== PR_REGS
)
5818 /* Moving between PR and anything but GR is impossible. */
5819 if (from
!= GR_REGS
)
5820 return memory_move_cost (mode
, to
, false);
5824 /* Moving between BR and anything but GR is impossible. */
5825 if (from
!= GR_REGS
&& from
!= GR_AND_BR_REGS
)
5826 return memory_move_cost (mode
, to
, false);
5831 /* Moving between AR and anything but GR is impossible. */
5832 if (from
!= GR_REGS
)
5833 return memory_move_cost (mode
, to
, false);
5839 case GR_AND_FR_REGS
:
5840 case GR_AND_BR_REGS
:
5851 /* Calculate the cost of moving data of MODE from a register to or from
5855 ia64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
5857 bool in ATTRIBUTE_UNUSED
)
5859 if (rclass
== GENERAL_REGS
5860 || rclass
== FR_REGS
5861 || rclass
== FP_REGS
5862 || rclass
== GR_AND_FR_REGS
)
5868 /* Implement TARGET_PREFERRED_RELOAD_CLASS. Place additional restrictions
5869 on RCLASS to use when copying X into that class. */
5872 ia64_preferred_reload_class (rtx x
, reg_class_t rclass
)
5878 /* Don't allow volatile mem reloads into floating point registers.
5879 This is defined to force reload to choose the r/m case instead
5880 of the f/f case when reloading (set (reg fX) (mem/v)). */
5881 if (MEM_P (x
) && MEM_VOLATILE_P (x
))
5884 /* Force all unrecognized constants into the constant pool. */
5902 /* This function returns the register class required for a secondary
5903 register when copying between one of the registers in RCLASS, and X,
5904 using MODE. A return value of NO_REGS means that no secondary register
5908 ia64_secondary_reload_class (enum reg_class rclass
,
5909 machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
5913 if (GET_CODE (x
) == REG
|| GET_CODE (x
) == SUBREG
)
5914 regno
= true_regnum (x
);
5921 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5922 interaction. We end up with two pseudos with overlapping lifetimes
5923 both of which are equiv to the same constant, and both which need
5924 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5925 changes depending on the path length, which means the qty_first_reg
5926 check in make_regs_eqv can give different answers at different times.
5927 At some point I'll probably need a reload_indi pattern to handle
5930 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5931 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5932 non-general registers for good measure. */
5933 if (regno
>= 0 && ! GENERAL_REGNO_P (regno
))
5936 /* This is needed if a pseudo used as a call_operand gets spilled to a
5938 if (GET_CODE (x
) == MEM
)
5944 /* Need to go through general registers to get to other class regs. */
5945 if (regno
>= 0 && ! (FR_REGNO_P (regno
) || GENERAL_REGNO_P (regno
)))
5948 /* This can happen when a paradoxical subreg is an operand to the
5950 /* ??? This shouldn't be necessary after instruction scheduling is
5951 enabled, because paradoxical subregs are not accepted by
5952 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5953 stop the paradoxical subreg stupidity in the *_operand functions
5955 if (GET_CODE (x
) == MEM
5956 && (GET_MODE (x
) == SImode
|| GET_MODE (x
) == HImode
5957 || GET_MODE (x
) == QImode
))
5960 /* This can happen because of the ior/and/etc patterns that accept FP
5961 registers as operands. If the third operand is a constant, then it
5962 needs to be reloaded into a FP register. */
5963 if (GET_CODE (x
) == CONST_INT
)
5966 /* This can happen because of register elimination in a muldi3 insn.
5967 E.g. `26107 * (unsigned long)&u'. */
5968 if (GET_CODE (x
) == PLUS
)
5973 /* ??? This happens if we cse/gcse a BImode value across a call,
5974 and the function has a nonlocal goto. This is because global
5975 does not allocate call crossing pseudos to hard registers when
5976 crtl->has_nonlocal_goto is true. This is relatively
5977 common for C++ programs that use exceptions. To reproduce,
5978 return NO_REGS and compile libstdc++. */
5979 if (GET_CODE (x
) == MEM
)
5982 /* This can happen when we take a BImode subreg of a DImode value,
5983 and that DImode value winds up in some non-GR register. */
5984 if (regno
>= 0 && ! GENERAL_REGNO_P (regno
) && ! PR_REGNO_P (regno
))
5996 /* Implement targetm.unspec_may_trap_p hook. */
5998 ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
6000 switch (XINT (x, 1))
6006 case UNSPEC_CHKACLR:
6008 /* These unspecs are just wrappers. */
6009 return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
6012 return default_unspec_may_trap_p (x, flags);
6016 /* Parse the -mfixed-range= option string. */
6019 fix_range (const char *const_str)
6022 char *str, *dash, *comma;
6024 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
6025 REG2 are either register names or register numbers. The effect
6026 of this option is to mark the registers in the range from REG1 to
6027 REG2 as ``fixed'' so they won't be used by the compiler. This is
6028 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
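/* Example usage (illustrative):

     -mfixed-range=f32-f127
     -mfixed-range=f12-f15,f32-f127

   marks the listed floating point registers as fixed for the whole
   translation unit.  */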
6030 i = strlen (const_str);
6031 str = (char *) alloca (i + 1);
6032 memcpy (str, const_str, i + 1);
6036 dash = strchr (str, '-');
6039 warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
6044 comma = strchr (dash + 1, ',');
6048 first = decode_reg_name (str);
6051 warning (0, "unknown register name: %s", str);
6055 last = decode_reg_name (dash + 1);
6058 warning (0, "unknown register name: %s", dash + 1);
6066 warning (0, "%s-%s is an empty range", str, dash + 1);
6070 for (i = first; i <= last; ++i)
6081 /* Implement TARGET_OPTION_OVERRIDE. */
6084 ia64_option_override (void)
6087 cl_deferred_option *opt;
6088 vec<cl_deferred_option> *v
6089 = (vec<cl_deferred_option> *) ia64_deferred_options;
6092 FOR_EACH_VEC_ELT (*v, i, opt)
6094 switch (opt->opt_index)
6096 case OPT_mfixed_range_:
6097 fix_range (opt->arg);
6105 if (TARGET_AUTO_PIC)
6106 target_flags |= MASK_CONST_GP;
6108 /* Numerous experiments show that IRA based loop pressure
6109 calculation works better for RTL loop invariant motion on targets
6110 with enough (>= 32) registers. It is an expensive optimization.
6111 So it is on only for peak performance. */
6113 flag_ira_loop_pressure = 1;
6116 ia64_section_threshold = (OPTION_SET_P (g_switch_value)
6118 : IA64_DEFAULT_GVALUE);
6120 init_machine_status = ia64_init_machine_status;
6122 if (flag_align_functions && !str_align_functions)
6123 str_align_functions = "64";
6124 if (flag_align_loops && !str_align_loops)
6125 str_align_loops = "32";
6126 if (TARGET_ABI_OPEN_VMS)
6129 ia64_override_options_after_change();
6132 /* Implement targetm.override_options_after_change. */
6135 ia64_override_options_after_change (void)
6138 && !OPTION_SET_P (flag_selective_scheduling)
6139 && !OPTION_SET_P (flag_selective_scheduling2))
6141 flag_selective_scheduling2 = 1;
6142 flag_sel_sched_pipelining = 1;
6144 if (mflag_sched_control_spec == 2)
6146 /* Control speculation is on by default for the selective scheduler,
6147 but not for the Haifa scheduler. */
6148 mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
6150 if (flag_sel_sched_pipelining && flag_auto_inc_dec)
6152 /* FIXME: remove this when we'd implement breaking autoinsns as
6153 a transformation. */
6154 flag_auto_inc_dec = 0;
6158 /* Initialize the record of emitted frame related registers. */
6160 void ia64_init_expanders (void)
6162 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
6165 static struct machine_function *
6166 ia64_init_machine_status (void)
6168 return ggc_cleared_alloc<machine_function> ();
6171 static enum attr_itanium_class ia64_safe_itanium_class (rtx_insn *);
6172 static enum attr_type ia64_safe_type (rtx_insn *);
6174 static enum attr_itanium_class
6175 ia64_safe_itanium_class (rtx_insn *insn)
6177 if (recog_memoized (insn) >= 0)
6178 return get_attr_itanium_class (insn);
6179 else if (DEBUG_INSN_P (insn))
6180 return ITANIUM_CLASS_IGNORE;
6182 return ITANIUM_CLASS_UNKNOWN;
6185 static enum attr_type
6186 ia64_safe_type (rtx_insn *insn)
6188 if (recog_memoized (insn) >= 0)
6189 return get_attr_type (insn);
6191 return TYPE_UNKNOWN;
/* The following collection of routines emit instruction group stop bits as
   necessary to avoid dependencies.  */

/* Need to track some additional registers as far as serialization is
   concerned so we can properly handle br.call and br.ret.  We could
   make these registers visible to gcc, but since these registers are
   never explicitly used in gcc generated code, it seems wasteful to
   do so (plus it would make the call and return patterns needlessly
   complex).  */
#define REG_RP		(BR_REG (0))
#define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
/* This is used for volatile asms which may require a stop bit immediately
   before and after them.  */
#define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
#define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
#define NUM_REGS	(AR_UNAT_BIT_0 + 64)

/* For each register, we keep track of how it has been written in the
   current instruction group.

   If a register is written unconditionally (no qualifying predicate),
   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.

   If a register is written if its qualifying predicate P is true, we
   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
   may be written again by the complement of P (P^1) and when this happens,
   WRITE_COUNT gets set to 2.

   The result of this is that whenever an insn attempts to write a register
   whose WRITE_COUNT is two, we need to issue an insn group barrier first.

   If a predicate register is written by a floating-point insn, we set
   WRITTEN_BY_FP to true.

   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
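/* A small illustration of the bookkeeping described above (added here for
   exposition only; p6/p7 stand for a qualifying predicate and its
   complement):

     (p6) mov r14 = 1     ;; WRITE_COUNT (r14) = 1, FIRST_PRED = p6
     (p7) mov r14 = 2     ;; WRITE_COUNT (r14) becomes 2
          mov r14 = 3     ;; writes a register whose WRITE_COUNT is
                          ;; already 2, so a stop bit is required first

   Note that rws_access_regno below treats the second predicated write
   like an unconditional write rather than checking for the complementary
   predicate.  */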
#if GCC_VERSION >= 4000
#define RWS_FIELD_TYPE __extension__ unsigned short
#else
#define RWS_FIELD_TYPE unsigned int
#endif
struct reg_write_state
{
  RWS_FIELD_TYPE write_count : 2;
  RWS_FIELD_TYPE first_pred : 10;
  RWS_FIELD_TYPE written_by_fp : 1;
  RWS_FIELD_TYPE written_by_and : 1;
  RWS_FIELD_TYPE written_by_or : 1;
};

/* Cumulative info for the current instruction group.  */
struct reg_write_state rws_sum[NUM_REGS];

/* Bitmap whether a register has been written in the current insn.  */
unsigned HOST_WIDEST_FAST_INT rws_insn
  [(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
   / HOST_BITS_PER_WIDEST_FAST_INT];
#if CHECKING_P
static inline void
rws_insn_set (unsigned int regno)
{
  unsigned int elt = regno / HOST_BITS_PER_WIDEST_FAST_INT;
  unsigned int bit = regno % HOST_BITS_PER_WIDEST_FAST_INT;
  gcc_assert (!((rws_insn[elt] >> bit) & 1));
  rws_insn[elt] |= (unsigned HOST_WIDEST_FAST_INT) 1 << bit;
}

static inline bool
rws_insn_test (unsigned int regno)
{
  unsigned int elt = regno / HOST_BITS_PER_WIDEST_FAST_INT;
  unsigned int bit = regno % HOST_BITS_PER_WIDEST_FAST_INT;
  return (rws_insn[elt] >> bit) & 1;
}
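/* Example of the index arithmetic above (illustrative only; assumes a
   64-bit HOST_WIDEST_FAST_INT): for regno == 130, elt == 130 / 64 == 2
   and bit == 130 % 64 == 2, so the access is recorded as bit 2 of the
   third element of rws_insn.  */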
#else
/* When not checking, track just REG_AR_CFM and REG_VOLATILE.  */
unsigned char rws_insn[2];

static inline void
rws_insn_set (int regno)
{
  if (regno == REG_AR_CFM)
    rws_insn[0] = 1;
  else if (regno == REG_VOLATILE)
    rws_insn[1] = 1;
}

static inline bool
rws_insn_test (int regno)
{
  if (regno == REG_AR_CFM)
    return rws_insn[0];
  if (regno == REG_VOLATILE)
    return rws_insn[1];
  return 0;
}
#endif

/* Indicates whether this is the first instruction after a stop bit,
   in which case we don't need another stop bit.  Without this,
   ia64_variable_issue will die when scheduling an alloc.  */
static int first_instruction;
/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
   RTL for one instruction.  */

struct reg_flags
{
  unsigned int is_write : 1;	/* Is register being written?  */
  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
};

static void rws_update (int, struct reg_flags, int);
static int rws_access_regno (int, struct reg_flags, int);
static int rws_access_reg (rtx, struct reg_flags, int);
static void update_set_flags (rtx, struct reg_flags *);
static int set_src_needs_barrier (rtx, struct reg_flags, int);
static int rtx_needs_barrier (rtx, struct reg_flags, int);
static void init_insn_group_barriers (void);
static int group_barrier_needed (rtx_insn *);
static int safe_group_barrier_needed (rtx_insn *);
static int in_safe_group_barrier;
/* Update *RWS for REGNO, which is being written by the current instruction,
   with predicate PRED, and associated register flags in FLAGS.  */

static void
rws_update (int regno, struct reg_flags flags, int pred)
{
  if (pred)
    rws_sum[regno].write_count++;
  else
    rws_sum[regno].write_count = 2;
  rws_sum[regno].written_by_fp |= flags.is_fp;
  /* ??? Not tracking and/or across differing predicates.  */
  rws_sum[regno].written_by_and = flags.is_and;
  rws_sum[regno].written_by_or = flags.is_or;
  rws_sum[regno].first_pred = pred;
}
/* Handle an access to register REGNO of type FLAGS using predicate register
   PRED.  Update rws_sum array.  Return 1 if this access creates
   a dependency with an earlier instruction in the same group.  */

static int
rws_access_regno (int regno, struct reg_flags flags, int pred)
{
  int need_barrier = 0;

  gcc_assert (regno < NUM_REGS);

  if (! PR_REGNO_P (regno))
    flags.is_and = flags.is_or = 0;

  if (flags.is_write)
    {
      int write_count;

      rws_insn_set (regno);
      write_count = rws_sum[regno].write_count;

      switch (write_count)
	{
	case 0:
	  /* The register has not been written yet.  */
	  if (!in_safe_group_barrier)
	    rws_update (regno, flags, pred);
	  break;

	case 1:
	  /* The register has been written via a predicate.  Treat
	     it like an unconditional write and do not try to check
	     for a complementary pred reg in the earlier write.  */
	  if (flags.is_and && rws_sum[regno].written_by_and)
	    ;
	  else if (flags.is_or && rws_sum[regno].written_by_or)
	    ;
	  else
	    need_barrier = 1;
	  if (!in_safe_group_barrier)
	    rws_update (regno, flags, pred);
	  break;

	case 2:
	  /* The register has been unconditionally written already.  We
	     need a barrier.  */
	  if (flags.is_and && rws_sum[regno].written_by_and)
	    ;
	  else if (flags.is_or && rws_sum[regno].written_by_or)
	    ;
	  else
	    need_barrier = 1;
	  if (!in_safe_group_barrier)
	    {
	      rws_sum[regno].written_by_and = flags.is_and;
	      rws_sum[regno].written_by_or = flags.is_or;
	    }
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      if (flags.is_branch)
	{
	  /* Branches have several RAW exceptions that allow us to avoid
	     barriers.  */

	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
	    /* RAW dependencies on branch regs are permissible as long
	       as the writer is a non-branch instruction.  Since we
	       never generate code that uses a branch register written
	       by a branch instruction, handling this case is
	       easy.  */
	    return 0;

	  if (REGNO_REG_CLASS (regno) == PR_REGS
	      && ! rws_sum[regno].written_by_fp)
	    /* The predicates of a branch are available within the
	       same insn group as long as the predicate was written by
	       something other than a floating-point instruction.  */
	    return 0;
	}

      if (flags.is_and && rws_sum[regno].written_by_and)
	return 0;
      if (flags.is_or && rws_sum[regno].written_by_or)
	return 0;

      switch (rws_sum[regno].write_count)
	{
	case 0:
	  /* The register has not been written yet.  */
	  break;

	case 1:
	  /* The register has been written via a predicate, assume we
	     need a barrier (don't check for complementary regs).  */
	  need_barrier = 1;
	  break;

	case 2:
	  /* The register has been unconditionally written already.  We
	     need a barrier.  */
	  need_barrier = 1;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  return need_barrier;
}
static int
rws_access_reg (rtx reg, struct reg_flags flags, int pred)
{
  int regno = REGNO (reg);
  int n = REG_NREGS (reg);

  if (n == 1)
    return rws_access_regno (regno, flags, pred);
  else
    {
      int need_barrier = 0;
      while (--n >= 0)
	need_barrier |= rws_access_regno (regno + n, flags, pred);
      return need_barrier;
    }
}
/* Examine X, which is a SET rtx, and update the flags, the predicate, and
   the condition, stored in *PFLAGS, *PPRED and *PCOND.  */

static void
update_set_flags (rtx x, struct reg_flags *pflags)
{
  rtx src = SET_SRC (x);

  switch (GET_CODE (src))
    {
    case CALL:
      return;

    case IF_THEN_ELSE:
      /* There are four cases here:
	 (1) The destination is (pc), in which case this is a branch,
	     nothing here applies.
	 (2) The destination is ar.lc, in which case this is a
	     doloop_end_internal,
	 (3) The destination is an fp register, in which case this is
	     an fselect instruction.
	 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
	     this is a check load.
	 In all cases, nothing we do in this function applies.  */
      return;

    default:
      if (COMPARISON_P (src)
	  && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
	/* Set pflags->is_fp to 1 so that we know we're dealing
	   with a floating point comparison when processing the
	   destination of the SET.  */
	pflags->is_fp = 1;

      /* Discover if this is a parallel comparison.  We only handle
	 and.orcm and or.andcm at present, since we must retain a
	 strict inverse on the predicate pair.  */
      else if (GET_CODE (src) == AND)
	pflags->is_and = 1;
      else if (GET_CODE (src) == IOR)
	pflags->is_or = 1;

      break;
    }
}
/* Subroutine of rtx_needs_barrier; this function determines whether the
   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
   are as in rtx_needs_barrier.  COND is an rtx that holds the condition
   for this insn.  */

static int
set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
{
  int need_barrier = 0;
  rtx dst;
  rtx src = SET_SRC (x);

  if (GET_CODE (src) == CALL)
    /* We don't need to worry about the result registers that
       get written by subroutine call.  */
    return rtx_needs_barrier (src, flags, pred);
  else if (SET_DEST (x) == pc_rtx)
    {
      /* X is a conditional branch.  */
      /* ??? This seems redundant, as the caller sets this bit for
	 all JUMP_INSNs.  */
      if (!ia64_spec_check_src_p (src))
	flags.is_branch = 1;
      return rtx_needs_barrier (src, flags, pred);
    }

  if (ia64_spec_check_src_p (src))
    /* Avoid checking one register twice (in condition
       and in 'then' section) for ldc pattern.  */
    {
      gcc_assert (REG_P (XEXP (src, 2)));
      need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);

      /* We process MEM below.  */
      src = XEXP (src, 1);
    }

  need_barrier |= rtx_needs_barrier (src, flags, pred);

  dst = SET_DEST (x);
  if (GET_CODE (dst) == ZERO_EXTRACT)
    {
      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
    }
  return need_barrier;
}
/* Handle an access to rtx X of type FLAGS using predicate register
   PRED.  Return 1 if this access creates a dependency with an earlier
   instruction in the same group.  */

static int
rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
{
  int i, j;
  int is_complemented = 0;
  int need_barrier = 0;
  const char *format_ptr;
  struct reg_flags new_flags;
  rtx cond;

  if (! x)
    return 0;

  new_flags = flags;

  switch (GET_CODE (x))
    {
    case SET:
      update_set_flags (x, &new_flags);
      need_barrier = set_src_needs_barrier (x, new_flags, pred);
      if (GET_CODE (SET_SRC (x)) != CALL)
	{
	  new_flags.is_write = 1;
	  need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
	}
      break;

    case CALL:
      new_flags.is_write = 0;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);

      /* Avoid multiple register writes, in case this is a pattern with
	 multiple CALL rtx.  This avoids a failure in rws_access_reg.  */
      if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
	{
	  new_flags.is_write = 1;
	  need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
	  need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
	}
      break;

    case COND_EXEC:
      /* X is a predicated instruction.  */

      cond = COND_EXEC_TEST (x);
      gcc_assert (!pred);
      need_barrier = rtx_needs_barrier (cond, flags, 0);

      if (GET_CODE (cond) == EQ)
	is_complemented = 1;
      cond = XEXP (cond, 0);
      gcc_assert (GET_CODE (cond) == REG
		  && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
      pred = REGNO (cond);
      if (is_complemented)
	++pred;

      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
      return need_barrier;

    case CLOBBER:
    case USE:
      /* Clobber & use are for earlier compiler-phases only.  */
      break;

    case ASM_OPERANDS:
    case ASM_INPUT:
      /* We always emit stop bits for traditional asms.  We emit stop bits
	 for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
      if (GET_CODE (x) != ASM_OPERANDS
	  || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
	{
	  /* Avoid writing the register multiple times if we have multiple
	     asm outputs.  This avoids a failure in rws_access_reg.  */
	  if (! rws_insn_test (REG_VOLATILE))
	    {
	      new_flags.is_write = 1;
	      rws_access_regno (REG_VOLATILE, new_flags, pred);
	    }
	}

      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
	 We cannot just fall through here since then we would be confused
	 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
	 traditional asms unlike their normal usage.  */
      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
	if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
	  need_barrier = 1;
      break;

    case PARALLEL:
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
	{
	  rtx pat = XVECEXP (x, 0, i);
	  switch (GET_CODE (pat))
	    {
	    case SET:
	      update_set_flags (pat, &new_flags);
	      need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
	      break;

	    case USE:
	    case CALL:
	    case ASM_OPERANDS:
	    case ASM_INPUT:
	      need_barrier |= rtx_needs_barrier (pat, flags, pred);
	      break;

	    case CLOBBER:
	      if (REG_P (XEXP (pat, 0))
		  && extract_asm_operands (x) != NULL_RTX
		  && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
		{
		  new_flags.is_write = 1;
		  need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
						     new_flags, pred);
		  new_flags = flags;
		}
	      break;

	    case RETURN:
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
	{
	  rtx pat = XVECEXP (x, 0, i);
	  if (GET_CODE (pat) == SET)
	    {
	      if (GET_CODE (SET_SRC (pat)) != CALL)
		{
		  new_flags.is_write = 1;
		  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
						     pred);
		}
	    }
	  else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
	}
      break;

    case SUBREG:
      need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
      break;

    case REG:
      if (REGNO (x) == AR_UNAT_REGNUM)
	{
	  for (i = 0; i < 64; ++i)
	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
	}
      else
	need_barrier = rws_access_reg (x, flags, pred);
      break;

    case MEM:
      /* Find the regs used in memory address computation.  */
      new_flags.is_write = 0;
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      break;

    case CONST_INT:   case CONST_DOUBLE:  case CONST_VECTOR:
    case SYMBOL_REF:  case LABEL_REF:     case CONST:
      break;

      /* Operators with side-effects.  */
    case POST_INC:    case POST_DEC:
      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);

      new_flags.is_write = 0;
      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      break;

    case POST_MODIFY:
      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);

      new_flags.is_write = 0;
      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      new_flags.is_write = 1;
      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
      break;

      /* Handle common unary and binary ops for efficiency.  */
    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
    case NE:       case EQ:      case GE:      case GT:        case LE:
    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
      break;

    case NEG:      case NOT:            case SIGN_EXTEND:    case ZERO_EXTEND:
    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE: case FLOAT:
    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:   case ABS:
    case SQRT:     case FFS:            case POPCOUNT:
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
      break;

    case VEC_SELECT:
      /* VEC_SELECT's second argument is a PARALLEL with integers that
	 describe the elements selected.  On ia64, those integers are
	 always constants.  Avoid walking the PARALLEL so that we don't
	 get confused with "normal" parallels and then die.  */
      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
      break;

    case UNSPEC:
      switch (XINT (x, 1))
	{
	case UNSPEC_LTOFF_DTPMOD:
	case UNSPEC_LTOFF_DTPREL:
	case UNSPEC_LTOFF_TPREL:
	case UNSPEC_PRED_REL_MUTEX:
	case UNSPEC_PIC_CALL:
	case UNSPEC_FETCHADD_ACQ:
	case UNSPEC_FETCHADD_REL:
	case UNSPEC_BSP_VALUE:
	case UNSPEC_FLUSHRS:
	case UNSPEC_BUNDLE_SELECTOR:
	  break;

	case UNSPEC_GR_SPILL:
	case UNSPEC_GR_RESTORE:
	  {
	    HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
	    HOST_WIDE_INT bit = (offset >> 3) & 63;

	    need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	    new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
					      new_flags, pred);
	    break;
	  }

	case UNSPEC_FR_SPILL:
	case UNSPEC_FR_RESTORE:
	case UNSPEC_GETF_EXP:
	case UNSPEC_SETF_EXP:
	case UNSPEC_FR_SQRT_RECIP_APPROX:
	case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
	case UNSPEC_CHKACLR:
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	  break;

	case UNSPEC_FR_RECIP_APPROX:
	case UNSPEC_COPYSIGN:
	case UNSPEC_FR_RECIP_APPROX_RES:
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
	  break;

	case UNSPEC_CMPXCHG_ACQ:
	case UNSPEC_CMPXCHG_REL:
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    case UNSPEC_VOLATILE:
      switch (XINT (x, 1))
	{
	case UNSPECV_ALLOC:
	  /* Alloc must always be the first instruction of a group.
	     We force this by always returning true.  */
	  /* ??? We might get better scheduling if we explicitly check for
	     input/local/output register dependencies, and modify the
	     scheduler so that alloc is always reordered to the start of
	     the current group.  We could then eliminate all of the
	     first_instruction code.  */
	  rws_access_regno (AR_PFS_REGNUM, flags, pred);

	  new_flags.is_write = 1;
	  rws_access_regno (REG_AR_CFM, new_flags, pred);
	  return 1;

	case UNSPECV_SET_BSP:
	case UNSPECV_PROBE_STACK_RANGE:
	  need_barrier = 1;
	  break;

	case UNSPECV_BLOCKAGE:
	case UNSPECV_INSN_GROUP_BARRIER:
	case UNSPECV_PSAC_ALL:
	case UNSPECV_PSAC_NORMAL:
	  return 0;

	case UNSPECV_PROBE_STACK_ADDRESS:
	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    case RETURN:
      new_flags.is_write = 0;
      need_barrier  = rws_access_regno (REG_RP, flags, pred);
      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);

      new_flags.is_write = 1;
      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
      break;

    default:
      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
	switch (format_ptr[i])
	  {
	  case '0':	/* unused field */
	  case 'i':	/* integer */
	  case 'n':	/* note */
	  case 'L':	/* location_t */
	  case 'w':	/* wide integer */
	  case 's':	/* pointer to string */
	  case 'S':	/* optional pointer to string */
	    break;

	  case 'e':
	    if (rtx_needs_barrier (XEXP (x, i), flags, pred))
	      need_barrier = 1;
	    break;

	  case 'E':
	    for (j = XVECLEN (x, i) - 1; j >= 0; --j)
	      if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
		need_barrier = 1;
	    break;

	  default:
	    gcc_unreachable ();
	  }
      break;
    }
  return need_barrier;
}
/* Clear out the state for group_barrier_needed at the start of a
   sequence of insns.  */

static void
init_insn_group_barriers (void)
{
  memset (rws_sum, 0, sizeof (rws_sum));
  first_instruction = 1;
}
/* Given the current state, determine whether a group barrier (a stop bit) is
   necessary before INSN.  Return nonzero if so.  This modifies the state to
   include the effects of INSN as a side-effect.  */

static int
group_barrier_needed (rtx_insn *insn)
{
  rtx pat;
  int need_barrier = 0;
  struct reg_flags flags;

  memset (&flags, 0, sizeof (flags));
  switch (GET_CODE (insn))
    {
    case NOTE:
    case DEBUG_INSN:
      break;

    case BARRIER:
      /* A barrier doesn't imply an instruction group boundary.  */
      break;

    case CODE_LABEL:
      memset (rws_insn, 0, sizeof (rws_insn));
      return 1;

    case CALL_INSN:
      flags.is_branch = 1;
      flags.is_sibcall = SIBLING_CALL_P (insn);
      memset (rws_insn, 0, sizeof (rws_insn));

      /* Don't bundle a call following another call.  */
      if ((pat = prev_active_insn (insn)) && CALL_P (pat))
	{
	  need_barrier = 1;
	  break;
	}

      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
      break;

    case JUMP_INSN:
      if (!ia64_spec_check_p (insn))
	flags.is_branch = 1;

      /* Don't bundle a jump following a call.  */
      if ((pat = prev_active_insn (insn)) && CALL_P (pat))
	{
	  need_barrier = 1;
	  break;
	}
      /* FALLTHRU */

    case INSN:
      if (GET_CODE (PATTERN (insn)) == USE
	  || GET_CODE (PATTERN (insn)) == CLOBBER)
	/* Don't care about USE and CLOBBER "insns"---those are used to
	   indicate to the optimizer that it shouldn't get rid of
	   certain operations.  */
	break;

      pat = PATTERN (insn);

      /* Ug.  Hack hacks hacked elsewhere.  */
      switch (recog_memoized (insn))
	{
	  /* We play dependency tricks with the epilogue in order
	     to get proper schedules.  Undo this for dv analysis.  */
	case CODE_FOR_epilogue_deallocate_stack:
	case CODE_FOR_prologue_allocate_stack:
	  pat = XVECEXP (pat, 0, 0);
	  break;

	  /* The pattern we use for br.cloop confuses the code above.
	     The second element of the vector is representative.  */
	case CODE_FOR_doloop_end_internal:
	  pat = XVECEXP (pat, 0, 1);
	  break;

	  /* Doesn't generate code.  */
	case CODE_FOR_pred_rel_mutex:
	case CODE_FOR_prologue_use:
	  return 0;

	default:
	  break;
	}

      memset (rws_insn, 0, sizeof (rws_insn));
      need_barrier = rtx_needs_barrier (pat, flags, 0);

      /* Check to see if the previous instruction was a volatile
	 asm.  */
      if (! need_barrier)
	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
      break;

    default:
      gcc_unreachable ();
    }

  if (first_instruction && important_for_bundling_p (insn))
    {
      need_barrier = 0;
      first_instruction = 0;
    }

  return need_barrier;
}
/* Like group_barrier_needed, but do not clobber the current state.  */

static int
safe_group_barrier_needed (rtx_insn *insn)
{
  int saved_first_instruction;
  int t;

  saved_first_instruction = first_instruction;
  in_safe_group_barrier = 1;

  t = group_barrier_needed (insn);

  first_instruction = saved_first_instruction;
  in_safe_group_barrier = 0;

  return t;
}
/* Scan the current function and insert stop bits as necessary to
   eliminate dependencies.  This function assumes that a final
   instruction scheduling pass has been run which has already
   inserted most of the necessary stop bits.  This function only
   inserts new ones at basic block boundaries, since these are
   invisible to the scheduler.  */

static void
emit_insn_group_barriers (FILE *dump)
{
  rtx_insn *insn;
  rtx_insn *last_label = 0;
  int insns_since_last_label = 0;

  init_insn_group_barriers ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (LABEL_P (insn))
	{
	  if (insns_since_last_label)
	    last_label = insn;
	  insns_since_last_label = 0;
	}
      else if (NOTE_P (insn)
	       && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
	{
	  if (insns_since_last_label)
	    last_label = insn;
	  insns_since_last_label = 0;
	}
      else if (NONJUMP_INSN_P (insn)
	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
	       && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
	{
	  init_insn_group_barriers ();
	  last_label = 0;
	}
      else if (NONDEBUG_INSN_P (insn))
	{
	  insns_since_last_label = 1;

	  if (group_barrier_needed (insn))
	    {
	      gcc_assert (last_label);
	      if (dump)
		fprintf (dump, "Emitting stop before label %d\n",
			 INSN_UID (last_label));
	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
	      insn = last_label;

	      init_insn_group_barriers ();
	      last_label = 0;
	    }
	}
    }
}

/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
   This function has to emit all necessary group barriers.  */

static void
emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
{
  rtx_insn *insn;

  init_insn_group_barriers ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (BARRIER_P (insn))
	{
	  rtx_insn *last = prev_active_insn (insn);

	  if (! last)
	    continue;
	  if (JUMP_TABLE_DATA_P (last))
	    last = prev_active_insn (last);
	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);

	  init_insn_group_barriers ();
	}
      else if (NONDEBUG_INSN_P (insn))
	{
	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
	    init_insn_group_barriers ();
	  else if (group_barrier_needed (insn))
	    {
	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
	      init_insn_group_barriers ();
	      group_barrier_needed (insn);
	    }
	}
    }
}
/* Instruction scheduling support.  */

#define NR_BUNDLES 10

/* A list of names of all available bundles.  */

static const char *bundle_name [NR_BUNDLES] =
{
  ".mii",
  ".mmi",
  ".mfi",
  ".mmf",
#if NR_BUNDLES == 10
  ".bbb",
  ".mbb",
#endif
  ".mib",
  ".mmb",
  ".mfb",
  ".mlx"
};

/* Nonzero if we should insert stop bits into the schedule.  */

int ia64_final_schedule = 0;

/* Codes of the corresponding queried units: */

static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;

static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;

static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;

/* The following variable value is an insn group barrier.  */

static rtx_insn *dfa_stop_insn;

/* The following variable value is the last issued insn.  */

static rtx_insn *last_scheduled_insn;

/* The following variable value is pointer to a DFA state used as
   temporary variable.  */

static state_t temp_dfa_state = NULL;

/* The following variable value is DFA state after issuing the last
   insn.  */

static state_t prev_cycle_state = NULL;

/* The following array element values are TRUE if the corresponding
   insn requires adding stop bits before it.  */

static char *stops_p = NULL;

/* The following variable is used to set up the array mentioned above.  */

static int stop_before_p = 0;

/* The following variable value is length of the arrays `clocks' and
   `add_cycles'.  */

static int clocks_length;

/* The following variable value is number of data speculations in progress.  */
static int pending_data_specs = 0;

/* Number of memory references on current and three future processor cycles.  */
static char mem_ops_in_group[4];

/* Number of current processor cycle (from scheduler's point of view).  */
static int current_cycle;

static rtx ia64_single_set (rtx_insn *);
static void ia64_emit_insn_before (rtx, rtx_insn *);

/* Map a bundle number to its pseudo-op.  */

const char *
get_bundle_name (int b)
{
  return bundle_name[b];
}

/* Return the maximum number of instructions a cpu can issue.  */

static int
ia64_issue_rate (void)
{
  return 6;
}
/* Helper function - like single_set, but look inside COND_EXEC.  */

static rtx
ia64_single_set (rtx_insn *insn)
{
  rtx x = PATTERN (insn), ret;
  if (GET_CODE (x) == COND_EXEC)
    x = COND_EXEC_CODE (x);
  if (GET_CODE (x) == SET)
    return x;

  /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
     Although they are not classical single set, the second set is there just
     to protect it from moving past FP-relative stack accesses.  */
  switch (recog_memoized (insn))
    {
    case CODE_FOR_prologue_allocate_stack:
    case CODE_FOR_prologue_allocate_stack_pr:
    case CODE_FOR_epilogue_deallocate_stack:
    case CODE_FOR_epilogue_deallocate_stack_pr:
      ret = XVECEXP (x, 0, 0);
      break;

    default:
      ret = single_set_2 (insn, x);
      break;
    }

  return ret;
}
/* Adjust the cost of a scheduling dependency.
   Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN.
   COST is the current cost, DW is dependency weakness.  */
static int
ia64_adjust_cost (rtx_insn *insn, int dep_type1, rtx_insn *dep_insn,
		  int cost, dw_t dw)
{
  enum reg_note dep_type = (enum reg_note) dep_type1;
  enum attr_itanium_class dep_class;
  enum attr_itanium_class insn_class;

  insn_class = ia64_safe_itanium_class (insn);
  dep_class = ia64_safe_itanium_class (dep_insn);

  /* Treat true memory dependencies separately.  Ignore apparent true
     dependence between store and call (call has a MEM inside a SYMBOL_REF).  */
  if (dep_type == REG_DEP_TRUE
      && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
      && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
    return 0;

  if (dw == MIN_DEP_WEAK)
    /* Store and load are likely to alias, use higher cost to avoid stall.  */
    return param_sched_mem_true_dep_cost;
  else if (dw > MIN_DEP_WEAK)
    {
      /* Store and load are less likely to alias.  */
      if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
	/* Assume there will be no cache conflict for floating-point data.
	   For integer data, L1 conflict penalty is huge (17 cycles), so we
	   never assume it will not cause a conflict.  */
	return 0;
      else
	return cost;
    }

  if (dep_type != REG_DEP_OUTPUT)
    return cost;

  if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
      || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
    return 0;

  return cost;
}
/* Like emit_insn_before, but skip cycle_display notes.
   ??? When cycle display notes are implemented, update this.  */

static void
ia64_emit_insn_before (rtx insn, rtx_insn *before)
{
  emit_insn_before (insn, before);
}
/* The following function marks insns which produce addresses for load
   and store insns.  Such insns will be placed into M slots because it
   decreases latency time for Itanium1 (see function
   `ia64_produce_address_p' and the DFA descriptions).  */

static void
ia64_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
{
  rtx_insn *insn, *next, *next_tail;

  /* Before reload, which_alternative is not set, which means that
     ia64_safe_itanium_class will produce wrong results for (at least)
     move instructions.  */
  if (!reload_completed)
    return;

  next_tail = NEXT_INSN (tail);
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      insn->call = 0;
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
	&& ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
      {
	sd_iterator_def sd_it;
	dep_t dep;
	bool has_mem_op_consumer_p = false;

	FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
	  {
	    enum attr_itanium_class c;

	    if (DEP_TYPE (dep) != REG_DEP_TRUE)
	      continue;

	    next = DEP_CON (dep);
	    c = ia64_safe_itanium_class (next);
	    if ((c == ITANIUM_CLASS_ST
		 || c == ITANIUM_CLASS_STF)
		&& ia64_st_address_bypass_p (insn, next))
	      {
		has_mem_op_consumer_p = true;
		break;
	      }
	    else if ((c == ITANIUM_CLASS_LD
		      || c == ITANIUM_CLASS_FLD
		      || c == ITANIUM_CLASS_FLDP)
		     && ia64_ld_address_bypass_p (insn, next))
	      {
		has_mem_op_consumer_p = true;
		break;
	      }
	  }

	insn->call = has_mem_op_consumer_p;
      }
}
/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
		 int sched_verbose ATTRIBUTE_UNUSED,
		 int max_ready ATTRIBUTE_UNUSED)
{
  if (flag_checking && !sel_sched_p () && reload_completed)
    {
      for (rtx_insn *insn = NEXT_INSN (current_sched_info->prev_head);
	   insn != current_sched_info->next_tail;
	   insn = NEXT_INSN (insn))
	gcc_assert (!SCHED_GROUP_P (insn));
    }
  last_scheduled_insn = NULL;
  init_insn_group_barriers ();

  current_cycle = 0;
  memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
}

/* We're beginning a scheduling pass.  Check assertion.  */

static void
ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
			int sched_verbose ATTRIBUTE_UNUSED,
			int max_ready ATTRIBUTE_UNUSED)
{
  gcc_assert (pending_data_specs == 0);
}

/* Scheduling pass is now finished.  Free/reset static variable.  */
static void
ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
			  int sched_verbose ATTRIBUTE_UNUSED)
{
  gcc_assert (pending_data_specs == 0);
}
/* Return TRUE if INSN is a load (either normal or speculative, but not a
   speculation check), FALSE otherwise.  */
static bool
is_load_p (rtx_insn *insn)
{
  enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);

  return
   ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
    && get_attr_check_load (insn) == CHECK_LOAD_NO);
}

/* If INSN is a memory reference, memoize it in MEM_OPS_IN_GROUP global array
   (taking account for 3-cycle cache reference postponing for stores: Intel
   Itanium 2 Reference Manual for Software Development and Optimization).  */
static void
record_memory_reference (rtx_insn *insn)
{
  enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);

  switch (insn_class)
    {
    case ITANIUM_CLASS_FLD:
    case ITANIUM_CLASS_LD:
      mem_ops_in_group[current_cycle % 4]++;
      break;
    case ITANIUM_CLASS_STF:
    case ITANIUM_CLASS_ST:
      mem_ops_in_group[(current_cycle + 3) % 4]++;
      break;
    default:
      break;
    }
}
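/* Worked example of the cyclic window above (for exposition only): on
   current_cycle == 5 a load bumps mem_ops_in_group[5 % 4], i.e. slot 1,
   while a store bumps mem_ops_in_group[(5 + 3) % 4], i.e. slot 0, which
   is the slot that will be current again on cycle 8.  The store is thus
   accounted three cycles after the cycle it was issued on, matching the
   cache reference postponing described above.  */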
/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */

static int
ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
			int *pn_ready, int clock_var,
			int reorder_type)
{
  int n_asms;
  int n_ready = *pn_ready;
  rtx_insn **e_ready = ready + n_ready;
  rtx_insn **insnp;

  if (sched_verbose)
    fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);

  if (reorder_type == 0)
    {
      /* First, move all USEs, CLOBBERs and other crud out of the way.  */
      n_asms = 0;
      for (insnp = ready; insnp < e_ready; insnp++)
	if (insnp < e_ready)
	  {
	    rtx_insn *insn = *insnp;
	    enum attr_type t = ia64_safe_type (insn);
	    if (t == TYPE_UNKNOWN)
	      {
		if (GET_CODE (PATTERN (insn)) == ASM_INPUT
		    || asm_noperands (PATTERN (insn)) >= 0)
		  {
		    rtx_insn *lowest = ready[n_asms];
		    ready[n_asms] = insn;
		    *insnp = lowest;
		    n_asms++;
		  }
		else
		  {
		    rtx_insn *highest = ready[n_ready - 1];
		    ready[n_ready - 1] = insn;
		    *insnp = highest;
		    return 1;
		  }
	      }
	  }

      if (n_asms < n_ready)
	{
	  /* Some normal insns to process.  Skip the asms.  */
	  ready += n_asms;
	  n_ready -= n_asms;
	}
      else if (n_ready > 0)
	return 1;
    }

  if (ia64_final_schedule)
    {
      int deleted = 0;
      int nr_need_stop = 0;

      for (insnp = ready; insnp < e_ready; insnp++)
	if (safe_group_barrier_needed (*insnp))
	  nr_need_stop++;

      if (reorder_type == 1 && n_ready == nr_need_stop)
	return 0;
      if (reorder_type == 0)
	return 1;
      insnp = e_ready;
      /* Move down everything that needs a stop bit, preserving
	 relative order.  */
      while (insnp-- > ready + deleted)
	while (insnp >= ready + deleted)
	  {
	    rtx_insn *insn = *insnp;
	    if (! safe_group_barrier_needed (insn))
	      break;
	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
	    *ready = insn;
	    deleted++;
	  }
      n_ready -= deleted;
      ready += deleted;
    }

  current_cycle = clock_var;
  if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
    {
      int moved = 0;

      insnp = e_ready;
      /* Move down loads/stores, preserving relative order.  */
      while (insnp-- > ready + moved)
	while (insnp >= ready + moved)
	  {
	    rtx_insn *insn = *insnp;
	    if (! is_load_p (insn))
	      break;
	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
	    *ready = insn;
	    moved++;
	  }
      n_ready -= moved;
      ready += moved;
    }

  return 1;
}
/* We are about to begin issuing insns for this clock cycle.  Override
   the default sort algorithm to better slot instructions.  */

static int
ia64_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
		    int *pn_ready, int clock_var)
{
  return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
				 pn_ready, clock_var, 0);
}

/* Like ia64_sched_reorder, but called after issuing each insn.
   Override the default sort algorithm to better slot instructions.  */

static int
ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
		     int sched_verbose ATTRIBUTE_UNUSED, rtx_insn **ready,
		     int *pn_ready, int clock_var)
{
  return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
				 clock_var, 1);
}
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

static int
ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
		     int sched_verbose ATTRIBUTE_UNUSED,
		     rtx_insn *insn,
		     int can_issue_more ATTRIBUTE_UNUSED)
{
  if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
    /* Modulo scheduling does not extend h_i_d when emitting
       new instructions.  Don't use h_i_d, if we don't have to.  */
    {
      if (DONE_SPEC (insn) & BEGIN_DATA)
	pending_data_specs++;
      if (CHECK_SPEC (insn) & BEGIN_DATA)
	pending_data_specs--;
    }

  if (DEBUG_INSN_P (insn))
    return 1;

  last_scheduled_insn = insn;
  memcpy (prev_cycle_state, curr_state, dfa_state_size);
  if (reload_completed)
    {
      int needed = group_barrier_needed (insn);

      gcc_assert (!needed);
      if (CALL_P (insn))
	init_insn_group_barriers ();
      stops_p[INSN_UID (insn)] = stop_before_p;
      stop_before_p = 0;

      record_memory_reference (insn);
    }
  return 1;
}
/* We are choosing insn from the ready queue.  Return zero if INSN
   can be chosen.  */

static int
ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
{
  gcc_assert (insn && INSN_P (insn));

  /* Size of ALAT is 32.  As far as we perform conservative
     data speculation, we keep ALAT half-empty.  */
  if (pending_data_specs >= 16 && (TODO_SPEC (insn) & BEGIN_DATA))
    return ready_index == 0 ? -1 : 1;

  if (ready_index == 0)
    return 0;

  if ((!reload_completed
       || !safe_group_barrier_needed (insn))
      && (!mflag_sched_mem_insns_hard_limit
	  || !is_load_p (insn)
	  || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns))
    return 0;

  return 1;
}
/* The following variable value is pseudo-insn used by the DFA insn
   scheduler to change the DFA state when the simulated clock is
   increased.  */

static rtx_insn *dfa_pre_cycle_insn;

/* Returns 1 when a meaningful insn was scheduled between the last group
   barrier and LAST.  */
static int
scheduled_good_insn (rtx_insn *last)
{
  if (last && recog_memoized (last) >= 0)
    return 1;

  for ( ;
       last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
       && !stops_p[INSN_UID (last)];
       last = PREV_INSN (last))
    /* We could hit a NOTE_INSN_DELETED here which is actually outside
       the ebb we're scheduling.  */
    if (INSN_P (last) && recog_memoized (last) >= 0)
      return 1;

  return 0;
}
/* We are about to begin issuing INSN.  Return nonzero if we cannot
   issue it on given cycle CLOCK and return zero if we should not sort
   the ready queue on the next clock start.  */

static int
ia64_dfa_new_cycle (FILE *dump, int verbose, rtx_insn *insn, int last_clock,
		    int clock, int *sort_p)
{
  gcc_assert (insn && INSN_P (insn));

  if (DEBUG_INSN_P (insn))
    return 0;

  /* When a group barrier is needed for insn, last_scheduled_insn
     should be set.  */
  gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
	      || last_scheduled_insn);

  if ((reload_completed
       && (safe_group_barrier_needed (insn)
	   || (mflag_sched_stop_bits_after_every_cycle
	       && last_clock != clock
	       && last_scheduled_insn
	       && scheduled_good_insn (last_scheduled_insn))))
      || (last_scheduled_insn
	  && (CALL_P (last_scheduled_insn)
	      || unknown_for_bundling_p (last_scheduled_insn))))
    {
      init_insn_group_barriers ();

      if (verbose && dump)
	fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
		 last_clock == clock ? " + cycle advance" : "");

      stop_before_p = 1;
      current_cycle = clock;
      mem_ops_in_group[current_cycle % 4] = 0;

      if (last_clock == clock)
	{
	  state_transition (curr_state, dfa_stop_insn);
	  if (TARGET_EARLY_STOP_BITS)
	    *sort_p = (last_scheduled_insn == NULL_RTX
		       || ! CALL_P (last_scheduled_insn));
	  else
	    *sort_p = 0;
	  return 1;
	}

      if (last_scheduled_insn)
	{
	  if (unknown_for_bundling_p (last_scheduled_insn))
	    state_reset (curr_state);
	  else
	    {
	      memcpy (curr_state, prev_cycle_state, dfa_state_size);
	      state_transition (curr_state, dfa_stop_insn);
	      state_transition (curr_state, dfa_pre_cycle_insn);
	      state_transition (curr_state, NULL);
	    }
	}
    }
  return 0;
}
/* Implement targetm.sched.h_i_d_extended hook.
   Extend internal data structures.  */
static void
ia64_h_i_d_extended (void)
{
  if (stops_p != NULL)
    {
      int new_clocks_length = get_max_uid () * 3 / 2;

      stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);

      clocks_length = new_clocks_length;
    }
}
/* This structure describes the data used by the backend to guide scheduling.
   When the current scheduling point is switched, this data should be saved
   and restored later, if the scheduler returns to this point.  */
struct _ia64_sched_context
{
  state_t prev_cycle_state;
  rtx_insn *last_scheduled_insn;
  struct reg_write_state rws_sum[NUM_REGS];
  struct reg_write_state rws_insn[NUM_REGS];
  int first_instruction;
  int pending_data_specs;
  int current_cycle;
  char mem_ops_in_group[4];
};
typedef struct _ia64_sched_context *ia64_sched_context_t;

/* Allocates a scheduling context.  */
static void *
ia64_alloc_sched_context (void)
{
  return xmalloc (sizeof (struct _ia64_sched_context));
}

/* Initializes the _SC context with clean data, if CLEAN_P, and from
   the global context otherwise.  */
static void
ia64_init_sched_context (void *_sc, bool clean_p)
{
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;

  sc->prev_cycle_state = xmalloc (dfa_state_size);
  if (clean_p)
    {
      state_reset (sc->prev_cycle_state);
      sc->last_scheduled_insn = NULL;
      memset (sc->rws_sum, 0, sizeof (rws_sum));
      memset (sc->rws_insn, 0, sizeof (rws_insn));
      sc->first_instruction = 1;
      sc->pending_data_specs = 0;
      sc->current_cycle = 0;
      memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
    }
  else
    {
      memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
      sc->last_scheduled_insn = last_scheduled_insn;
      memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
      memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
      sc->first_instruction = first_instruction;
      sc->pending_data_specs = pending_data_specs;
      sc->current_cycle = current_cycle;
      memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
    }
}

/* Sets the global scheduling context to the one pointed to by _SC.  */
static void
ia64_set_sched_context (void *_sc)
{
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;

  gcc_assert (sc != NULL);

  memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
  last_scheduled_insn = sc->last_scheduled_insn;
  memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
  memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
  first_instruction = sc->first_instruction;
  pending_data_specs = sc->pending_data_specs;
  current_cycle = sc->current_cycle;
  memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
}

/* Clears the data in the _SC scheduling context.  */
static void
ia64_clear_sched_context (void *_sc)
{
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;

  free (sc->prev_cycle_state);
  sc->prev_cycle_state = NULL;
}

/* Frees the _SC scheduling context.  */
static void
ia64_free_sched_context (void *_sc)
{
  gcc_assert (_sc != NULL);

  free (_sc);
}
typedef rtx (* gen_func_t) (rtx, rtx);

/* Return a function that will generate a load of mode MODE_NO
   with speculation types TS.  */
static gen_func_t
get_spec_load_gen_function (ds_t ts, int mode_no)
{
  static gen_func_t gen_ld_[] = {
    gen_zero_extendqidi2,
    gen_zero_extendhidi2,
    gen_zero_extendsidi2,
  };

  static gen_func_t gen_ld_a[] = {
    gen_zero_extendqidi2_advanced,
    gen_zero_extendhidi2_advanced,
    gen_zero_extendsidi2_advanced,
  };
  static gen_func_t gen_ld_s[] = {
    gen_movbi_speculative,
    gen_movqi_speculative,
    gen_movhi_speculative,
    gen_movsi_speculative,
    gen_movdi_speculative,
    gen_movsf_speculative,
    gen_movdf_speculative,
    gen_movxf_speculative,
    gen_movti_speculative,
    gen_zero_extendqidi2_speculative,
    gen_zero_extendhidi2_speculative,
    gen_zero_extendsidi2_speculative,
  };
  static gen_func_t gen_ld_sa[] = {
    gen_movbi_speculative_advanced,
    gen_movqi_speculative_advanced,
    gen_movhi_speculative_advanced,
    gen_movsi_speculative_advanced,
    gen_movdi_speculative_advanced,
    gen_movsf_speculative_advanced,
    gen_movdf_speculative_advanced,
    gen_movxf_speculative_advanced,
    gen_movti_speculative_advanced,
    gen_zero_extendqidi2_speculative_advanced,
    gen_zero_extendhidi2_speculative_advanced,
    gen_zero_extendsidi2_speculative_advanced,
  };
  static gen_func_t gen_ld_s_a[] = {
    gen_movbi_speculative_a,
    gen_movqi_speculative_a,
    gen_movhi_speculative_a,
    gen_movsi_speculative_a,
    gen_movdi_speculative_a,
    gen_movsf_speculative_a,
    gen_movdf_speculative_a,
    gen_movxf_speculative_a,
    gen_movti_speculative_a,
    gen_zero_extendqidi2_speculative_a,
    gen_zero_extendhidi2_speculative_a,
    gen_zero_extendsidi2_speculative_a,
  };

  gen_func_t *gen_ld;

  if (ts & BEGIN_DATA)
    {
      if (ts & BEGIN_CONTROL)
	gen_ld = gen_ld_sa;
      else
	gen_ld = gen_ld_a;
    }
  else if (ts & BEGIN_CONTROL)
    {
      if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
	  || ia64_needs_block_p (ts))
	gen_ld = gen_ld_s;
      else
	gen_ld = gen_ld_s_a;
    }
  else
    gen_ld = gen_ld_;

  return gen_ld[mode_no];
}
/* Constants that help mapping 'machine_mode' to int.  */
enum SPEC_MODES
  {
    SPEC_MODE_INVALID = -1,
    SPEC_MODE_FIRST = 0,
    SPEC_MODE_FOR_EXTEND_FIRST = 1,
    SPEC_MODE_FOR_EXTEND_LAST = 3,
    SPEC_MODE_LAST = 8
  };

enum
  {
    /* Offset to reach ZERO_EXTEND patterns.  */
    SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
  };

/* Return index of the MODE.  */
static int
ia64_mode_to_int (machine_mode mode)
{
  switch (mode)
    {
    case E_BImode: return 0; /* SPEC_MODE_FIRST  */
    case E_QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST  */
    case E_HImode: return 2;
    case E_SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST  */
    case E_DImode: return 4;
    case E_SFmode: return 5;
    case E_DFmode: return 6;
    case E_XFmode: return 7;
    case E_TImode:
      /* ??? This mode needs testing.  Bypasses for ldfp8 instruction are not
	 mentioned in itanium[12].md.  Predicate fp_register_operand also
	 needs to be defined.  Bottom line: better disable for now.  */
      return SPEC_MODE_INVALID;
    default:     return SPEC_MODE_INVALID;
    }
}
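/* Example of how the two enums above combine (for exposition only,
   assuming SPEC_MODE_LAST == 8 as defined above): a QImode load has
   mode_no == 1 (SPEC_MODE_FOR_EXTEND_FIRST).  If the insn is really
   (zero_extend:DI (mem:QI ...)), get_mode_no_for_insn below adds
   SPEC_GEN_EXTEND_OFFSET == 8 - 1 + 1 == 8, giving mode_no == 9, which
   selects the corresponding gen_zero_extendqidi2* generator in the
   tables used by get_spec_load_gen_function.  */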
/* Provide information about speculation capabilities.  */
static void
ia64_set_sched_flags (spec_info_t spec_info)
{
  unsigned int *flags = &(current_sched_info->flags);

  if (*flags & SCHED_RGN
      || *flags & SCHED_EBB
      || *flags & SEL_SCHED)
    {
      int mask = 0;

      if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
	  || (mflag_sched_ar_data_spec && reload_completed))
	{
	  mask |= BEGIN_DATA;

	  if (!sel_sched_p ()
	      && ((mflag_sched_br_in_data_spec && !reload_completed)
		  || (mflag_sched_ar_in_data_spec && reload_completed)))
	    mask |= BE_IN_DATA;
	}

      if (mflag_sched_control_spec
	  && (!sel_sched_p ()
	      || reload_completed))
	{
	  mask |= BEGIN_CONTROL;

	  if (!sel_sched_p () && mflag_sched_in_control_spec)
	    mask |= BE_IN_CONTROL;
	}

      spec_info->mask = mask;

      if (mask)
	{
	  *flags |= USE_DEPS_LIST | DO_SPECULATION;

	  if (mask & BE_IN_SPEC)
	    *flags |= NEW_BBS;

	  spec_info->flags = 0;

	  if ((mask & CONTROL_SPEC)
	      && sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
	    spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;

	  if (sched_verbose >= 1)
	    spec_info->dump = sched_dump;
	  else
	    spec_info->dump = 0;

	  if (mflag_sched_count_spec_in_critical_path)
	    spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
	}
    }
  else
    spec_info->mask = 0;
}
/* If INSN is an appropriate load return its mode.
   Return -1 otherwise.  */
static int
get_mode_no_for_insn (rtx_insn *insn)
{
  rtx reg, mem, mode_rtx;
  int mode_no;
  bool extend_p;

  extract_insn_cached (insn);

  /* We use WHICH_ALTERNATIVE only after reload.  This will
     guarantee that reload won't touch a speculative insn.  */

  if (recog_data.n_operands != 2)
    return -1;

  reg = recog_data.operand[0];
  mem = recog_data.operand[1];

  /* We should use MEM's mode since REG's mode in presence of
     ZERO_EXTEND will always be DImode.  */
  if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
    /* Process non-speculative ld.  */
    {
      if (!reload_completed)
	{
	  /* Do not speculate into regs like ar.lc.  */
	  if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
	    return -1;

	  if (!MEM_P (mem))
	    return -1;

	  {
	    rtx mem_reg = XEXP (mem, 0);

	    if (!REG_P (mem_reg))
	      return -1;
	  }
	}

      mode_rtx = mem;
    }
  else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
    {
      gcc_assert (REG_P (reg) && MEM_P (mem));
      mode_rtx = mem;
    }
  else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
	   || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
	   || get_attr_check_load (insn) == CHECK_LOAD_YES)
    /* Process speculative ld or ld.c.  */
    {
      gcc_assert (REG_P (reg) && MEM_P (mem));
      mode_rtx = mem;
    }
  else
    {
      enum attr_itanium_class attr_class = get_attr_itanium_class (insn);

      if (attr_class == ITANIUM_CLASS_CHK_A
	  || attr_class == ITANIUM_CLASS_CHK_S_I
	  || attr_class == ITANIUM_CLASS_CHK_S_F)
	/* Process chk.  */
	mode_rtx = reg;
      else
	return -1;
    }

  mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));

  if (mode_no == SPEC_MODE_INVALID)
    return -1;

  extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));

  if (extend_p)
    {
      if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
	    && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
	return -1;

      mode_no += SPEC_GEN_EXTEND_OFFSET;
    }

  return mode_no;
}
/* If X is an unspec part of a speculative load, return its code.
   Return -1 otherwise.  */
static int
get_spec_unspec_code (const_rtx x)
{
  if (GET_CODE (x) != UNSPEC)
    return -1;

  {
    int code;

    code = XINT (x, 1);

    switch (code)
      {
      case UNSPEC_LDA:
      case UNSPEC_LDS:
      case UNSPEC_LDS_A:
      case UNSPEC_LDSA:
	return code;

      default:
	return -1;
      }
  }
}

/* Implement skip_rtx_p hook.  */
static bool
ia64_skip_rtx_p (const_rtx x)
{
  return get_spec_unspec_code (x) != -1;
}

/* If INSN is a speculative load, return its UNSPEC code.
   Return -1 otherwise.  */
static int
get_insn_spec_code (const_rtx insn)
{
  rtx pat, reg, mem;

  pat = PATTERN (insn);

  if (GET_CODE (pat) == COND_EXEC)
    pat = COND_EXEC_CODE (pat);

  if (GET_CODE (pat) != SET)
    return -1;

  reg = SET_DEST (pat);
  if (!REG_P (reg))
    return -1;

  mem = SET_SRC (pat);
  if (GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  return get_spec_unspec_code (mem);
}

/* If INSN is a speculative load, return a ds with the speculation types.
   Otherwise [if INSN is a normal instruction] return 0.  */
static ds_t
ia64_get_insn_spec_ds (rtx_insn *insn)
{
  int code = get_insn_spec_code (insn);

  switch (code)
    {
    case UNSPEC_LDA:
      return BEGIN_DATA;

    case UNSPEC_LDS:
    case UNSPEC_LDS_A:
      return BEGIN_CONTROL;

    case UNSPEC_LDSA:
      return BEGIN_DATA | BEGIN_CONTROL;

    default:
      return 0;
    }
}

/* If INSN is a speculative load return a ds with the speculation types that
   will be checked.
   Otherwise [if INSN is a normal instruction] return 0.  */
static ds_t
ia64_get_insn_checked_ds (rtx_insn *insn)
{
  int code = get_insn_spec_code (insn);

  switch (code)
    {
    case UNSPEC_LDA:
      return BEGIN_DATA | BEGIN_CONTROL;

    case UNSPEC_LDS:
      return BEGIN_CONTROL;

    case UNSPEC_LDS_A:
    case UNSPEC_LDSA:
      return BEGIN_DATA | BEGIN_CONTROL;

    default:
      return 0;
    }
}
/* If GEN_P is true, calculate the index of needed speculation check and return
   speculative pattern for INSN with speculative mode TS, machine mode
   MODE_NO and with ZERO_EXTEND (if EXTEND_P is true).
   If GEN_P is false, just calculate the index of needed speculation check.  */
static rtx
ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
{
  rtx pat, new_pat;
  gen_func_t gen_load;

  gen_load = get_spec_load_gen_function (ts, mode_no);

  new_pat = gen_load (copy_rtx (recog_data.operand[0]),
		      copy_rtx (recog_data.operand[1]));

  pat = PATTERN (insn);
  if (GET_CODE (pat) == COND_EXEC)
    new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
				 new_pat);

  return new_pat;
}

static bool
insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
			      ds_t ds ATTRIBUTE_UNUSED)
{
  return false;
}

/* Implement targetm.sched.speculate_insn hook.
   Check if the INSN can be TS speculative.
   If 'no' - return -1.
   If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
   If current pattern of the INSN already provides TS speculation,
   return 0.  */
static int
ia64_speculate_insn (rtx_insn *insn, ds_t ts, rtx *new_pat)
{
  int mode_no;
  int res;

  gcc_assert (!(ts & ~SPECULATIVE));

  if (ia64_spec_check_p (insn))
    return -1;

  if ((ts & BE_IN_SPEC)
      && !insn_can_be_in_speculative_p (insn, ts))
    return -1;

  mode_no = get_mode_no_for_insn (insn);

  if (mode_no != SPEC_MODE_INVALID)
    {
      if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
	res = 0;
      else
	{
	  res = 1;
	  *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
	}
    }
  else
    res = -1;

  return res;
}
/* Return a function that will generate a check for speculation TS with mode
   MODE_NO.
   If simple check is needed, pass true for SIMPLE_CHECK_P.
   If clearing check is needed, pass true for CLEARING_CHECK_P.  */
static gen_func_t
get_spec_check_gen_function (ds_t ts, int mode_no,
			     bool simple_check_p, bool clearing_check_p)
{
  static gen_func_t gen_ld_c_clr[] = {
    gen_zero_extendqidi2_clr,
    gen_zero_extendhidi2_clr,
    gen_zero_extendsidi2_clr,
  };
  static gen_func_t gen_ld_c_nc[] = {
    gen_zero_extendqidi2_nc,
    gen_zero_extendhidi2_nc,
    gen_zero_extendsidi2_nc,
  };
  static gen_func_t gen_chk_a_clr[] = {
    gen_advanced_load_check_clr_bi,
    gen_advanced_load_check_clr_qi,
    gen_advanced_load_check_clr_hi,
    gen_advanced_load_check_clr_si,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_sf,
    gen_advanced_load_check_clr_df,
    gen_advanced_load_check_clr_xf,
    gen_advanced_load_check_clr_ti,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_di,
  };
  static gen_func_t gen_chk_a_nc[] = {
    gen_advanced_load_check_nc_bi,
    gen_advanced_load_check_nc_qi,
    gen_advanced_load_check_nc_hi,
    gen_advanced_load_check_nc_si,
    gen_advanced_load_check_nc_di,
    gen_advanced_load_check_nc_sf,
    gen_advanced_load_check_nc_df,
    gen_advanced_load_check_nc_xf,
    gen_advanced_load_check_nc_ti,
    gen_advanced_load_check_nc_di,
    gen_advanced_load_check_nc_di,
    gen_advanced_load_check_nc_di,
  };
  static gen_func_t gen_chk_s[] = {
    gen_speculation_check_bi,
    gen_speculation_check_qi,
    gen_speculation_check_hi,
    gen_speculation_check_si,
    gen_speculation_check_di,
    gen_speculation_check_sf,
    gen_speculation_check_df,
    gen_speculation_check_xf,
    gen_speculation_check_ti,
    gen_speculation_check_di,
    gen_speculation_check_di,
    gen_speculation_check_di,
  };

  gen_func_t *gen_check;

  if (ts & BEGIN_DATA)
    {
      /* We don't need recovery because even if this is ld.sa
	 ALAT entry will be allocated only if NAT bit is set to zero.
	 So it is enough to use ld.c here.  */

      if (simple_check_p)
	{
	  gcc_assert (mflag_sched_spec_ldc);

	  if (clearing_check_p)
	    gen_check = gen_ld_c_clr;
	  else
	    gen_check = gen_ld_c_nc;
	}
      else
	{
	  if (clearing_check_p)
	    gen_check = gen_chk_a_clr;
	  else
	    gen_check = gen_chk_a_nc;
	}
    }
  else if (ts & BEGIN_CONTROL)
    {
      if (simple_check_p)
	/* We might want to use ld.sa -> ld.c instead of
	   ld.s -> chk.s.  */
	{
	  gcc_assert (!ia64_needs_block_p (ts));

	  if (clearing_check_p)
	    gen_check = gen_ld_c_clr;
	  else
	    gen_check = gen_ld_c_nc;
	}
      else
	gen_check = gen_chk_s;
    }
  else
    gcc_unreachable ();

  gcc_assert (mode_no >= 0);
  return gen_check[mode_no];
}

/* Return nonzero, if INSN needs branchy recovery check.  */
static bool
ia64_needs_block_p (ds_t ts)
{
  if (ts & BEGIN_DATA)
    return !mflag_sched_spec_ldc;

  gcc_assert ((ts & BEGIN_CONTROL) != 0);

  return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
}
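/* Example of how the two routines above fit together (for exposition
   only): for a data-speculative load (ts & BEGIN_DATA), ia64_needs_block_p
   returns false whenever mflag_sched_spec_ldc is set, so a simple check
   can be requested from get_spec_check_gen_function, which then hands
   back an ld.c pattern; when mflag_sched_spec_ldc is clear, a branchy
   recovery check (chk.a) is required instead.  */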
/* Generate (or regenerate) a recovery check for INSN.  */
static rtx
ia64_gen_spec_check (rtx_insn *insn, rtx_insn *label, ds_t ds)
{
  rtx op1, pat, check_pat;
  gen_func_t gen_check;
  int mode_no;

  mode_no = get_mode_no_for_insn (insn);
  gcc_assert (mode_no >= 0);

  if (label)
    op1 = label;
  else
    {
      gcc_assert (!ia64_needs_block_p (ds));
      op1 = copy_rtx (recog_data.operand[1]);
    }

  gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
					   true);

  check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);

  pat = PATTERN (insn);
  if (GET_CODE (pat) == COND_EXEC)
    check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
				   check_pat);

  return check_pat;
}

/* Return nonzero, if X is branchy recovery check.  */
static int
ia64_spec_check_p (rtx x)
{
  x = PATTERN (x);
  if (GET_CODE (x) == COND_EXEC)
    x = COND_EXEC_CODE (x);
  if (GET_CODE (x) == SET)
    return ia64_spec_check_src_p (SET_SRC (x));
  return 0;
}

/* Return nonzero, if SRC belongs to recovery check.  */
static int
ia64_spec_check_src_p (rtx src)
{
  if (GET_CODE (src) == IF_THEN_ELSE)
    {
      rtx t;

      t = XEXP (src, 0);
      if (GET_CODE (t) == NE)
	{
	  t = XEXP (t, 0);

	  if (GET_CODE (t) == UNSPEC)
	    {
	      int code;

	      code = XINT (t, 1);

	      if (code == UNSPEC_LDCCLR
		  || code == UNSPEC_LDCNC
		  || code == UNSPEC_CHKACLR
		  || code == UNSPEC_CHKANC
		  || code == UNSPEC_CHKS)
		{
		  gcc_assert (code != 0);
		  return code;
		}
	    }
	}
    }
  return 0;
}
/* The following page contains abstract data `bundle states' which are
   used for bundling insns (inserting nops and template generation).  */

/* The following structure describes the state of insn bundling.  */

struct bundle_state
{
  /* Unique bundle state number to identify them in the debugging
     output.  */
  int unique_num;
  rtx_insn *insn;	/* corresponding insn, NULL for the 1st and the last state  */
  /* number nops before and after the insn */
  short before_nops_num, after_nops_num;
  int insn_num;		/* insn number (0 - for initial state, 1 - for the 1st
			   insn)  */
  int cost;		/* cost of the state in cycles */
  int accumulated_insns_num; /* number of all previous insns including
				nops.  L is considered as 2 insns */
  int branch_deviation; /* deviation of previous branches from 3rd slots  */
  int middle_bundle_stops; /* number of stop bits in the middle of bundles */
  struct bundle_state *next;  /* next state with the same insn_num  */
  struct bundle_state *originator; /* originator (previous insn state)  */
  /* All bundle states are in the following chain.  */
  struct bundle_state *allocated_states_chain;
  /* The DFA State after issuing the insn and the nops.  */
  state_t dfa_state;
};

/* The following maps an insn number to the corresponding bundle state.  */

static struct bundle_state **index_to_bundle_states;

/* The unique number of next bundle state.  */

static int bundle_states_num;

/* All allocated bundle states are in the following chain.  */

static struct bundle_state *allocated_bundle_states_chain;

/* All allocated but not used bundle states are in the following
   chain.  */

static struct bundle_state *free_bundle_state_chain;
/* The following function returns a free bundle state.  */

static struct bundle_state *
get_free_bundle_state (void)
{
  struct bundle_state *result;

  if (free_bundle_state_chain != NULL)
    {
      result = free_bundle_state_chain;
      free_bundle_state_chain = result->next;
    }
  else
    {
      result = XNEW (struct bundle_state);
      result->dfa_state = xmalloc (dfa_state_size);
      result->allocated_states_chain = allocated_bundle_states_chain;
      allocated_bundle_states_chain = result;
    }
  result->unique_num = bundle_states_num++;
  return result;
}

/* The following function frees given bundle state.  */

static void
free_bundle_state (struct bundle_state *state)
{
  state->next = free_bundle_state_chain;
  free_bundle_state_chain = state;
}

/* Start work with abstract data `bundle states'.  */

static void
initiate_bundle_states (void)
{
  bundle_states_num = 0;
  free_bundle_state_chain = NULL;
  allocated_bundle_states_chain = NULL;
}

/* Finish work with abstract data `bundle states'.  */

static void
finish_bundle_states (void)
{
  struct bundle_state *curr_state, *next_state;

  for (curr_state = allocated_bundle_states_chain;
       curr_state != NULL;
       curr_state = next_state)
    {
      next_state = curr_state->allocated_states_chain;
      free (curr_state->dfa_state);
      free (curr_state);
    }
}
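
/* Illustrative sketch (added commentary, not part of the original port):
   the intended lifecycle of the allocator above.  States are recycled
   through free_bundle_state_chain and only released for good by
   finish_bundle_states.  A hypothetical standalone driver would look
   roughly like this:  */
#if 0
static void
bundle_state_lifecycle_sketch (void)
{
  struct bundle_state *s;

  initiate_bundle_states ();	/* reset the counters and chains */
  s = get_free_bundle_state ();	/* fresh state, dfa_state buffer attached */
  /* ... fill in s->insn, s->insn_num, s->cost, ... */
  free_bundle_state (s);	/* return S to the free chain for reuse */
  finish_bundle_states ();	/* free every state ever allocated */
}
#endif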
/* Hashtable helpers.  */

struct bundle_state_hasher : nofree_ptr_hash <bundle_state>
{
  static inline hashval_t hash (const bundle_state *);
  static inline bool equal (const bundle_state *, const bundle_state *);
};

/* The function returns hash of BUNDLE_STATE.  */

hashval_t
bundle_state_hasher::hash (const bundle_state *state)
{
  unsigned result, i;

  for (result = i = 0; i < dfa_state_size; i++)
    result += (((unsigned char *) state->dfa_state) [i]
	       << ((i % CHAR_BIT) * 3 + CHAR_BIT));
  return result + state->insn_num;
}

/* The function returns nonzero if the bundle state keys are equal.  */

bool
bundle_state_hasher::equal (const bundle_state *state1,
			    const bundle_state *state2)
{
  return (state1->insn_num == state2->insn_num
	  && memcmp (state1->dfa_state, state2->dfa_state,
		     dfa_state_size) == 0);
}

/* Hash table of the bundle states.  The key is dfa_state and insn_num
   of the bundle states.  */

static hash_table<bundle_state_hasher> *bundle_state_table;
/* The function inserts the BUNDLE_STATE into the hash table.  The
   function returns nonzero if the bundle has been inserted into the
   table.  The table contains the best bundle state with given key.  */

static int
insert_bundle_state (struct bundle_state *bundle_state)
{
  struct bundle_state **entry_ptr;

  entry_ptr = bundle_state_table->find_slot (bundle_state, INSERT);
  if (*entry_ptr == NULL)
    {
      bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
      index_to_bundle_states [bundle_state->insn_num] = bundle_state;
      *entry_ptr = bundle_state;
      return TRUE;
    }
  else if (bundle_state->cost < (*entry_ptr)->cost
	   || (bundle_state->cost == (*entry_ptr)->cost
	       && ((*entry_ptr)->accumulated_insns_num
		   > bundle_state->accumulated_insns_num
		   || ((*entry_ptr)->accumulated_insns_num
		       == bundle_state->accumulated_insns_num
		       && ((*entry_ptr)->branch_deviation
			   > bundle_state->branch_deviation
			   || ((*entry_ptr)->branch_deviation
			       == bundle_state->branch_deviation
			       && (*entry_ptr)->middle_bundle_stops
				  > bundle_state->middle_bundle_stops))))))
    {
      struct bundle_state temp;

      temp = **entry_ptr;
      **entry_ptr = *bundle_state;
      (*entry_ptr)->next = temp.next;
      *bundle_state = temp;
    }
  return FALSE;
}
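
/* Illustrative sketch (added commentary, not part of the original port):
   the nested condition above is a lexicographic "is strictly better"
   test over (cost, accumulated_insns_num, branch_deviation,
   middle_bundle_stops), each compared smaller-is-better.  Written out
   flat it would read roughly as follows:  */
#if 0
static bool
bundle_state_better_p (const struct bundle_state *a,
		       const struct bundle_state *b)
{
  /* Return true if A is strictly better than B.  */
  if (a->cost != b->cost)
    return a->cost < b->cost;
  if (a->accumulated_insns_num != b->accumulated_insns_num)
    return a->accumulated_insns_num < b->accumulated_insns_num;
  if (a->branch_deviation != b->branch_deviation)
    return a->branch_deviation < b->branch_deviation;
  return a->middle_bundle_stops < b->middle_bundle_stops;
}
#endif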
/* Start work with the hash table.  */

static void
initiate_bundle_state_table (void)
{
  bundle_state_table = new hash_table<bundle_state_hasher> (50);
}

/* Finish work with the hash table.  */

static void
finish_bundle_state_table (void)
{
  delete bundle_state_table;
  bundle_state_table = NULL;
}
/* The following variable is an insn `nop' used to check bundle states
   with different number of inserted nops.  */

static rtx_insn *ia64_nop;

/* The following function tries to issue NOPS_NUM nops for the current
   state without advancing processor cycle.  If it failed, the
   function returns FALSE and frees the current state.  */

static int
try_issue_nops (struct bundle_state *curr_state, int nops_num)
{
  int i;

  for (i = 0; i < nops_num; i++)
    if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
      {
	free_bundle_state (curr_state);
	return FALSE;
      }

  return TRUE;
}

/* The following function tries to issue INSN for the current
   state without advancing processor cycle.  If it failed, the
   function returns FALSE and frees the current state.  */

static int
try_issue_insn (struct bundle_state *curr_state, rtx insn)
{
  if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
    {
      free_bundle_state (curr_state);
      return FALSE;
    }

  return TRUE;
}
8816 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
8817 starting with ORIGINATOR without advancing processor cycle. If
8818 TRY_BUNDLE_END_P is TRUE, the function also/only (if
8819 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill all bundle.
8820 If it was successful, the function creates new bundle state and
8821 insert into the hash table and into `index_to_bundle_states'. */
8824 issue_nops_and_insn (struct bundle_state
*originator
, int before_nops_num
,
8825 rtx_insn
*insn
, int try_bundle_end_p
,
8826 int only_bundle_end_p
)
8828 struct bundle_state
*curr_state
;
8830 curr_state
= get_free_bundle_state ();
8831 memcpy (curr_state
->dfa_state
, originator
->dfa_state
, dfa_state_size
);
8832 curr_state
->insn
= insn
;
8833 curr_state
->insn_num
= originator
->insn_num
+ 1;
8834 curr_state
->cost
= originator
->cost
;
8835 curr_state
->originator
= originator
;
8836 curr_state
->before_nops_num
= before_nops_num
;
8837 curr_state
->after_nops_num
= 0;
8838 curr_state
->accumulated_insns_num
8839 = originator
->accumulated_insns_num
+ before_nops_num
;
8840 curr_state
->branch_deviation
= originator
->branch_deviation
;
8841 curr_state
->middle_bundle_stops
= originator
->middle_bundle_stops
;
8843 if (INSN_CODE (insn
) == CODE_FOR_insn_group_barrier
)
8845 gcc_assert (GET_MODE (insn
) != TImode
);
8846 if (!try_issue_nops (curr_state
, before_nops_num
))
8848 if (!try_issue_insn (curr_state
, insn
))
8850 memcpy (temp_dfa_state
, curr_state
->dfa_state
, dfa_state_size
);
8851 if (curr_state
->accumulated_insns_num
% 3 != 0)
8852 curr_state
->middle_bundle_stops
++;
8853 if (state_transition (temp_dfa_state
, dfa_pre_cycle_insn
) >= 0
8854 && curr_state
->accumulated_insns_num
% 3 != 0)
8856 free_bundle_state (curr_state
);
8860 else if (GET_MODE (insn
) != TImode
)
8862 if (!try_issue_nops (curr_state
, before_nops_num
))
8864 if (!try_issue_insn (curr_state
, insn
))
8866 curr_state
->accumulated_insns_num
++;
8867 gcc_assert (!unknown_for_bundling_p (insn
));
8869 if (ia64_safe_type (insn
) == TYPE_L
)
8870 curr_state
->accumulated_insns_num
++;
8874 /* If this is an insn that must be first in a group, then don't allow
8875 nops to be emitted before it. Currently, alloc is the only such
8876 supported instruction. */
8877 /* ??? The bundling automatons should handle this for us, but they do
8878 not yet have support for the first_insn attribute. */
8879 if (before_nops_num
> 0 && get_attr_first_insn (insn
) == FIRST_INSN_YES
)
8881 free_bundle_state (curr_state
);
8885 state_transition (curr_state
->dfa_state
, dfa_pre_cycle_insn
);
8886 state_transition (curr_state
->dfa_state
, NULL
);
8888 if (!try_issue_nops (curr_state
, before_nops_num
))
8890 if (!try_issue_insn (curr_state
, insn
))
8892 curr_state
->accumulated_insns_num
++;
8893 if (unknown_for_bundling_p (insn
))
8895 /* Finish bundle containing asm insn. */
8896 curr_state
->after_nops_num
8897 = 3 - curr_state
->accumulated_insns_num
% 3;
8898 curr_state
->accumulated_insns_num
8899 += 3 - curr_state
->accumulated_insns_num
% 3;
8901 else if (ia64_safe_type (insn
) == TYPE_L
)
8902 curr_state
->accumulated_insns_num
++;
8904 if (ia64_safe_type (insn
) == TYPE_B
)
8905 curr_state
->branch_deviation
8906 += 2 - (curr_state
->accumulated_insns_num
- 1) % 3;
8907 if (try_bundle_end_p
&& curr_state
->accumulated_insns_num
% 3 != 0)
8909 if (!only_bundle_end_p
&& insert_bundle_state (curr_state
))
8912 struct bundle_state
*curr_state1
;
8913 struct bundle_state
*allocated_states_chain
;
8915 curr_state1
= get_free_bundle_state ();
8916 dfa_state
= curr_state1
->dfa_state
;
8917 allocated_states_chain
= curr_state1
->allocated_states_chain
;
8918 *curr_state1
= *curr_state
;
8919 curr_state1
->dfa_state
= dfa_state
;
8920 curr_state1
->allocated_states_chain
= allocated_states_chain
;
8921 memcpy (curr_state1
->dfa_state
, curr_state
->dfa_state
,
8923 curr_state
= curr_state1
;
8925 if (!try_issue_nops (curr_state
,
8926 3 - curr_state
->accumulated_insns_num
% 3))
8928 curr_state
->after_nops_num
8929 = 3 - curr_state
->accumulated_insns_num
% 3;
8930 curr_state
->accumulated_insns_num
8931 += 3 - curr_state
->accumulated_insns_num
% 3;
8933 if (!insert_bundle_state (curr_state
))
8934 free_bundle_state (curr_state
);
/* The following function returns position in the two window bundle
   for given STATE.  */

static int
get_max_pos (state_t state)
{
  if (cpu_unit_reservation_p (state, pos_6))
    return 6;
  else if (cpu_unit_reservation_p (state, pos_5))
    return 5;
  else if (cpu_unit_reservation_p (state, pos_4))
    return 4;
  else if (cpu_unit_reservation_p (state, pos_3))
    return 3;
  else if (cpu_unit_reservation_p (state, pos_2))
    return 2;
  else if (cpu_unit_reservation_p (state, pos_1))
    return 1;
  else
    return 0;
}
8960 /* The function returns code of a possible template for given position
8961 and state. The function should be called only with 2 values of
8962 position equal to 3 or 6. We avoid generating F NOPs by putting
8963 templates containing F insns at the end of the template search
8964 because undocumented anomaly in McKinley derived cores which can
8965 cause stalls if an F-unit insn (including a NOP) is issued within a
8966 six-cycle window after reading certain application registers (such
8967 as ar.bsp). Furthermore, power-considerations also argue against
8968 the use of F-unit instructions unless they're really needed. */
8971 get_template (state_t state
, int pos
)
8976 if (cpu_unit_reservation_p (state
, _0mmi_
))
8978 else if (cpu_unit_reservation_p (state
, _0mii_
))
8980 else if (cpu_unit_reservation_p (state
, _0mmb_
))
8982 else if (cpu_unit_reservation_p (state
, _0mib_
))
8984 else if (cpu_unit_reservation_p (state
, _0mbb_
))
8986 else if (cpu_unit_reservation_p (state
, _0bbb_
))
8988 else if (cpu_unit_reservation_p (state
, _0mmf_
))
8990 else if (cpu_unit_reservation_p (state
, _0mfi_
))
8992 else if (cpu_unit_reservation_p (state
, _0mfb_
))
8994 else if (cpu_unit_reservation_p (state
, _0mlx_
))
8999 if (cpu_unit_reservation_p (state
, _1mmi_
))
9001 else if (cpu_unit_reservation_p (state
, _1mii_
))
9003 else if (cpu_unit_reservation_p (state
, _1mmb_
))
9005 else if (cpu_unit_reservation_p (state
, _1mib_
))
9007 else if (cpu_unit_reservation_p (state
, _1mbb_
))
9009 else if (cpu_unit_reservation_p (state
, _1bbb_
))
9011 else if (_1mmf_
>= 0 && cpu_unit_reservation_p (state
, _1mmf_
))
9013 else if (cpu_unit_reservation_p (state
, _1mfi_
))
9015 else if (cpu_unit_reservation_p (state
, _1mfb_
))
9017 else if (cpu_unit_reservation_p (state
, _1mlx_
))
/* True when INSN is important for bundling.  */

static bool
important_for_bundling_p (rtx_insn *insn)
{
  return (INSN_P (insn)
	  && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER);
}

/* The following function returns an insn important for insn bundling
   followed by INSN and before TAIL.  */

static rtx_insn *
get_next_important_insn (rtx_insn *insn, rtx_insn *tail)
{
  for (; insn && insn != tail; insn = NEXT_INSN (insn))
    if (important_for_bundling_p (insn))
      return insn;
  return NULL;
}

/* True when INSN is unknown, but important, for bundling.  */

static bool
unknown_for_bundling_p (rtx_insn *insn)
{
  return (INSN_P (insn)
	  && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_UNKNOWN
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER);
}
9060 /* Add a bundle selector TEMPLATE0 before INSN. */
9063 ia64_add_bundle_selector_before (int template0
, rtx_insn
*insn
)
9065 rtx b
= gen_bundle_selector (GEN_INT (template0
));
9067 ia64_emit_insn_before (b
, insn
);
9068 #if NR_BUNDLES == 10
9069 if ((template0
== 4 || template0
== 5)
9070 && ia64_except_unwind_info (&global_options
) == UI_TARGET
)
9073 rtx note
= NULL_RTX
;
9075 /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
9076 first or second slot. If it is and has REG_EH_NOTE set, copy it
9077 to following nops, as br.call sets rp to the address of following
9078 bundle and therefore an EH region end must be on a bundle
9080 insn
= PREV_INSN (insn
);
9081 for (i
= 0; i
< 3; i
++)
9084 insn
= next_active_insn (insn
);
9085 while (NONJUMP_INSN_P (insn
)
9086 && get_attr_empty (insn
) == EMPTY_YES
);
9088 note
= find_reg_note (insn
, REG_EH_REGION
, NULL_RTX
);
9093 gcc_assert ((code
= recog_memoized (insn
)) == CODE_FOR_nop
9094 || code
== CODE_FOR_nop_b
);
9095 if (find_reg_note (insn
, REG_EH_REGION
, NULL_RTX
))
9098 add_reg_note (insn
, REG_EH_REGION
, XEXP (note
, 0));
/* The following function does insn bundling.  Bundling means
   inserting templates and nop insns to fit insn groups into permitted
   templates.  Instruction scheduling uses an NDFA (non-deterministic
   finite automaton) encoding information about the templates and the
   inserted nops.  The nondeterminism of the automaton permits it to
   follow all possible insn sequences very quickly.

   Unfortunately it is not possible to get information about the
   inserted nop insns and used templates from the automaton states.
   The automaton only says that we can issue an insn, possibly
   inserting some nops before it and using some template.  Therefore
   insn bundling in this function is implemented by using a DFA
   (deterministic finite automaton).  We follow all possible insn
   sequences by inserting 0-2 nops (that is what the NDFA describes
   for insn scheduling) before/after each insn being bundled.  We know
   the start of a simulated processor cycle from insn scheduling (an
   insn starting a new cycle has TImode).

   A simple implementation of insn bundling would create an enormous
   number of possible insn sequences satisfying the information about
   new cycle ticks taken from insn scheduling.  To make the algorithm
   practical we use dynamic programming.  Each decision (about
   inserting nops and implicitly about previous decisions) is
   described by structure bundle_state (see above).  If we generate
   the same bundle state (the key is the automaton state after issuing
   the insns and nops for it), we reuse the already generated one.  As
   a consequence we reject some decisions which cannot improve the
   solution and reduce memory needed for the algorithm.

   When we reach the end of an EBB (extended basic block), we choose
   the best sequence and then, moving back through the EBB, insert
   templates for the best alternative.  The templates are taken by
   querying the automaton state for each insn in the chosen bundle
   states.

   So the algorithm makes two (forward and backward) passes through
   the EBB.  */
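
/* Illustrative sketch (added commentary, not part of the original port):
   the forward pass of the dynamic program described above, reduced to
   its skeleton.  index_to_bundle_states[n] chains every surviving state
   reached after bundling the first n important insns; for each such
   state we try issuing the next insn preceded by 0, 1 or 2 nops, and
   insert_bundle_state keeps only the best state per (insn_num,
   dfa_state) key.  The bundle-end handling is omitted here.  */
#if 0
static void
bundling_forward_pass_sketch (rtx_insn *insn, rtx_insn *tail)
{
  int insn_num;

  for (insn_num = 1; insn != NULL; insn_num++)
    {
      struct bundle_state *curr_state, *next_state;

      index_to_bundle_states[insn_num] = NULL;
      for (curr_state = index_to_bundle_states[insn_num - 1];
	   curr_state != NULL;
	   curr_state = next_state)
	{
	  next_state = curr_state->next;
	  /* Each call may create a new state and hand it to
	     insert_bundle_state, which performs the DP reuse.  */
	  issue_nops_and_insn (curr_state, 2, insn, /*try_bundle_end_p=*/0, 0);
	  issue_nops_and_insn (curr_state, 1, insn, 0, 0);
	  issue_nops_and_insn (curr_state, 0, insn, 0, 0);
	}
      insn = get_next_important_insn (NEXT_INSN (insn), tail);
    }
}
#endif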
9143 bundling (FILE *dump
, int verbose
, rtx_insn
*prev_head_insn
, rtx_insn
*tail
)
9145 struct bundle_state
*curr_state
, *next_state
, *best_state
;
9146 rtx_insn
*insn
, *next_insn
;
9148 int i
, bundle_end_p
, only_bundle_end_p
, asm_p
;
9149 int pos
= 0, max_pos
, template0
, template1
;
9151 enum attr_type type
;
9154 /* Count insns in the EBB. */
9155 for (insn
= NEXT_INSN (prev_head_insn
);
9156 insn
&& insn
!= tail
;
9157 insn
= NEXT_INSN (insn
))
9163 dfa_clean_insn_cache ();
9164 initiate_bundle_state_table ();
9165 index_to_bundle_states
= XNEWVEC (struct bundle_state
*, insn_num
+ 2);
9166 /* First (forward) pass -- generation of bundle states. */
9167 curr_state
= get_free_bundle_state ();
9168 curr_state
->insn
= NULL
;
9169 curr_state
->before_nops_num
= 0;
9170 curr_state
->after_nops_num
= 0;
9171 curr_state
->insn_num
= 0;
9172 curr_state
->cost
= 0;
9173 curr_state
->accumulated_insns_num
= 0;
9174 curr_state
->branch_deviation
= 0;
9175 curr_state
->middle_bundle_stops
= 0;
9176 curr_state
->next
= NULL
;
9177 curr_state
->originator
= NULL
;
9178 state_reset (curr_state
->dfa_state
);
9179 index_to_bundle_states
[0] = curr_state
;
9181 /* Shift cycle mark if it is put on insn which could be ignored. */
9182 for (insn
= NEXT_INSN (prev_head_insn
);
9184 insn
= NEXT_INSN (insn
))
9186 && !important_for_bundling_p (insn
)
9187 && GET_MODE (insn
) == TImode
)
9189 PUT_MODE (insn
, VOIDmode
);
9190 for (next_insn
= NEXT_INSN (insn
);
9192 next_insn
= NEXT_INSN (next_insn
))
9193 if (important_for_bundling_p (next_insn
)
9194 && INSN_CODE (next_insn
) != CODE_FOR_insn_group_barrier
)
9196 PUT_MODE (next_insn
, TImode
);
9200 /* Forward pass: generation of bundle states. */
9201 for (insn
= get_next_important_insn (NEXT_INSN (prev_head_insn
), tail
);
9205 gcc_assert (important_for_bundling_p (insn
));
9206 type
= ia64_safe_type (insn
);
9207 next_insn
= get_next_important_insn (NEXT_INSN (insn
), tail
);
9209 index_to_bundle_states
[insn_num
] = NULL
;
9210 for (curr_state
= index_to_bundle_states
[insn_num
- 1];
9212 curr_state
= next_state
)
9214 pos
= curr_state
->accumulated_insns_num
% 3;
9215 next_state
= curr_state
->next
;
9216 /* We must fill up the current bundle in order to start a
9217 subsequent asm insn in a new bundle. Asm insn is always
9218 placed in a separate bundle. */
9220 = (next_insn
!= NULL_RTX
9221 && INSN_CODE (insn
) == CODE_FOR_insn_group_barrier
9222 && unknown_for_bundling_p (next_insn
));
9223 /* We may fill up the current bundle if it is the cycle end
9224 without a group barrier. */
9226 = (only_bundle_end_p
|| next_insn
== NULL_RTX
9227 || (GET_MODE (next_insn
) == TImode
9228 && INSN_CODE (insn
) != CODE_FOR_insn_group_barrier
));
9229 if (type
== TYPE_F
|| type
== TYPE_B
|| type
== TYPE_L
9231 issue_nops_and_insn (curr_state
, 2, insn
, bundle_end_p
,
9233 issue_nops_and_insn (curr_state
, 1, insn
, bundle_end_p
,
9235 issue_nops_and_insn (curr_state
, 0, insn
, bundle_end_p
,
9238 gcc_assert (index_to_bundle_states
[insn_num
]);
9239 for (curr_state
= index_to_bundle_states
[insn_num
];
9241 curr_state
= curr_state
->next
)
9242 if (verbose
>= 2 && dump
)
9244 /* This structure is taken from generated code of the
9245 pipeline hazard recognizer (see file insn-attrtab.cc).
9246 Please don't forget to change the structure if a new
9247 automaton is added to .md file. */
9250 unsigned short one_automaton_state
;
9251 unsigned short oneb_automaton_state
;
9252 unsigned short two_automaton_state
;
9253 unsigned short twob_automaton_state
;
9258 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
9259 curr_state
->unique_num
,
9260 (curr_state
->originator
== NULL
9261 ? -1 : curr_state
->originator
->unique_num
),
9263 curr_state
->before_nops_num
, curr_state
->after_nops_num
,
9264 curr_state
->accumulated_insns_num
, curr_state
->branch_deviation
,
9265 curr_state
->middle_bundle_stops
,
9266 ((struct DFA_chip
*) curr_state
->dfa_state
)->twob_automaton_state
,
9271 /* We should find a solution because the 2nd insn scheduling has
9273 gcc_assert (index_to_bundle_states
[insn_num
]);
9274 /* Find a state corresponding to the best insn sequence. */
9276 for (curr_state
= index_to_bundle_states
[insn_num
];
9278 curr_state
= curr_state
->next
)
9279 /* We are just looking at the states with fully filled up last
9280 bundle. The first we prefer insn sequences with minimal cost
9281 then with minimal inserted nops and finally with branch insns
9282 placed in the 3rd slots. */
9283 if (curr_state
->accumulated_insns_num
% 3 == 0
9284 && (best_state
== NULL
|| best_state
->cost
> curr_state
->cost
9285 || (best_state
->cost
== curr_state
->cost
9286 && (curr_state
->accumulated_insns_num
9287 < best_state
->accumulated_insns_num
9288 || (curr_state
->accumulated_insns_num
9289 == best_state
->accumulated_insns_num
9290 && (curr_state
->branch_deviation
9291 < best_state
->branch_deviation
9292 || (curr_state
->branch_deviation
9293 == best_state
->branch_deviation
9294 && curr_state
->middle_bundle_stops
9295 < best_state
->middle_bundle_stops
)))))))
9296 best_state
= curr_state
;
9297 /* Second (backward) pass: adding nops and templates. */
9298 gcc_assert (best_state
);
9299 insn_num
= best_state
->before_nops_num
;
9300 template0
= template1
= -1;
9301 for (curr_state
= best_state
;
9302 curr_state
->originator
!= NULL
;
9303 curr_state
= curr_state
->originator
)
9305 insn
= curr_state
->insn
;
9306 asm_p
= unknown_for_bundling_p (insn
);
9308 if (verbose
>= 2 && dump
)
9312 unsigned short one_automaton_state
;
9313 unsigned short oneb_automaton_state
;
9314 unsigned short two_automaton_state
;
9315 unsigned short twob_automaton_state
;
9320 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
9321 curr_state
->unique_num
,
9322 (curr_state
->originator
== NULL
9323 ? -1 : curr_state
->originator
->unique_num
),
9325 curr_state
->before_nops_num
, curr_state
->after_nops_num
,
9326 curr_state
->accumulated_insns_num
, curr_state
->branch_deviation
,
9327 curr_state
->middle_bundle_stops
,
9328 ((struct DFA_chip
*) curr_state
->dfa_state
)->twob_automaton_state
,
9331 /* Find the position in the current bundle window. The window can
9332 contain at most two bundles. Two bundle window means that
9333 the processor will make two bundle rotation. */
9334 max_pos
= get_max_pos (curr_state
->dfa_state
);
9336 /* The following (negative template number) means that the
9337 processor did one bundle rotation. */
9338 || (max_pos
== 3 && template0
< 0))
9340 /* We are at the end of the window -- find template(s) for
9344 template0
= get_template (curr_state
->dfa_state
, 3);
9347 template1
= get_template (curr_state
->dfa_state
, 3);
9348 template0
= get_template (curr_state
->dfa_state
, 6);
9351 if (max_pos
> 3 && template1
< 0)
9352 /* It may happen when we have the stop inside a bundle. */
9354 gcc_assert (pos
<= 3);
9355 template1
= get_template (curr_state
->dfa_state
, 3);
9359 /* Emit nops after the current insn. */
9360 for (i
= 0; i
< curr_state
->after_nops_num
; i
++)
9362 rtx nop_pat
= gen_nop ();
9363 rtx_insn
*nop
= emit_insn_after (nop_pat
, insn
);
9365 gcc_assert (pos
>= 0);
9368 /* We are at the start of a bundle: emit the template
9369 (it should be defined). */
9370 gcc_assert (template0
>= 0);
9371 ia64_add_bundle_selector_before (template0
, nop
);
9372 /* If we have two bundle window, we make one bundle
9373 rotation. Otherwise template0 will be undefined
9374 (negative value). */
9375 template0
= template1
;
9379 /* Move the position backward in the window. Group barrier has
9380 no slot. Asm insn takes all bundle. */
9381 if (INSN_CODE (insn
) != CODE_FOR_insn_group_barrier
9382 && !unknown_for_bundling_p (insn
))
9384 /* Long insn takes 2 slots. */
9385 if (ia64_safe_type (insn
) == TYPE_L
)
9387 gcc_assert (pos
>= 0);
9389 && INSN_CODE (insn
) != CODE_FOR_insn_group_barrier
9390 && !unknown_for_bundling_p (insn
))
9392 /* The current insn is at the bundle start: emit the
9394 gcc_assert (template0
>= 0);
9395 ia64_add_bundle_selector_before (template0
, insn
);
9396 b
= PREV_INSN (insn
);
9398 /* See comment above in analogous place for emitting nops
9400 template0
= template1
;
9403 /* Emit nops after the current insn. */
9404 for (i
= 0; i
< curr_state
->before_nops_num
; i
++)
9406 rtx nop_pat
= gen_nop ();
9407 ia64_emit_insn_before (nop_pat
, insn
);
9408 rtx_insn
*nop
= PREV_INSN (insn
);
9411 gcc_assert (pos
>= 0);
9414 /* See comment above in analogous place for emitting nops
9416 gcc_assert (template0
>= 0);
9417 ia64_add_bundle_selector_before (template0
, insn
);
9418 b
= PREV_INSN (insn
);
9420 template0
= template1
;
9428 /* Assert right calculation of middle_bundle_stops. */
9429 int num
= best_state
->middle_bundle_stops
;
9430 bool start_bundle
= true, end_bundle
= false;
9432 for (insn
= NEXT_INSN (prev_head_insn
);
9433 insn
&& insn
!= tail
;
9434 insn
= NEXT_INSN (insn
))
9438 if (recog_memoized (insn
) == CODE_FOR_bundle_selector
)
9439 start_bundle
= true;
9442 rtx_insn
*next_insn
;
9444 for (next_insn
= NEXT_INSN (insn
);
9445 next_insn
&& next_insn
!= tail
;
9446 next_insn
= NEXT_INSN (next_insn
))
9447 if (INSN_P (next_insn
)
9448 && (ia64_safe_itanium_class (next_insn
)
9449 != ITANIUM_CLASS_IGNORE
9450 || recog_memoized (next_insn
)
9451 == CODE_FOR_bundle_selector
)
9452 && GET_CODE (PATTERN (next_insn
)) != USE
9453 && GET_CODE (PATTERN (next_insn
)) != CLOBBER
)
9456 end_bundle
= next_insn
== NULL_RTX
9457 || next_insn
== tail
9458 || (INSN_P (next_insn
)
9459 && recog_memoized (next_insn
) == CODE_FOR_bundle_selector
);
9460 if (recog_memoized (insn
) == CODE_FOR_insn_group_barrier
9461 && !start_bundle
&& !end_bundle
9463 && !unknown_for_bundling_p (next_insn
))
9466 start_bundle
= false;
9470 gcc_assert (num
== 0);
9473 free (index_to_bundle_states
);
9474 finish_bundle_state_table ();
9476 dfa_clean_insn_cache ();
9479 /* The following function is called at the end of scheduling BB or
9480 EBB. After reload, it inserts stop bits and does insn bundling. */
9483 ia64_sched_finish (FILE *dump
, int sched_verbose
)
9486 fprintf (dump
, "// Finishing schedule.\n");
9487 if (!reload_completed
)
9489 if (reload_completed
)
9491 final_emit_insn_group_barriers (dump
);
9492 bundling (dump
, sched_verbose
, current_sched_info
->prev_head
,
9493 current_sched_info
->next_tail
);
9494 if (sched_verbose
&& dump
)
9495 fprintf (dump
, "// finishing %d-%d\n",
9496 INSN_UID (NEXT_INSN (current_sched_info
->prev_head
)),
9497 INSN_UID (PREV_INSN (current_sched_info
->next_tail
)));
9503 /* The following function inserts stop bits in scheduled BB or EBB. */
9506 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED
)
9509 int need_barrier_p
= 0;
9510 int seen_good_insn
= 0;
9512 init_insn_group_barriers ();
9514 for (insn
= NEXT_INSN (current_sched_info
->prev_head
);
9515 insn
!= current_sched_info
->next_tail
;
9516 insn
= NEXT_INSN (insn
))
9518 if (BARRIER_P (insn
))
9520 rtx_insn
*last
= prev_active_insn (insn
);
9524 if (JUMP_TABLE_DATA_P (last
))
9525 last
= prev_active_insn (last
);
9526 if (recog_memoized (last
) != CODE_FOR_insn_group_barrier
)
9527 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last
);
9529 init_insn_group_barriers ();
9533 else if (NONDEBUG_INSN_P (insn
))
9535 if (recog_memoized (insn
) == CODE_FOR_insn_group_barrier
)
9537 init_insn_group_barriers ();
9541 else if (need_barrier_p
|| group_barrier_needed (insn
)
9542 || (mflag_sched_stop_bits_after_every_cycle
9543 && GET_MODE (insn
) == TImode
9546 if (TARGET_EARLY_STOP_BITS
)
9551 last
!= current_sched_info
->prev_head
;
9552 last
= PREV_INSN (last
))
9553 if (INSN_P (last
) && GET_MODE (last
) == TImode
9554 && stops_p
[INSN_UID (last
)])
9556 if (last
== current_sched_info
->prev_head
)
9558 last
= prev_active_insn (last
);
9560 && recog_memoized (last
) != CODE_FOR_insn_group_barrier
)
9561 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
9563 init_insn_group_barriers ();
9564 for (last
= NEXT_INSN (last
);
9566 last
= NEXT_INSN (last
))
9569 group_barrier_needed (last
);
9570 if (recog_memoized (last
) >= 0
9571 && important_for_bundling_p (last
))
9577 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
9579 init_insn_group_barriers ();
9582 group_barrier_needed (insn
);
9583 if (recog_memoized (insn
) >= 0
9584 && important_for_bundling_p (insn
))
9587 else if (recog_memoized (insn
) >= 0
9588 && important_for_bundling_p (insn
))
9590 need_barrier_p
= (CALL_P (insn
) || unknown_for_bundling_p (insn
));
/* If the following function returns TRUE, we will use the DFA
   insn scheduler.  */

static int
ia64_first_cycle_multipass_dfa_lookahead (void)
{
  return (reload_completed ? 6 : 4);
}
/* The following function initiates variable `dfa_pre_cycle_insn'.  */

static void
ia64_init_dfa_pre_cycle_insn (void)
{
  if (temp_dfa_state == NULL)
    {
      dfa_state_size = state_size ();
      temp_dfa_state = xmalloc (dfa_state_size);
      prev_cycle_state = xmalloc (dfa_state_size);
    }
  dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
  SET_PREV_INSN (dfa_pre_cycle_insn) = SET_NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
  recog_memoized (dfa_pre_cycle_insn);
  dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
  SET_PREV_INSN (dfa_stop_insn) = SET_NEXT_INSN (dfa_stop_insn) = NULL_RTX;
  recog_memoized (dfa_stop_insn);
}

/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
   used by the DFA insn scheduler.  */

static rtx
ia64_dfa_pre_cycle_insn (void)
{
  return dfa_pre_cycle_insn;
}
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces address for CONSUMER (of type st or stf).  */

int
ia64_st_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx dest, reg, mem;

  gcc_assert (producer && consumer);
  dest = ia64_single_set (producer);
  gcc_assert (dest);
  reg = SET_DEST (dest);
  gcc_assert (reg);
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  gcc_assert (GET_CODE (reg) == REG);

  dest = ia64_single_set (consumer);
  gcc_assert (dest);
  mem = SET_DEST (dest);
  gcc_assert (mem && GET_CODE (mem) == MEM);
  return reg_mentioned_p (reg, mem);
}
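
/* Illustrative sketch (added commentary, not part of the original port):
   the producer/consumer pair recognized above is, schematically,

       producer:  (set (reg R) (...))			  ; ilog or ld result
       consumer:  (set (mem (... (reg R) ...)) (...))	  ; st/stf destination

   i.e. the register written by PRODUCER appears somewhere inside the
   address of the memory destination of CONSUMER.  Operand details are
   elided; this only shows the shape that reg_mentioned_p checks.  */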
9658 /* The following function returns TRUE if PRODUCER (of type ilog or
9659 ld) produces address for CONSUMER (of type ld or fld). */
9662 ia64_ld_address_bypass_p (rtx_insn
*producer
, rtx_insn
*consumer
)
9664 rtx dest
, src
, reg
, mem
;
9666 gcc_assert (producer
&& consumer
);
9667 dest
= ia64_single_set (producer
);
9669 reg
= SET_DEST (dest
);
9671 if (GET_CODE (reg
) == SUBREG
)
9672 reg
= SUBREG_REG (reg
);
9673 gcc_assert (GET_CODE (reg
) == REG
);
9675 src
= ia64_single_set (consumer
);
9677 mem
= SET_SRC (src
);
9680 if (GET_CODE (mem
) == UNSPEC
&& XVECLEN (mem
, 0) > 0)
9681 mem
= XVECEXP (mem
, 0, 0);
9682 else if (GET_CODE (mem
) == IF_THEN_ELSE
)
9683 /* ??? Is this bypass necessary for ld.c? */
9685 gcc_assert (XINT (XEXP (XEXP (mem
, 0), 0), 1) == UNSPEC_LDCCLR
);
9686 mem
= XEXP (mem
, 1);
9689 while (GET_CODE (mem
) == SUBREG
|| GET_CODE (mem
) == ZERO_EXTEND
)
9690 mem
= XEXP (mem
, 0);
9692 if (GET_CODE (mem
) == UNSPEC
)
9694 int c
= XINT (mem
, 1);
9696 gcc_assert (c
== UNSPEC_LDA
|| c
== UNSPEC_LDS
|| c
== UNSPEC_LDS_A
9697 || c
== UNSPEC_LDSA
);
9698 mem
= XVECEXP (mem
, 0, 0);
9701 /* Note that LO_SUM is used for GOT loads. */
9702 gcc_assert (GET_CODE (mem
) == LO_SUM
|| GET_CODE (mem
) == MEM
);
9704 return reg_mentioned_p (reg
, mem
);
9707 /* The following function returns TRUE if INSN produces address for a
9708 load/store insn. We will place such insns into M slot because it
9709 decreases its latency time. */
9712 ia64_produce_address_p (rtx insn
)
9718 /* Emit pseudo-ops for the assembler to describe predicate relations.
9719 At present this assumes that we only consider predicate pairs to
9720 be mutex, and that the assembler can deduce proper values from
9721 straight-line code. */
9724 emit_predicate_relation_info (void)
9728 FOR_EACH_BB_REVERSE_FN (bb
, cfun
)
9731 rtx_insn
*head
= BB_HEAD (bb
);
9733 /* We only need such notes at code labels. */
9734 if (! LABEL_P (head
))
9736 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head
)))
9737 head
= NEXT_INSN (head
);
9739 /* Skip p0, which may be thought to be live due to (reg:DI p0)
9740 grabbing the entire block of predicate registers. */
9741 for (r
= PR_REG (2); r
< PR_REG (64); r
+= 2)
9742 if (REGNO_REG_SET_P (df_get_live_in (bb
), r
))
9744 rtx p
= gen_rtx_REG (BImode
, r
);
9745 rtx_insn
*n
= emit_insn_after (gen_pred_rel_mutex (p
), head
);
9746 if (head
== BB_END (bb
))
9752 /* Look for conditional calls that do not return, and protect predicate
9753 relations around them. Otherwise the assembler will assume the call
9754 returns, and complain about uses of call-clobbered predicates after
9756 FOR_EACH_BB_REVERSE_FN (bb
, cfun
)
9758 rtx_insn
*insn
= BB_HEAD (bb
);
9763 && GET_CODE (PATTERN (insn
)) == COND_EXEC
9764 && find_reg_note (insn
, REG_NORETURN
, NULL_RTX
))
9767 emit_insn_before (gen_safe_across_calls_all (), insn
);
9768 rtx_insn
*a
= emit_insn_after (gen_safe_across_calls_normal (), insn
);
9769 if (BB_HEAD (bb
) == insn
)
9771 if (BB_END (bb
) == insn
)
9775 if (insn
== BB_END (bb
))
9777 insn
= NEXT_INSN (insn
);
9782 /* Perform machine dependent operations on the rtl chain INSNS. */
9787 /* We are freeing block_for_insn in the toplev to keep compatibility
9788 with old MDEP_REORGS that are not CFG based. Recompute it now. */
9789 compute_bb_for_insn ();
9791 /* If optimizing, we'll have split before scheduling. */
9795 if (optimize
&& flag_schedule_insns_after_reload
9796 && dbg_cnt (ia64_sched2
))
9799 timevar_push (TV_SCHED2
);
9800 ia64_final_schedule
= 1;
      /* We can't let modulo-sched prevent us from scheduling any bbs,
	 since we need the final schedule to produce bundle information.  */
      FOR_EACH_BB_FN (bb, cfun)
	bb->flags &= ~BB_DISABLE_SCHEDULE;

      initiate_bundle_states ();
      ia64_nop = make_insn_raw (gen_nop ());
      SET_PREV_INSN (ia64_nop) = SET_NEXT_INSN (ia64_nop) = NULL_RTX;
      recog_memoized (ia64_nop);
      clocks_length = get_max_uid () + 1;
      stops_p = XCNEWVEC (char, clocks_length);

      if (ia64_tune == PROCESSOR_ITANIUM2)
	{
	  pos_1 = get_cpu_unit_code ("2_1");
	  pos_2 = get_cpu_unit_code ("2_2");
	  pos_3 = get_cpu_unit_code ("2_3");
	  pos_4 = get_cpu_unit_code ("2_4");
	  pos_5 = get_cpu_unit_code ("2_5");
	  pos_6 = get_cpu_unit_code ("2_6");
	  _0mii_ = get_cpu_unit_code ("2b_0mii.");
	  _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
	  _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
	  _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
	  _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
	  _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
	  _0mib_ = get_cpu_unit_code ("2b_0mib.");
	  _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
	  _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
	  _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
	  _1mii_ = get_cpu_unit_code ("2b_1mii.");
	  _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
	  _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
	  _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
	  _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
	  _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
	  _1mib_ = get_cpu_unit_code ("2b_1mib.");
	  _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
	  _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
	  _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
	}
      else
	{
	  pos_1 = get_cpu_unit_code ("1_1");
	  pos_2 = get_cpu_unit_code ("1_2");
	  pos_3 = get_cpu_unit_code ("1_3");
	  pos_4 = get_cpu_unit_code ("1_4");
	  pos_5 = get_cpu_unit_code ("1_5");
	  pos_6 = get_cpu_unit_code ("1_6");
	  _0mii_ = get_cpu_unit_code ("1b_0mii.");
	  _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
	  _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
	  _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
	  _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
	  _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
	  _0mib_ = get_cpu_unit_code ("1b_0mib.");
	  _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
	  _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
	  _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
	  _1mii_ = get_cpu_unit_code ("1b_1mii.");
	  _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
	  _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
	  _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
	  _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
	  _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
	  _1mib_ = get_cpu_unit_code ("1b_1mib.");
	  _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
	  _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
	  _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
	}
9873 if (flag_selective_scheduling2
9874 && !maybe_skip_selective_scheduling ())
9875 run_selective_scheduling ();
9879 /* Redo alignment computation, as it might gone wrong. */
9880 compute_alignments ();
9882 /* We cannot reuse this one because it has been corrupted by the
9884 finish_bundle_states ();
9887 emit_insn_group_barriers (dump_file
);
9889 ia64_final_schedule
= 0;
9890 timevar_pop (TV_SCHED2
);
9893 emit_all_insn_group_barriers (dump_file
);
9897 /* A call must not be the last instruction in a function, so that the
9898 return address is still within the function, so that unwinding works
9899 properly. Note that IA-64 differs from dwarf2 on this point. */
9900 if (ia64_except_unwind_info (&global_options
) == UI_TARGET
)
9905 insn
= get_last_insn ();
9906 if (! INSN_P (insn
))
9907 insn
= prev_active_insn (insn
);
9910 /* Skip over insns that expand to nothing. */
9911 while (NONJUMP_INSN_P (insn
)
9912 && get_attr_empty (insn
) == EMPTY_YES
)
9914 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
9915 && XINT (PATTERN (insn
), 1) == UNSPECV_INSN_GROUP_BARRIER
)
9917 insn
= prev_active_insn (insn
);
9922 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9923 emit_insn (gen_break_f ());
9924 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9929 emit_predicate_relation_info ();
9931 if (flag_var_tracking
)
9933 timevar_push (TV_VAR_TRACKING
);
9934 variable_tracking_main ();
9935 timevar_pop (TV_VAR_TRACKING
);
9937 df_finish_pass (false);
9940 /* Return true if REGNO is used by the epilogue. */
9943 ia64_epilogue_uses (int regno
)
9948 /* With a call to a function in another module, we will write a new
9949 value to "gp". After returning from such a call, we need to make
9950 sure the function restores the original gp-value, even if the
9951 function itself does not use the gp anymore. */
9952 return !(TARGET_AUTO_PIC
|| TARGET_NO_PIC
);
9954 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9955 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9956 /* For functions defined with the syscall_linkage attribute, all
9957 input registers are marked as live at all function exits. This
9958 prevents the register allocator from using the input registers,
9959 which in turn makes it possible to restart a system call after
9960 an interrupt without having to save/restore the input registers.
9961 This also prevents kernel data from leaking to application code. */
9962 return lookup_attribute ("syscall_linkage",
9963 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))) != NULL
;
9966 /* Conditional return patterns can't represent the use of `b0' as
9967 the return address, so we force the value live this way. */
9971 /* Likewise for ar.pfs, which is used by br.ret. */
9979 /* Return true if REGNO is used by the frame unwinder. */
9982 ia64_eh_uses (int regno
)
9986 if (! reload_completed
)
9992 for (r
= reg_save_b0
; r
<= reg_save_ar_lc
; r
++)
9993 if (regno
== current_frame_info
.r
[r
]
9994 || regno
== emitted_frame_related_regs
[r
])
10000 /* Return true if this goes in small data/bss. */
10002 /* ??? We could also support own long data here. Generating movl/add/ld8
10003 instead of addl,ld8/ld8. This makes the code bigger, but should make the
10004 code faster because there is one less load. This also includes incomplete
10005 types which can't go in sdata/sbss. */
10008 ia64_in_small_data_p (const_tree exp
)
10010 if (TARGET_NO_SDATA
)
10013 /* We want to merge strings, so we never consider them small data. */
10014 if (TREE_CODE (exp
) == STRING_CST
)
10017 /* Functions are never small data. */
10018 if (TREE_CODE (exp
) == FUNCTION_DECL
)
10021 if (VAR_P (exp
) && DECL_SECTION_NAME (exp
))
10023 const char *section
= DECL_SECTION_NAME (exp
);
10025 if (strcmp (section
, ".sdata") == 0
10026 || startswith (section
, ".sdata.")
10027 || startswith (section
, ".gnu.linkonce.s.")
10028 || strcmp (section
, ".sbss") == 0
10029 || startswith (section
, ".sbss.")
10030 || startswith (section
, ".gnu.linkonce.sb."))
10035 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
10037 /* If this is an incomplete type with size 0, then we can't put it
10038 in sdata because it might be too big when completed. */
10039 if (size
> 0 && size
<= ia64_section_threshold
)
10046 /* Output assembly directives for prologue regions. */
10048 /* The current basic block number. */
10050 static bool last_block
;
10052 /* True if we need a copy_state command at the start of the next block. */
10054 static bool need_copy_state
;
10056 #ifndef MAX_ARTIFICIAL_LABEL_BYTES
10057 # define MAX_ARTIFICIAL_LABEL_BYTES 30
10060 /* The function emits unwind directives for the start of an epilogue. */
10063 process_epilogue (FILE *out_file
, rtx insn ATTRIBUTE_UNUSED
,
10064 bool unwind
, bool frame ATTRIBUTE_UNUSED
)
10066 /* If this isn't the last block of the function, then we need to label the
10067 current state, and copy it back in at the start of the next block. */
10072 fprintf (out_file
, "\t.label_state %d\n",
10073 ++cfun
->machine
->state_num
);
10074 need_copy_state
= true;
10078 fprintf (out_file
, "\t.restore sp\n");
10081 /* This function processes a SET pattern for REG_CFA_ADJUST_CFA. */
10084 process_cfa_adjust_cfa (FILE *out_file
, rtx pat
, rtx insn
,
10085 bool unwind
, bool frame
)
10087 rtx dest
= SET_DEST (pat
);
10088 rtx src
= SET_SRC (pat
);
10090 if (dest
== stack_pointer_rtx
)
10092 if (GET_CODE (src
) == PLUS
)
10094 rtx op0
= XEXP (src
, 0);
10095 rtx op1
= XEXP (src
, 1);
10097 gcc_assert (op0
== dest
&& GET_CODE (op1
) == CONST_INT
);
10099 if (INTVAL (op1
) < 0)
10101 gcc_assert (!frame_pointer_needed
);
10104 "\t.fframe " HOST_WIDE_INT_PRINT_DEC
"\n",
10108 process_epilogue (out_file
, insn
, unwind
, frame
);
10112 gcc_assert (src
== hard_frame_pointer_rtx
);
10113 process_epilogue (out_file
, insn
, unwind
, frame
);
10116 else if (dest
== hard_frame_pointer_rtx
)
10118 gcc_assert (src
== stack_pointer_rtx
);
10119 gcc_assert (frame_pointer_needed
);
10122 fprintf (out_file
, "\t.vframe r%d\n",
10123 ia64_debugger_regno (REGNO (dest
)));
10126 gcc_unreachable ();
10129 /* This function processes a SET pattern for REG_CFA_REGISTER. */
10132 process_cfa_register (FILE *out_file
, rtx pat
, bool unwind
)
10134 rtx dest
= SET_DEST (pat
);
10135 rtx src
= SET_SRC (pat
);
10136 int dest_regno
= REGNO (dest
);
10141 /* Saving return address pointer. */
10143 fprintf (out_file
, "\t.save rp, r%d\n",
10144 ia64_debugger_regno (dest_regno
));
10148 src_regno
= REGNO (src
);
10153 gcc_assert (dest_regno
== current_frame_info
.r
[reg_save_pr
]);
10155 fprintf (out_file
, "\t.save pr, r%d\n",
10156 ia64_debugger_regno (dest_regno
));
10159 case AR_UNAT_REGNUM
:
10160 gcc_assert (dest_regno
== current_frame_info
.r
[reg_save_ar_unat
]);
10162 fprintf (out_file
, "\t.save ar.unat, r%d\n",
10163 ia64_debugger_regno (dest_regno
));
10167 gcc_assert (dest_regno
== current_frame_info
.r
[reg_save_ar_lc
]);
10169 fprintf (out_file
, "\t.save ar.lc, r%d\n",
10170 ia64_debugger_regno (dest_regno
));
10174 /* Everything else should indicate being stored to memory. */
10175 gcc_unreachable ();
10179 /* This function processes a SET pattern for REG_CFA_OFFSET. */
10182 process_cfa_offset (FILE *out_file
, rtx pat
, bool unwind
)
10184 rtx dest
= SET_DEST (pat
);
10185 rtx src
= SET_SRC (pat
);
10186 int src_regno
= REGNO (src
);
10187 const char *saveop
;
10191 gcc_assert (MEM_P (dest
));
10192 if (GET_CODE (XEXP (dest
, 0)) == REG
)
10194 base
= XEXP (dest
, 0);
10199 gcc_assert (GET_CODE (XEXP (dest
, 0)) == PLUS
10200 && GET_CODE (XEXP (XEXP (dest
, 0), 1)) == CONST_INT
);
10201 base
= XEXP (XEXP (dest
, 0), 0);
10202 off
= INTVAL (XEXP (XEXP (dest
, 0), 1));
10205 if (base
== hard_frame_pointer_rtx
)
10207 saveop
= ".savepsp";
10212 gcc_assert (base
== stack_pointer_rtx
);
10213 saveop
= ".savesp";
10216 src_regno
= REGNO (src
);
10220 gcc_assert (!current_frame_info
.r
[reg_save_b0
]);
10222 fprintf (out_file
, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC
"\n",
10227 gcc_assert (!current_frame_info
.r
[reg_save_pr
]);
10229 fprintf (out_file
, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC
"\n",
10234 gcc_assert (!current_frame_info
.r
[reg_save_ar_lc
]);
10236 fprintf (out_file
, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC
"\n",
10240 case AR_PFS_REGNUM
:
10241 gcc_assert (!current_frame_info
.r
[reg_save_ar_pfs
]);
10243 fprintf (out_file
, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC
"\n",
10247 case AR_UNAT_REGNUM
:
10248 gcc_assert (!current_frame_info
.r
[reg_save_ar_unat
]);
10250 fprintf (out_file
, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC
"\n",
10259 fprintf (out_file
, "\t.save.g 0x%x\n",
10260 1 << (src_regno
- GR_REG (4)));
10269 fprintf (out_file
, "\t.save.b 0x%x\n",
10270 1 << (src_regno
- BR_REG (1)));
10278 fprintf (out_file
, "\t.save.f 0x%x\n",
10279 1 << (src_regno
- FR_REG (2)));
10282 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
10283 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
10284 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
10285 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
10287 fprintf (out_file
, "\t.save.gf 0x0, 0x%x\n",
10288 1 << (src_regno
- FR_REG (12)));
10292 /* ??? For some reason we mark other general registers, even those
10293 we can't represent in the unwind info. Ignore them. */
10298 /* This function looks at a single insn and emits any directives
10299 required to unwind this insn. */
10302 ia64_asm_unwind_emit (FILE *out_file
, rtx_insn
*insn
)
10304 bool unwind
= ia64_except_unwind_info (&global_options
) == UI_TARGET
;
10305 bool frame
= dwarf2out_do_frame ();
10309 if (!unwind
&& !frame
)
10312 if (NOTE_INSN_BASIC_BLOCK_P (insn
))
10314 last_block
= NOTE_BASIC_BLOCK (insn
)->next_bb
10315 == EXIT_BLOCK_PTR_FOR_FN (cfun
);
10317 /* Restore unwind state from immediately before the epilogue. */
10318 if (need_copy_state
)
10322 fprintf (out_file
, "\t.body\n");
10323 fprintf (out_file
, "\t.copy_state %d\n",
10324 cfun
->machine
->state_num
);
10326 need_copy_state
= false;
10330 if (NOTE_P (insn
) || ! RTX_FRAME_RELATED_P (insn
))
10333 /* Look for the ALLOC insn. */
10334 if (INSN_CODE (insn
) == CODE_FOR_alloc
)
10336 rtx dest
= SET_DEST (XVECEXP (PATTERN (insn
), 0, 0));
10337 int dest_regno
= REGNO (dest
);
10339 /* If this is the final destination for ar.pfs, then this must
10340 be the alloc in the prologue. */
10341 if (dest_regno
== current_frame_info
.r
[reg_save_ar_pfs
])
10344 fprintf (out_file
, "\t.save ar.pfs, r%d\n",
10345 ia64_debugger_regno (dest_regno
));
10349 /* This must be an alloc before a sibcall. We must drop the
10350 old frame info. The easiest way to drop the old frame
10351 info is to ensure we had a ".restore sp" directive
10352 followed by a new prologue. If the procedure doesn't
10353 have a memory-stack frame, we'll issue a dummy ".restore
10355 if (current_frame_info
.total_size
== 0 && !frame_pointer_needed
)
10356 /* if haven't done process_epilogue() yet, do it now */
10357 process_epilogue (out_file
, insn
, unwind
, frame
);
10359 fprintf (out_file
, "\t.prologue\n");
10364 handled_one
= false;
10365 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
10366 switch (REG_NOTE_KIND (note
))
10368 case REG_CFA_ADJUST_CFA
:
10369 pat
= XEXP (note
, 0);
10371 pat
= PATTERN (insn
);
10372 process_cfa_adjust_cfa (out_file
, pat
, insn
, unwind
, frame
);
10373 handled_one
= true;
10376 case REG_CFA_OFFSET
:
10377 pat
= XEXP (note
, 0);
10379 pat
= PATTERN (insn
);
10380 process_cfa_offset (out_file
, pat
, unwind
);
10381 handled_one
= true;
10384 case REG_CFA_REGISTER
:
10385 pat
= XEXP (note
, 0);
10387 pat
= PATTERN (insn
);
10388 process_cfa_register (out_file
, pat
, unwind
);
10389 handled_one
= true;
10392 case REG_FRAME_RELATED_EXPR
:
10393 case REG_CFA_DEF_CFA
:
10394 case REG_CFA_EXPRESSION
:
10395 case REG_CFA_RESTORE
:
10396 case REG_CFA_SET_VDRAP
:
10397 /* Not used in the ia64 port. */
10398 gcc_unreachable ();
10401 /* Not a frame-related note. */
10405 /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
10406 explicit action to take. No guessing required. */
10407 gcc_assert (handled_one
);
10410 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
10413 ia64_asm_emit_except_personality (rtx personality
)
10415 fputs ("\t.personality\t", asm_out_file
);
10416 output_addr_const (asm_out_file
, personality
);
10417 fputc ('\n', asm_out_file
);
10420 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
10423 ia64_asm_init_sections (void)
10425 exception_section
= get_unnamed_section (0, output_section_asm_op
,
10429 /* Implement TARGET_DEBUG_UNWIND_INFO. */
10431 static enum unwind_info_type
10432 ia64_debug_unwind_info (void)
10440 IA64_BUILTIN_COPYSIGNQ
,
10441 IA64_BUILTIN_FABSQ
,
10442 IA64_BUILTIN_FLUSHRS
,
10444 IA64_BUILTIN_HUGE_VALQ
,
10446 IA64_BUILTIN_NANSQ
,
10450 static GTY(()) tree ia64_builtins
[(int) IA64_BUILTIN_max
];
10453 ia64_init_builtins (void)
10459 /* The __fpreg type. */
10460 fpreg_type
= make_node (REAL_TYPE
);
10461 TYPE_PRECISION (fpreg_type
) = 82;
10462 layout_type (fpreg_type
);
10463 (*lang_hooks
.types
.register_builtin_type
) (fpreg_type
, "__fpreg");
10465 /* The __float80 type. */
10466 if (float64x_type_node
!= NULL_TREE
10467 && TYPE_MODE (float64x_type_node
) == XFmode
)
10468 float80_type
= float64x_type_node
;
10471 float80_type
= make_node (REAL_TYPE
);
10472 TYPE_PRECISION (float80_type
) = 80;
10473 layout_type (float80_type
);
10475 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
10477 /* The __float128 type. */
10481 tree const_string_type
10482 = build_pointer_type (build_qualified_type
10483 (char_type_node
, TYPE_QUAL_CONST
));
10485 if (float128t_type_node
== NULL_TREE
)
10487 float128t_type_node
= make_node (REAL_TYPE
);
10488 TYPE_PRECISION (float128t_type_node
)
10489 = TYPE_PRECISION (float128_type_node
);
10490 layout_type (float128t_type_node
);
10491 SET_TYPE_MODE (float128t_type_node
, TYPE_MODE (float128_type_node
));
10493 (*lang_hooks
.types
.register_builtin_type
) (float128t_type_node
,
10496 /* TFmode support builtins. */
10497 ftype
= build_function_type_list (float128t_type_node
, NULL_TREE
);
10498 decl
= add_builtin_function ("__builtin_infq", ftype
,
10499 IA64_BUILTIN_INFQ
, BUILT_IN_MD
,
10501 ia64_builtins
[IA64_BUILTIN_INFQ
] = decl
;
10503 decl
= add_builtin_function ("__builtin_huge_valq", ftype
,
10504 IA64_BUILTIN_HUGE_VALQ
, BUILT_IN_MD
,
10506 ia64_builtins
[IA64_BUILTIN_HUGE_VALQ
] = decl
;
10508 ftype
= build_function_type_list (float128t_type_node
,
10511 decl
= add_builtin_function ("__builtin_nanq", ftype
,
10512 IA64_BUILTIN_NANQ
, BUILT_IN_MD
,
10513 "nanq", NULL_TREE
);
10514 TREE_READONLY (decl
) = 1;
10515 ia64_builtins
[IA64_BUILTIN_NANQ
] = decl
;
10517 decl
= add_builtin_function ("__builtin_nansq", ftype
,
10518 IA64_BUILTIN_NANSQ
, BUILT_IN_MD
,
10519 "nansq", NULL_TREE
);
10520 TREE_READONLY (decl
) = 1;
10521 ia64_builtins
[IA64_BUILTIN_NANSQ
] = decl
;
10523 ftype
= build_function_type_list (float128t_type_node
,
10524 float128t_type_node
,
10526 decl
= add_builtin_function ("__builtin_fabsq", ftype
,
10527 IA64_BUILTIN_FABSQ
, BUILT_IN_MD
,
10528 "__fabstf2", NULL_TREE
);
10529 TREE_READONLY (decl
) = 1;
10530 ia64_builtins
[IA64_BUILTIN_FABSQ
] = decl
;
10532 ftype
= build_function_type_list (float128t_type_node
,
10533 float128t_type_node
,
10534 float128t_type_node
,
10536 decl
= add_builtin_function ("__builtin_copysignq", ftype
,
10537 IA64_BUILTIN_COPYSIGNQ
, BUILT_IN_MD
,
10538 "__copysigntf3", NULL_TREE
);
10539 TREE_READONLY (decl
) = 1;
10540 ia64_builtins
[IA64_BUILTIN_COPYSIGNQ
] = decl
;
10543 /* Under HPUX, this is a synonym for "long double". */
10544 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
10547 /* Fwrite on VMS is non-standard. */
10548 #if TARGET_ABI_OPEN_VMS
10549 vms_patch_builtins ();
10552 #define def_builtin(name, type, code) \
10553 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
10556 decl
= def_builtin ("__builtin_ia64_bsp",
10557 build_function_type_list (ptr_type_node
, NULL_TREE
),
10559 ia64_builtins
[IA64_BUILTIN_BSP
] = decl
;
10561 decl
= def_builtin ("__builtin_ia64_flushrs",
10562 build_function_type_list (void_type_node
, NULL_TREE
),
10563 IA64_BUILTIN_FLUSHRS
);
10564 ia64_builtins
[IA64_BUILTIN_FLUSHRS
] = decl
;
10570 if ((decl
= builtin_decl_explicit (BUILT_IN_FINITE
)) != NULL_TREE
)
10571 set_user_assembler_name (decl
, "_Isfinite");
10572 if ((decl
= builtin_decl_explicit (BUILT_IN_FINITEF
)) != NULL_TREE
)
10573 set_user_assembler_name (decl
, "_Isfinitef");
10574 if ((decl
= builtin_decl_explicit (BUILT_IN_FINITEL
)) != NULL_TREE
)
10575 set_user_assembler_name (decl
, "_Isfinitef128");
10580 ia64_fold_builtin (tree fndecl
, int n_args ATTRIBUTE_UNUSED
,
10581 tree
*args
, bool ignore ATTRIBUTE_UNUSED
)
10583 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
10585 enum ia64_builtins fn_code
10586 = (enum ia64_builtins
) DECL_MD_FUNCTION_CODE (fndecl
);
10589 case IA64_BUILTIN_NANQ
:
10590 case IA64_BUILTIN_NANSQ
:
10592 tree type
= TREE_TYPE (TREE_TYPE (fndecl
));
10593 const char *str
= c_getstr (*args
);
10594 int quiet
= fn_code
== IA64_BUILTIN_NANQ
;
10595 REAL_VALUE_TYPE real
;
10597 if (str
&& real_nan (&real
, str
, quiet
, TYPE_MODE (type
)))
10598 return build_real (type
, real
);
10607 #ifdef SUBTARGET_FOLD_BUILTIN
10608 return SUBTARGET_FOLD_BUILTIN (fndecl
, n_args
, args
, ignore
);
10615 ia64_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
10616 machine_mode mode ATTRIBUTE_UNUSED
,
10617 int ignore ATTRIBUTE_UNUSED
)
10619 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
10620 unsigned int fcode
= DECL_MD_FUNCTION_CODE (fndecl
);
10624 case IA64_BUILTIN_BSP
:
10625 if (! target
|| ! register_operand (target
, DImode
))
10626 target
= gen_reg_rtx (DImode
);
10627 emit_insn (gen_bsp_value (target
));
10628 #ifdef POINTERS_EXTEND_UNSIGNED
10629 target
= convert_memory_address (ptr_mode
, target
);
10633 case IA64_BUILTIN_FLUSHRS
:
10634 emit_insn (gen_flushrs ());
10637 case IA64_BUILTIN_INFQ
:
10638 case IA64_BUILTIN_HUGE_VALQ
:
10640 machine_mode target_mode
= TYPE_MODE (TREE_TYPE (exp
));
10641 REAL_VALUE_TYPE inf
;
10645 tmp
= const_double_from_real_value (inf
, target_mode
);
10647 tmp
= validize_mem (force_const_mem (target_mode
, tmp
));
10650 target
= gen_reg_rtx (target_mode
);
10652 emit_move_insn (target
, tmp
);
10656 case IA64_BUILTIN_NANQ
:
10657 case IA64_BUILTIN_NANSQ
:
10658 case IA64_BUILTIN_FABSQ
:
10659 case IA64_BUILTIN_COPYSIGNQ
:
10660 return expand_call (exp
, target
, ignore
);
10663 gcc_unreachable ();
10669 /* Return the ia64 builtin for CODE. */
10672 ia64_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
10674 if (code
>= IA64_BUILTIN_max
)
10675 return error_mark_node
;
10677 return ia64_builtins
[code
];
10680 /* Implement TARGET_FUNCTION_ARG_PADDING.
10682 For the HP-UX IA64 aggregate parameters are passed stored in the
10683 most significant bits of the stack slot. */
10685 static pad_direction
10686 ia64_function_arg_padding (machine_mode mode
, const_tree type
)
10688 /* Exception to normal case for structures/unions/etc. */
10691 && AGGREGATE_TYPE_P (type
)
10692 && int_size_in_bytes (type
) < UNITS_PER_WORD
)
10695 /* Fall back to the default. */
10696 return default_function_arg_padding (mode
, type
);
/* Emit text to declare externally defined variables and functions, because
   the Intel assembler does not support undefined externals.  */

void
ia64_asm_output_external (FILE *file, tree decl, const char *name)
{
  /* We output the name if and only if TREE_SYMBOL_REFERENCED is
     set in order to avoid putting out names that are never really
     used.  */
  if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
    {
      /* maybe_assemble_visibility will return 1 if the assembler
         visibility directive is output.  */
      int need_visibility = ((*targetm.binds_local_p) (decl)
                             && maybe_assemble_visibility (decl));

      /* GNU as does not need anything here, but the HP linker does
         need something for external functions.  */
      if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
          && TREE_CODE (decl) == FUNCTION_DECL)
        (*targetm.asm_out.globalize_decl_name) (file, decl);
      else if (need_visibility && !TARGET_GNU_AS)
        (*targetm.asm_out.globalize_label) (file, name);
    }
}

/* Set SImode div/mod functions, init_integral_libfuncs only initializes
   modes of word_mode and larger.  Rename the TFmode libfuncs using the
   HPUX conventions.  __divtf3 is used for XFmode.  We need to keep it for
   backward compatibility.  */

static void
ia64_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
  set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
  set_optab_libfunc (smod_optab, SImode, "__modsi3");
  set_optab_libfunc (umod_optab, SImode, "__umodsi3");

  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
  set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");

  set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
  set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");

  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
  /* HP-UX 11.23 libc does not have a function for unsigned
     SImode-to-TFmode conversion.  */
  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
}

/* Rename all the TFmode libfuncs using the HPUX conventions.  */

static void
ia64_hpux_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  /* The HP SI millicode division and mod functions expect DI arguments.
     By turning them off completely we avoid using both libgcc and the
     non-standard millicode routines and use the HP DI millicode routines
     instead.  */

  set_optab_libfunc (sdiv_optab, SImode, 0);
  set_optab_libfunc (udiv_optab, SImode, 0);
  set_optab_libfunc (smod_optab, SImode, 0);
  set_optab_libfunc (umod_optab, SImode, 0);

  set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
  set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
  set_optab_libfunc (smod_optab, DImode, "__milli_remI");
  set_optab_libfunc (umod_optab, DImode, "__milli_remU");

  /* HP-UX libc has TF min/max/abs routines in it.  */
  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");

  /* ia64_expand_compare uses this.  */
  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");

  /* These should never be used.  */
  set_optab_libfunc (eq_optab, TFmode, 0);
  set_optab_libfunc (ne_optab, TFmode, 0);
  set_optab_libfunc (gt_optab, TFmode, 0);
  set_optab_libfunc (ge_optab, TFmode, 0);
  set_optab_libfunc (lt_optab, TFmode, 0);
  set_optab_libfunc (le_optab, TFmode, 0);
}

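/* Net effect, sketched: on HP-UX a 32-bit division is widened and routed
   to the DImode millicode routines (__milli_divI and friends) because the
   SImode optab entries are cleared above, and every TFmode comparison is
   funnelled through the single _U_Qfcmp routine recorded in cmptf_libfunc
   instead of per-predicate libcalls.  */
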
/* Rename the division and modulus functions in VMS.  */

static void
ia64_vms_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
#ifdef MEM_LIBFUNCS_INIT
  MEM_LIBFUNCS_INIT;
#endif
}

/* Rename the TFmode libfuncs available from soft-fp in glibc using
   the HPUX conventions.  */

static void
ia64_sysv4_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  /* These functions are not part of the HPUX TFmode interface.  We
     use them instead of _U_Qfcmp, which doesn't work the way we
     expect.  */
  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");

  /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
     glibc doesn't have them.  */
}

static void
ia64_soft_fp_init_libfuncs (void)
{
}

static bool
ia64_vms_valid_pointer_mode (scalar_int_mode mode)
{
  return (mode == SImode || mode == DImode);
}

/* For HPUX, it is illegal to have relocations in shared segments.  */

static int
ia64_hpux_reloc_rw_mask (void)
{
  return 3;
}

/* For others, relax this so that relocations to local data goes in
   read-only segments, but we still cannot allow global relocations
   in read-only segments.  */

static int
ia64_reloc_rw_mask (void)
{
  return flag_pic ? 3 : 2;
}

/* Return the section to use for X.  The only special thing we do here
   is to honor small data.  */

static section *
ia64_select_rtx_section (machine_mode mode, rtx x,
                         unsigned HOST_WIDE_INT align)
{
  if (GET_MODE_SIZE (mode) > 0
      && GET_MODE_SIZE (mode) <= ia64_section_threshold
      && !TARGET_NO_SDATA)
    return sdata_section;
  else
    return default_elf_select_rtx_section (mode, x, align);
}

static unsigned int
ia64_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = 0;

  if (strcmp (name, ".sdata") == 0
      || startswith (name, ".sdata.")
      || startswith (name, ".gnu.linkonce.s.")
      || startswith (name, ".sdata2.")
      || startswith (name, ".gnu.linkonce.s2.")
      || strcmp (name, ".sbss") == 0
      || startswith (name, ".sbss.")
      || startswith (name, ".gnu.linkonce.sb."))
    flags = SECTION_SMALL;

  flags |= default_section_type_flags (decl, name, reloc);
  return flags;
}

/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
   structure type and that the address of that type should be passed
   in out0, rather than in r8.  */

static bool
ia64_struct_retval_addr_is_first_parm_p (tree fntype)
{
  tree ret_type = TREE_TYPE (fntype);

  /* The Itanium C++ ABI requires that out0, rather than r8, be used
     as the structure return address parameter, if the return value
     type has a non-trivial copy constructor or destructor.  It is not
     clear if this same convention should be used for other
     programming languages.  Until G++ 3.4, we incorrectly used r8 for
     these return values.  */
  return (abi_version_at_least (2)
          && ret_type
          && TYPE_MODE (ret_type) == BLKmode
          && TREE_ADDRESSABLE (ret_type)
          && lang_GNU_CXX ());
}

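/* Example of what this predicate distinguishes: a C++ function whose class
   return type has a non-trivial copy constructor or destructor (and is
   therefore TREE_ADDRESSABLE and kept in BLKmode) gets its return-slot
   address passed in out0, and any "this" pointer shifts to the next
   argument slot (see ia64_output_mi_thunk below); POD returns keep using
   r8 as the aggregate-return register.  */
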
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
                      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
                      tree function)
{
  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
  rtx this_rtx, funexp;
  rtx_insn *insn;
  unsigned int this_parmno;
  unsigned int this_regno;
  rtx delta_rtx;

  reload_completed = 1;
  epilogue_completed = 1;

  /* Set things up as ia64_expand_prologue might.  */
  last_scratch_gr_reg = 15;

  memset (&current_frame_info, 0, sizeof (current_frame_info));
  current_frame_info.spill_cfa_off = -16;
  current_frame_info.n_input_regs = 1;
  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);

  /* Mark the end of the (empty) prologue.  */
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Figure out whether "this" will be the first parameter (the
     typical case) or the second parameter (as happens when the
     virtual function returns certain class objects).  */
  this_parmno
    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
       ? 1 : 0);
  this_regno = IN_REG (this_parmno);
  if (!TARGET_REG_NAMES)
    reg_names[this_regno] = ia64_reg_numbers[this_parmno];

  this_rtx = gen_rtx_REG (Pmode, this_regno);

  /* Apply the constant offset, if required.  */
  delta_rtx = GEN_INT (delta);
  if (TARGET_ILP32)
    {
      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
      REG_POINTER (tmp) = 1;
      if (delta && satisfies_constraint_I (delta_rtx))
        {
          emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
          delta = 0;
        }
      else
        emit_insn (gen_ptr_extend (this_rtx, tmp));
    }
  if (delta)
    {
      if (!satisfies_constraint_I (delta_rtx))
        {
          rtx tmp = gen_rtx_REG (Pmode, 2);
          emit_move_insn (tmp, delta_rtx);
          delta_rtx = tmp;
        }
      emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
    }

  /* Apply the offset from the vtable, if required.  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx tmp = gen_rtx_REG (Pmode, 2);

      if (TARGET_ILP32)
        {
          rtx t = gen_rtx_REG (ptr_mode, 2);
          REG_POINTER (t) = 1;
          emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
          if (satisfies_constraint_I (vcall_offset_rtx))
            {
              emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
              vcall_offset = 0;
            }
          else
            emit_insn (gen_ptr_extend (tmp, t));
        }
      else
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));

      if (vcall_offset)
        {
          if (!satisfies_constraint_J (vcall_offset_rtx))
            {
              rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
              emit_move_insn (tmp2, vcall_offset_rtx);
              vcall_offset_rtx = tmp2;
            }
          emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
        }

      if (TARGET_ILP32)
        emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
      else
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));

      emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
  insn = get_last_insn ();
  SIBLING_CALL_P (insn) = 1;

  /* Code generation for calls relies on splitting.  */
  reload_completed = 1;
  epilogue_completed = 1;
  try_split (PATTERN (insn), insn, 0);

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worth while.  */

  emit_all_insn_group_barriers (NULL);
  insn = get_insns ();
  shorten_branches (insn);
  assemble_start_function (thunk, fnname);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();
  assemble_end_function (thunk, fnname);

  reload_completed = 0;
  epilogue_completed = 0;
}

/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
ia64_struct_value_rtx (tree fntype,
                       int incoming ATTRIBUTE_UNUSED)
{
  if (TARGET_ABI_OPEN_VMS ||
      (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
    return NULL_RTX;
  return gen_rtx_REG (Pmode, GR_REG (8));
}

static bool
ia64_scalar_mode_supported_p (scalar_mode mode)
{
  switch (mode)
    {
    case E_QImode:
    case E_HImode:
    case E_SImode:
    case E_DImode:
    case E_TImode:
      return true;

    case E_SFmode:
    case E_DFmode:
    case E_XFmode:
    case E_RFmode:
      return true;

    case E_TFmode:
      return true;

    default:
      return false;
    }
}

static bool
ia64_vector_mode_supported_p (machine_mode mode)
{
  switch (mode)
    {
    case E_V8QImode:
    case E_V4HImode:
    case E_V2SImode:
      return true;

    case E_V2SFmode:
      return true;

    default:
      return false;
    }
}

/* Implement the FUNCTION_PROFILER macro.  */

void
ia64_output_function_profiler (FILE *file, int labelno)
{
  bool indirect_call;

  /* If the function needs a static chain and the static chain
     register is r15, we use an indirect call so as to bypass
     the PLT stub in case the executable is dynamically linked,
     because the stub clobbers r15 as per 5.3.6 of the psABI.
     We don't need to do that in non canonical PIC mode.  */

  if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
    {
      gcc_assert (STATIC_CHAIN_REGNUM == 15);
      indirect_call = true;
    }
  else
    indirect_call = false;

  if (TARGET_GNU_AS)
    fputs ("\t.prologue 4, r40\n", file);
  else
    fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
  fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);

  if (NO_PROFILE_COUNTERS)
    fputs ("\tmov out3 = r0\n", file);
  else
    {
      char buf[20];
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);

      if (TARGET_AUTO_PIC)
        fputs ("\tmovl out3 = @gprel(", file);
      else
        fputs ("\taddl out3 = @ltoff(", file);
      assemble_name (file, buf);
      if (TARGET_AUTO_PIC)
        fputs (")\n", file);
      else
        fputs ("), r1\n", file);
    }

  if (indirect_call)
    fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
  fputs ("\t;;\n", file);

  fputs ("\t.save rp, r42\n", file);
  fputs ("\tmov out2 = b0\n", file);

  if (indirect_call)
    fputs ("\tld8 r14 = [r14]\n\t;;\n", file);

  fputs ("\t.body\n", file);
  fputs ("\tmov out1 = r1\n", file);

  if (indirect_call)
    {
      fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
      fputs ("\tmov b6 = r16\n", file);
      fputs ("\tld8 r1 = [r14]\n", file);
      fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
    }
  else
    fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
}

static GTY(()) rtx mcount_func_rtx;

static rtx
gen_mcount_func_rtx (void)
{
  if (!mcount_func_rtx)
    mcount_func_rtx = init_one_libfunc ("_mcount");
  return mcount_func_rtx;
}

void
ia64_profile_hook (int labelno)
{
  rtx label, ip;

  if (NO_PROFILE_COUNTERS)
    label = const0_rtx;
  else
    {
      char buf[30];
      const char *label_name;
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
      label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
      label = gen_rtx_SYMBOL_REF (Pmode, label_name);
      SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
    }
  ip = gen_reg_rtx (Pmode);
  emit_insn (gen_ip_value (ip));
  emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
                     VOIDmode,
                     gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
                     ip, Pmode, label, Pmode);
}

/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ia64_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  if (type == float128_type_node || type == float64x_type_node)
    return NULL;

  /* On HP-UX, "long double" is mangled as "e" so __float128 is
     mangled as "e" there; elsewhere mangle it as "g".  */
  if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
    return "g";
  /* On HP-UX, "e" is not available as a mangling of __float80 so use
     an extended mangling.  Elsewhere, "e" is available since long
     double is 80 bits.  */
  if (TYPE_MODE (type) == XFmode)
    return TARGET_HPUX ? "u9__float80" : "e";
  if (TYPE_MODE (type) == RFmode)
    return "u7__fpreg";
  return NULL;
}

/* Return the diagnostic message string if conversion from FROMTYPE to
   TOTYPE is not allowed, NULL otherwise.  */
static const char *
ia64_invalid_conversion (const_tree fromtype, const_tree totype)
{
  /* Reject nontrivial conversion to or from __fpreg.  */
  if (TYPE_MODE (fromtype) == RFmode
      && TYPE_MODE (totype) != RFmode
      && TYPE_MODE (totype) != VOIDmode)
    return N_("invalid conversion from %<__fpreg%>");
  if (TYPE_MODE (totype) == RFmode
      && TYPE_MODE (fromtype) != RFmode)
    return N_("invalid conversion to %<__fpreg%>");
  return NULL;
}

/* Return the diagnostic message string if the unary operation OP is
   not permitted on TYPE, NULL otherwise.  */
static const char *
ia64_invalid_unary_op (int op, const_tree type)
{
  /* Reject operations on __fpreg other than unary + or &.  */
  if (TYPE_MODE (type) == RFmode
      && op != CONVERT_EXPR
      && op != ADDR_EXPR)
    return N_("invalid operation on %<__fpreg%>");
  return NULL;
}

/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
static const char *
ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
                        const_tree type2)
{
  /* Reject operations on __fpreg.  */
  if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
    return N_("invalid operation on %<__fpreg%>");
  return NULL;
}

/* HP-UX version_id attribute.
   For object foo, if the version_id is set to 1234 put out an alias
   of '.alias foo "foo{1234}"  We can't use "foo{1234}" in anything
   other than an alias statement because it is an illegal symbol name.  */

static tree
ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
                                  tree name ATTRIBUTE_UNUSED,
                                  tree args,
                                  int flags ATTRIBUTE_UNUSED,
                                  bool *no_add_attrs)
{
  tree arg = TREE_VALUE (args);

  if (TREE_CODE (arg) != STRING_CST)
    {
      error("version attribute is not a string");
      *no_add_attrs = true;
      return NULL_TREE;
    }
  return NULL_TREE;
}

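/* Usage sketch for the attribute handled above (HP-UX only): declaring

     extern int foo (int) __attribute__ ((version_id ("1234")));

   makes the versioned name come out through an alias of the form shown
   in the comment above, .alias foo "foo{1234}", since the decorated name
   is not itself a legal assembler symbol.  */
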
/* Target hook for c_mode_for_suffix.  */

static machine_mode
ia64_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  if (suffix == 'w')
    return XFmode;

  return VOIDmode;
}

/* Implement TARGET_C_MODE_FOR_FLOATING_TYPE.  Return DFmode, XFmode
   or TFmode for TI_LONG_DOUBLE_TYPE which is for long double type,
   go with the default one for the others.  */

static machine_mode
ia64_c_mode_for_floating_type (enum tree_index ti)
{
  /* long double is XFmode normally, and TFmode for HPUX.  It should be
     TFmode for VMS as well but we only support up to DFmode now.  */
  if (ti == TI_LONG_DOUBLE_TYPE)
    return TARGET_HPUX ? TFmode : (TARGET_ABI_OPEN_VMS ? DFmode : XFmode);
  return default_mode_for_floating_type (ti);
}

static GTY(()) rtx ia64_dconst_0_5_rtx;

rtx
ia64_dconst_0_5 (void)
{
  if (! ia64_dconst_0_5_rtx)
    {
      REAL_VALUE_TYPE rv;
      real_from_string (&rv, "0.5");
      ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
    }
  return ia64_dconst_0_5_rtx;
}

static GTY(()) rtx ia64_dconst_0_375_rtx;

rtx
ia64_dconst_0_375 (void)
{
  if (! ia64_dconst_0_375_rtx)
    {
      REAL_VALUE_TYPE rv;
      real_from_string (&rv, "0.375");
      ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
    }
  return ia64_dconst_0_375_rtx;
}

static fixed_size_mode
ia64_get_reg_raw_mode (int regno)
{
  if (FR_REGNO_P (regno))
    return XFmode;
  return default_get_reg_raw_mode(regno);
}

/* Implement TARGET_MEMBER_TYPE_FORCES_BLK.  ??? Might not be needed
   anymore.  */

static bool
ia64_member_type_forces_blk (const_tree, machine_mode mode)
{
  return TARGET_HPUX && mode == TFmode;
}

/* Always default to .text section until HP-UX linker is fixed.  */

ATTRIBUTE_UNUSED static section *
ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
                            enum node_frequency freq ATTRIBUTE_UNUSED,
                            bool startup ATTRIBUTE_UNUSED,
                            bool exit ATTRIBUTE_UNUSED)
{
  return NULL;
}

/* Construct (set target (vec_select op0 (parallel perm))) and
   return true if that's a valid instruction in the active ISA.  */

static bool
expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
{
  rtx rperm[MAX_VECT_LEN], x;
  unsigned i;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (perm[i]);

  x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
  x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
  x = gen_rtx_SET (target, x);

  rtx_insn *insn = emit_insn (x);
  if (recog_memoized (insn) < 0)
    {
      remove_insn (insn);
      return false;
    }
  return true;
}

/* Similar, but generate a vec_concat from op0 and op1 as well.  */

static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
                        const unsigned char *perm, unsigned nelt)
{
  machine_mode v2mode;
  rtx x;

  if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode))
    return false;
  x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
  return expand_vselect (target, x, perm, nelt);
}

/* Try to expand a no-op permutation.  */

static bool
expand_vec_perm_identity (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;

  for (i = 0; i < nelt; ++i)
    if (d->perm[i] != i)
      return false;

  if (!d->testing_p)
    emit_move_insn (d->target, d->op0);

  return true;
}

/* Try to expand D via a shrp instruction.  */

static bool
expand_vec_perm_shrp (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt, shift, mask;
  rtx tmp, hi, lo;

  /* ??? Don't force V2SFmode into the integer registers.  */
  if (d->vmode == V2SFmode)
    return false;

  mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);

  shift = d->perm[0];
  if (BYTES_BIG_ENDIAN && shift > nelt)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != ((shift + i) & mask))
      return false;

  if (d->testing_p)
    return true;

  hi = shift < nelt ? d->op1 : d->op0;
  lo = shift < nelt ? d->op0 : d->op1;

  shift %= nelt;

  shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;

  /* We've eliminated the shift 0 case via expand_vec_perm_identity.  */
  gcc_assert (IN_RANGE (shift, 1, 63));

  /* Recall that big-endian elements are numbered starting at the top of
     the register.  Ideally we'd have a shift-left-pair.  But since we
     don't, convert to a shift the other direction.  */
  if (BYTES_BIG_ENDIAN)
    shift = 64 - shift;

  tmp = gen_reg_rtx (DImode);
  hi = gen_lowpart (DImode, hi);
  lo = gen_lowpart (DImode, lo);
  emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift)));

  emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
  return true;
}

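/* Worked example (little-endian, two-operand V8QImode): the selector
   { 3, 4, 5, 6, 7, 8, 9, 10 } has perm[0] == 3 and every later element
   equal to (3 + i) & 15, so it is accepted here and becomes a single
   shrp of the concatenated operands by 3 * 8 = 24 bits; a selector such
   as { 3, 5, 7, 9, ... } fails the loop above and falls through to the
   other strategies.  */
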
/* Try to instantiate D in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Try single-operand selections.  */
  if (d->one_operand_p)
    {
      if (expand_vec_perm_identity (d))
        return true;
      if (expand_vselect (d->target, d->op0, d->perm, nelt))
        return true;
    }

  /* Try two operand selections.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (!d->one_operand_p)
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned e = d->perm[i];
          if (e >= nelt)
            e -= nelt;
          else
            e += nelt;
          perm2[i] = e;
        }

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
        return true;
    }

  if (expand_vec_perm_shrp (d))
    return true;

  /* ??? Look for deposit-like permutations where most of the result
     comes from one vector unchanged and the rest comes from a
     sequential hunk of the other vector.  */

  return false;
}

/* Pattern match broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;
  unsigned char perm2[2];
  rtx temp;
  bool ok;

  if (!d->one_operand_p)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  switch (d->vmode)
    {
    case E_V2SImode:
    case E_V2SFmode:
      /* Implementable by interleave.  */
      perm2[0] = elt;
      perm2[1] = elt + 2;
      ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
      gcc_assert (ok);
      break;

    case E_V8QImode:
      /* Implementable by extract + broadcast.  */
      if (BYTES_BIG_ENDIAN)
        elt = 7 - elt;
      elt *= BITS_PER_UNIT;
      temp = gen_reg_rtx (DImode);
      emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
                            GEN_INT (8), GEN_INT (elt)));
      emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
      break;

    case E_V4HImode:
      /* Should have been matched directly by vec_select.  */
    default:
      gcc_unreachable ();
    }

  return true;
}

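/* Examples of what is matched here: for V8QImode the selector
   { 5, 5, 5, 5, 5, 5, 5, 5 } extracts byte 5 into a scratch DImode
   register with extzv and then replicates it with mux1_brcst_qi, while
   for V2SImode or V2SFmode a selector like { 1, 1 } is rewritten as the
   self-interleave { 1, 3 } and handled by expand_vselect_vconcat.  */
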
/* A subroutine of ia64_expand_vec_perm_const_1.  Try to simplify a
   two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned char remap[2 * MAX_VECT_LEN];
  unsigned contents, i, nelt, nelt2;
  unsigned h0, h1, h2, h3;
  rtx_insn *seq;
  bool ok;

  if (d->one_operand_p)
    return false;

  nelt = d->nelt;
  nelt2 = nelt / 2;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= 1u << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  h0 = (1u << nelt2) - 1;
  h1 = h0 << nelt2;
  h2 = h0 << nelt;
  h3 = h0 << (nelt + nelt2);

  if ((contents & (h0 | h2)) == contents)	/* punpck even halves */
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned which = i / 2 + (i & 1 ? nelt : 0);
          remap[which] = i;
          dremap.perm[i] = which;
        }
    }
  else if ((contents & (h1 | h3)) == contents)	/* punpck odd halves */
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
          remap[which] = i;
          dremap.perm[i] = which;
        }
    }
  else if ((contents & 0x5555) == contents)	/* mix even elements */
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
          remap[which] = i;
          dremap.perm[i] = which;
        }
    }
  else if ((contents & 0xaaaa) == contents)	/* mix odd elements */
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned which = (i | 1) + (i & 1 ? nelt : 0);
          remap[which] = i;
          dremap.perm[i] = which;
        }
    }
  else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt) /* shrp */
    {
      unsigned shift = ctz_hwi (contents);
      for (i = 0; i < nelt; ++i)
        {
          unsigned which = (i + shift) & (2 * nelt - 1);
          remap[which] = i;
          dremap.perm[i] = which;
        }
    }
  else
    return false;

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      dfinal.perm[i] = e;
    }
  if (d->testing_p)
    dfinal.op0 = gen_raw_REG (dfinal.vmode, LAST_VIRTUAL_REGISTER + 1);
  else
    dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4HImode
     this *will* succeed.  For V8QImode or V2SImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();
  if (!ok)
    return false;
  if (d->testing_p)
    return true;

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}

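/* Worked example (V4HImode, two operands): the selector { 0, 5, 1, 4 }
   only draws from the low halves of the two inputs, i.e. contents is a
   subset of h0 | h2, so dremap becomes the interleave { 0, 4, 1, 5 }.
   That interleave is emitted first, and dfinal then reduces to the
   one-operand permutation { 0, 3, 2, 1 } on its result, which for
   V4HImode expand_vec_perm_1 is guaranteed to match (see the comment
   above the start_sequence call).  */
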
/* A subroutine of ia64_expand_vec_perm_const_1.  Emit a full V4HImode
   constant permutation via two mux2 and a merge.  */

static bool
expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
{
  unsigned char perm2[4];
  rtx rmask[4];
  unsigned i;
  rtx t0, t1, mask, x;
  bool ok;

  if (d->vmode != V4HImode || d->one_operand_p)
    return false;
  if (d->testing_p)
    return true;

  for (i = 0; i < 4; ++i)
    {
      perm2[i] = d->perm[i] & 3;
      rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
    }
  mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
  mask = force_reg (V4HImode, mask);

  t0 = gen_reg_rtx (V4HImode);
  t1 = gen_reg_rtx (V4HImode);

  ok = expand_vselect (t0, d->op0, perm2, 4);
  gcc_assert (ok);
  ok = expand_vselect (t1, d->op1, perm2, 4);
  gcc_assert (ok);

  x = gen_rtx_AND (V4HImode, mask, t0);
  emit_insn (gen_rtx_SET (t0, x));

  x = gen_rtx_NOT (V4HImode, mask);
  x = gen_rtx_AND (V4HImode, x, t1);
  emit_insn (gen_rtx_SET (t1, x));

  x = gen_rtx_IOR (V4HImode, t0, t1);
  emit_insn (gen_rtx_SET (d->target, x));

  return true;
}

/* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  if (expand_vec_perm_1 (d))
    return true;
  if (expand_vec_perm_broadcast (d))
    return true;
  if (expand_vec_perm_interleave_2 (d))
    return true;
  if (expand_vec_perm_v4hi_5 (d))
    return true;
  return false;
}

/* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */

static bool
ia64_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
                               rtx target, rtx op0, rtx op1,
                               const vec_perm_indices &sel)
{
  if (vmode != op_mode)
    return false;

  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  unsigned int i, nelt, which;

  d.target = target;
  if (op0)
    {
      rtx nop0 = force_reg (vmode, op0);
      if (op0 == op1)
        op1 = nop0;
      op0 = nop0;
    }
  if (op1)
    op1 = force_reg (vmode, op1);
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = vmode;
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = !target;

  gcc_assert (sel.length () == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = which = 0; i < nelt; ++i)
    {
      unsigned int ei = sel[i] & (2 * nelt - 1);

      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
      perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      if (d.testing_p || !rtx_equal_p (d.op0, d.op1))
        {
          d.one_operand_p = false;
          break;
        }

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
      for (i = 0; i < nelt; ++i)
        if (d.perm[i] >= nelt)
          d.perm[i] -= nelt;
      /* FALLTHRU */

    case 1:
      d.op1 = d.op0;
      d.one_operand_p = true;
      break;

    case 2:
      for (i = 0; i < nelt; ++i)
        d.perm[i] -= nelt;
      d.op0 = d.op1;
      d.one_operand_p = true;
      break;
    }

  if (d.testing_p)
    {
      /* We have to go through the motions and see if we can
         figure out how to generate the requested permutation.  */
      d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
      d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
      if (!d.one_operand_p)
        d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

      start_sequence ();
      bool ret = ia64_expand_vec_perm_const_1 (&d);
      end_sequence ();

      return ret;
    }

  if (ia64_expand_vec_perm_const_1 (&d))
    return true;

  /* If the mask says both arguments are needed, but they are the same,
     the above tried to expand with one_operand_p true.  If that didn't
     work, retry with one_operand_p false, as that's what we used in _ok.  */
  if (which == 3 && d.one_operand_p)
    {
      memcpy (d.perm, perm, sizeof (perm));
      d.one_operand_p = false;
      return ia64_expand_vec_perm_const_1 (&d);
    }

  return false;
}

void
ia64_expand_vec_setv2sf (rtx operands[3])
{
  struct expand_vec_perm_d d;
  unsigned int which;
  bool ok;

  d.target = operands[0];
  d.op0 = operands[0];
  d.op1 = gen_reg_rtx (V2SFmode);
  d.vmode = V2SFmode;
  d.nelt = 2;
  d.one_operand_p = false;
  d.testing_p = false;

  which = INTVAL (operands[2]);
  gcc_assert (which <= 1);
  d.perm[0] = 1 - which;
  d.perm[1] = which + 2;

  emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}

void
ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
{
  struct expand_vec_perm_d d;
  machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool ok;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = vmode;
  d.nelt = nelt;
  d.one_operand_p = false;
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}

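/* For example, with a V4HImode target and odd == 1 this builds the
   selector { 1, 3, 5, 7 }, i.e. the odd-numbered halfwords of the
   concatenation of OP0 and OP1; odd == 0 gives { 0, 2, 4, 6 } for the
   even elements.  */
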
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.

   In BR regs, we can't change the DImode at all.
   In FP regs, we can't change FP values to integer values and vice versa,
   but we can change e.g. DImode to SImode, and V2SFmode into DImode.  */

static bool
ia64_can_change_mode_class (machine_mode from, machine_mode to,
                            reg_class_t rclass)
{
  if (reg_classes_intersect_p (rclass, BR_REGS))
    return from == to;
  if (SCALAR_FLOAT_MODE_P (from) != SCALAR_FLOAT_MODE_P (to))
    return !reg_classes_intersect_p (rclass, FR_REGS);
  return true;
}

#include "gt-ia64.h"