Daily bump.
[official-gcc.git] / gcc / config / ia64 / ia64.cc
blob91b7310b65606c42f5daf4de903eaf08716ae5b2
1 /* Definitions of target machine for GNU compiler.
2 Copyright (C) 1999-2025 Free Software Foundation, Inc.
3 Contributed by James E. Wilson <wilson@cygnus.com> and
4 David Mosberger <davidm@hpl.hp.com>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #define IN_TARGET_CODE 1
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "memmodel.h"
32 #include "cfghooks.h"
33 #include "df.h"
34 #include "tm_p.h"
35 #include "stringpool.h"
36 #include "attribs.h"
37 #include "optabs.h"
38 #include "regs.h"
39 #include "emit-rtl.h"
40 #include "recog.h"
41 #include "diagnostic-core.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "explow.h"
51 #include "expr.h"
52 #include "cfgrtl.h"
53 #include "libfuncs.h"
54 #include "sched-int.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "gimplify.h"
58 #include "intl.h"
59 #include "debug.h"
60 #include "dbgcnt.h"
61 #include "tm-constrs.h"
62 #include "sel-sched.h"
63 #include "reload.h"
64 #include "opts.h"
65 #include "dumpfile.h"
66 #include "builtins.h"
68 /* This file should be included last. */
69 #include "target-def.h"
71 /* This is used for communication between ASM_OUTPUT_LABEL and
72 ASM_OUTPUT_LABELREF. */
73 int ia64_asm_output_label = 0;
75 /* Register names for ia64_expand_prologue. */
76 static const char * const ia64_reg_numbers[96] =
77 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
78 "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
79 "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
80 "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
81 "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
82 "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
83 "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
84 "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
85 "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
86 "r104","r105","r106","r107","r108","r109","r110","r111",
87 "r112","r113","r114","r115","r116","r117","r118","r119",
88 "r120","r121","r122","r123","r124","r125","r126","r127"};
90 /* ??? These strings could be shared with REGISTER_NAMES. */
91 static const char * const ia64_input_reg_names[8] =
92 { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
94 /* ??? These strings could be shared with REGISTER_NAMES. */
95 static const char * const ia64_local_reg_names[80] =
96 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
97 "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
98 "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
99 "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
100 "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
101 "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
102 "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
103 "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
104 "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
105 "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
107 /* ??? These strings could be shared with REGISTER_NAMES. */
108 static const char * const ia64_output_reg_names[8] =
109 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
111 /* Variables which are this size or smaller are put in the sdata/sbss
112 sections. */
114 unsigned int ia64_section_threshold;
116 /* The following variable is used by the DFA insn scheduler. The value is
117 TRUE if we do insn bundling instead of insn scheduling. */
118 int bundling_p = 0;
120 enum ia64_frame_regs
122 reg_fp,
123 reg_save_b0,
124 reg_save_pr,
125 reg_save_ar_pfs,
126 reg_save_ar_unat,
127 reg_save_ar_lc,
128 reg_save_gp,
129 number_of_ia64_frame_regs
132 /* Structure to be filled in by ia64_compute_frame_size with register
133 save masks and offsets for the current function. */
135 struct ia64_frame_info
137 HOST_WIDE_INT total_size; /* size of the stack frame, not including
138 the caller's scratch area. */
139 HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
140 HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
141 HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
142 HARD_REG_SET mask; /* mask of saved registers. */
143 unsigned int gr_used_mask; /* mask of registers in use as gr spill
144 registers or long-term scratches. */
145 int n_spilled; /* number of spilled registers. */
146 int r[number_of_ia64_frame_regs]; /* Frame related registers. */
147 int n_input_regs; /* number of input registers used. */
148 int n_local_regs; /* number of local registers used. */
149 int n_output_regs; /* number of output registers used. */
150 int n_rotate_regs; /* number of rotating registers used. */
152 char need_regstk; /* true if a .regstk directive needed. */
153 char initialized; /* true if the data is finalized. */
156 /* Current frame information calculated by ia64_compute_frame_size. */
157 static struct ia64_frame_info current_frame_info;
158 /* The actual registers that are emitted. */
159 static int emitted_frame_related_regs[number_of_ia64_frame_regs];
161 static int ia64_first_cycle_multipass_dfa_lookahead (void);
162 static void ia64_dependencies_evaluation_hook (rtx_insn *, rtx_insn *);
163 static void ia64_init_dfa_pre_cycle_insn (void);
164 static rtx ia64_dfa_pre_cycle_insn (void);
165 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
166 static int ia64_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *);
167 static void ia64_h_i_d_extended (void);
168 static void * ia64_alloc_sched_context (void);
169 static void ia64_init_sched_context (void *, bool);
170 static void ia64_set_sched_context (void *);
171 static void ia64_clear_sched_context (void *);
172 static void ia64_free_sched_context (void *);
173 static int ia64_mode_to_int (machine_mode);
174 static void ia64_set_sched_flags (spec_info_t);
175 static ds_t ia64_get_insn_spec_ds (rtx_insn *);
176 static ds_t ia64_get_insn_checked_ds (rtx_insn *);
177 static bool ia64_skip_rtx_p (const_rtx);
178 static int ia64_speculate_insn (rtx_insn *, ds_t, rtx *);
179 static bool ia64_needs_block_p (ds_t);
180 static rtx ia64_gen_spec_check (rtx_insn *, rtx_insn *, ds_t);
181 static int ia64_spec_check_p (rtx);
182 static int ia64_spec_check_src_p (rtx);
183 static rtx gen_tls_get_addr (void);
184 static rtx gen_thread_pointer (void);
185 static int find_gr_spill (enum ia64_frame_regs, int);
186 static int next_scratch_gr_reg (void);
187 static void mark_reg_gr_used_mask (rtx, void *);
188 static void ia64_compute_frame_size (HOST_WIDE_INT);
189 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
190 static void finish_spill_pointers (void);
191 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
192 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
193 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
194 static rtx gen_movdi_x (rtx, rtx, rtx);
195 static rtx gen_fr_spill_x (rtx, rtx, rtx);
196 static rtx gen_fr_restore_x (rtx, rtx, rtx);
198 static void ia64_option_override (void);
199 static bool ia64_can_eliminate (const int, const int);
200 static machine_mode hfa_element_mode (const_tree, bool);
201 static void ia64_setup_incoming_varargs (cumulative_args_t,
202 const function_arg_info &,
203 int *, int);
204 static int ia64_arg_partial_bytes (cumulative_args_t,
205 const function_arg_info &);
206 static rtx ia64_function_arg (cumulative_args_t, const function_arg_info &);
207 static rtx ia64_function_incoming_arg (cumulative_args_t,
208 const function_arg_info &);
209 static void ia64_function_arg_advance (cumulative_args_t,
210 const function_arg_info &);
211 static pad_direction ia64_function_arg_padding (machine_mode, const_tree);
212 static unsigned int ia64_function_arg_boundary (machine_mode,
213 const_tree);
214 static bool ia64_function_ok_for_sibcall (tree, tree);
215 static bool ia64_return_in_memory (const_tree, const_tree);
216 static rtx ia64_function_value (const_tree, const_tree, bool);
217 static rtx ia64_libcall_value (machine_mode, const_rtx);
218 static bool ia64_function_value_regno_p (const unsigned int);
219 static int ia64_register_move_cost (machine_mode, reg_class_t,
220 reg_class_t);
221 static int ia64_memory_move_cost (machine_mode mode, reg_class_t,
222 bool);
223 static bool ia64_rtx_costs (rtx, machine_mode, int, int, int *, bool);
224 static int ia64_unspec_may_trap_p (const_rtx, unsigned);
225 static void fix_range (const char *);
226 static struct machine_function * ia64_init_machine_status (void);
227 static void emit_insn_group_barriers (FILE *);
228 static void emit_all_insn_group_barriers (FILE *);
229 static void final_emit_insn_group_barriers (FILE *);
230 static void emit_predicate_relation_info (void);
231 static void ia64_reorg (void);
232 static bool ia64_in_small_data_p (const_tree);
233 static void process_epilogue (FILE *, rtx, bool, bool);
235 static bool ia64_assemble_integer (rtx, unsigned int, int);
236 static void ia64_output_function_prologue (FILE *);
237 static void ia64_output_function_epilogue (FILE *);
238 static void ia64_output_function_end_prologue (FILE *);
240 static void ia64_print_operand (FILE *, rtx, int);
241 static void ia64_print_operand_address (FILE *, machine_mode, rtx);
242 static bool ia64_print_operand_punct_valid_p (unsigned char code);
244 static int ia64_issue_rate (void);
245 static int ia64_adjust_cost (rtx_insn *, int, rtx_insn *, int, dw_t);
246 static void ia64_sched_init (FILE *, int, int);
247 static void ia64_sched_init_global (FILE *, int, int);
248 static void ia64_sched_finish_global (FILE *, int);
249 static void ia64_sched_finish (FILE *, int);
250 static int ia64_dfa_sched_reorder (FILE *, int, rtx_insn **, int *, int, int);
251 static int ia64_sched_reorder (FILE *, int, rtx_insn **, int *, int);
252 static int ia64_sched_reorder2 (FILE *, int, rtx_insn **, int *, int);
253 static int ia64_variable_issue (FILE *, int, rtx_insn *, int);
255 static void ia64_asm_unwind_emit (FILE *, rtx_insn *);
256 static void ia64_asm_emit_except_personality (rtx);
257 static void ia64_asm_init_sections (void);
259 static enum unwind_info_type ia64_debug_unwind_info (void);
261 static struct bundle_state *get_free_bundle_state (void);
262 static void free_bundle_state (struct bundle_state *);
263 static void initiate_bundle_states (void);
264 static void finish_bundle_states (void);
265 static int insert_bundle_state (struct bundle_state *);
266 static void initiate_bundle_state_table (void);
267 static void finish_bundle_state_table (void);
268 static int try_issue_nops (struct bundle_state *, int);
269 static int try_issue_insn (struct bundle_state *, rtx);
270 static void issue_nops_and_insn (struct bundle_state *, int, rtx_insn *,
271 int, int);
272 static int get_max_pos (state_t);
273 static int get_template (state_t, int);
275 static rtx_insn *get_next_important_insn (rtx_insn *, rtx_insn *);
276 static bool important_for_bundling_p (rtx_insn *);
277 static bool unknown_for_bundling_p (rtx_insn *);
278 static void bundling (FILE *, int, rtx_insn *, rtx_insn *);
280 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
281 HOST_WIDE_INT, tree);
282 static void ia64_file_start (void);
283 static void ia64_globalize_decl_name (FILE *, tree);
285 static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
286 static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
287 static section *ia64_select_rtx_section (machine_mode, rtx,
288 unsigned HOST_WIDE_INT);
289 static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
290 ATTRIBUTE_UNUSED;
291 static unsigned int ia64_section_type_flags (tree, const char *, int);
292 static void ia64_init_libfuncs (void)
293 ATTRIBUTE_UNUSED;
294 static void ia64_hpux_init_libfuncs (void)
295 ATTRIBUTE_UNUSED;
296 static void ia64_sysv4_init_libfuncs (void)
297 ATTRIBUTE_UNUSED;
298 static void ia64_vms_init_libfuncs (void)
299 ATTRIBUTE_UNUSED;
300 static void ia64_soft_fp_init_libfuncs (void)
301 ATTRIBUTE_UNUSED;
302 static bool ia64_vms_valid_pointer_mode (scalar_int_mode mode)
303 ATTRIBUTE_UNUSED;
304 static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
305 ATTRIBUTE_UNUSED;
307 static bool ia64_attribute_takes_identifier_p (const_tree);
308 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
309 static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
310 static void ia64_encode_section_info (tree, rtx, int);
311 static rtx ia64_struct_value_rtx (tree, int);
312 static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
313 static bool ia64_scalar_mode_supported_p (scalar_mode mode);
314 static bool ia64_vector_mode_supported_p (machine_mode mode);
315 static bool ia64_legitimate_constant_p (machine_mode, rtx);
316 static bool ia64_legitimate_address_p (machine_mode, rtx, bool,
317 code_helper = ERROR_MARK);
318 static bool ia64_cannot_force_const_mem (machine_mode, rtx);
319 static const char *ia64_mangle_type (const_tree);
320 static const char *ia64_invalid_conversion (const_tree, const_tree);
321 static const char *ia64_invalid_unary_op (int, const_tree);
322 static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
323 static machine_mode ia64_c_mode_for_suffix (char);
324 static machine_mode ia64_c_mode_for_floating_type (enum tree_index);
325 static void ia64_trampoline_init (rtx, tree, rtx);
326 static void ia64_override_options_after_change (void);
327 static bool ia64_member_type_forces_blk (const_tree, machine_mode);
329 static tree ia64_fold_builtin (tree, int, tree *, bool);
330 static tree ia64_builtin_decl (unsigned, bool);
332 static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
333 static fixed_size_mode ia64_get_reg_raw_mode (int regno);
334 static section * ia64_hpux_function_section (tree, enum node_frequency,
335 bool, bool);
337 static bool ia64_vectorize_vec_perm_const (machine_mode, machine_mode, rtx,
338 rtx, rtx, const vec_perm_indices &);
340 static unsigned int ia64_hard_regno_nregs (unsigned int, machine_mode);
341 static bool ia64_hard_regno_mode_ok (unsigned int, machine_mode);
342 static bool ia64_modes_tieable_p (machine_mode, machine_mode);
343 static bool ia64_can_change_mode_class (machine_mode, machine_mode,
344 reg_class_t);
346 #define MAX_VECT_LEN 8
348 struct expand_vec_perm_d
350 rtx target, op0, op1;
351 unsigned char perm[MAX_VECT_LEN];
352 machine_mode vmode;
353 unsigned char nelt;
354 bool one_operand_p;
355 bool testing_p;
358 static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
361 /* Table of valid machine attributes. */
362 static const attribute_spec ia64_gnu_attributes[] =
364 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
365 affects_type_identity, handler, exclude } */
366 { "syscall_linkage", 0, 0, false, true, true, false, NULL, NULL },
367 { "model", 1, 1, true, false, false, false,
368 ia64_handle_model_attribute, NULL },
369 #if TARGET_ABI_OPEN_VMS
370 { "common_object", 1, 1, true, false, false, false,
371 ia64_vms_common_object_attribute, NULL },
372 #endif
373 { "version_id", 1, 1, true, false, false, false,
374 ia64_handle_version_id_attribute, NULL }
377 static const scoped_attribute_specs ia64_gnu_attribute_table =
379 "gnu", { ia64_gnu_attributes }
382 static const scoped_attribute_specs *const ia64_attribute_table[] =
384 &ia64_gnu_attribute_table
387 /* Initialize the GCC target structure. */
388 #undef TARGET_ATTRIBUTE_TABLE
389 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
391 #undef TARGET_INIT_BUILTINS
392 #define TARGET_INIT_BUILTINS ia64_init_builtins
394 #undef TARGET_FOLD_BUILTIN
395 #define TARGET_FOLD_BUILTIN ia64_fold_builtin
397 #undef TARGET_EXPAND_BUILTIN
398 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
400 #undef TARGET_BUILTIN_DECL
401 #define TARGET_BUILTIN_DECL ia64_builtin_decl
403 #undef TARGET_ASM_BYTE_OP
404 #define TARGET_ASM_BYTE_OP "\tdata1\t"
405 #undef TARGET_ASM_ALIGNED_HI_OP
406 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
407 #undef TARGET_ASM_ALIGNED_SI_OP
408 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
409 #undef TARGET_ASM_ALIGNED_DI_OP
410 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
411 #undef TARGET_ASM_UNALIGNED_HI_OP
412 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
413 #undef TARGET_ASM_UNALIGNED_SI_OP
414 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
415 #undef TARGET_ASM_UNALIGNED_DI_OP
416 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
417 #undef TARGET_ASM_INTEGER
418 #define TARGET_ASM_INTEGER ia64_assemble_integer
420 #undef TARGET_OPTION_OVERRIDE
421 #define TARGET_OPTION_OVERRIDE ia64_option_override
423 #undef TARGET_ASM_FUNCTION_PROLOGUE
424 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
425 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
426 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
427 #undef TARGET_ASM_FUNCTION_EPILOGUE
428 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
430 #undef TARGET_PRINT_OPERAND
431 #define TARGET_PRINT_OPERAND ia64_print_operand
432 #undef TARGET_PRINT_OPERAND_ADDRESS
433 #define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
434 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
435 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p
437 #undef TARGET_IN_SMALL_DATA_P
438 #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
440 #undef TARGET_SCHED_ADJUST_COST
441 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
442 #undef TARGET_SCHED_ISSUE_RATE
443 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
444 #undef TARGET_SCHED_VARIABLE_ISSUE
445 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
446 #undef TARGET_SCHED_INIT
447 #define TARGET_SCHED_INIT ia64_sched_init
448 #undef TARGET_SCHED_FINISH
449 #define TARGET_SCHED_FINISH ia64_sched_finish
450 #undef TARGET_SCHED_INIT_GLOBAL
451 #define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
452 #undef TARGET_SCHED_FINISH_GLOBAL
453 #define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
454 #undef TARGET_SCHED_REORDER
455 #define TARGET_SCHED_REORDER ia64_sched_reorder
456 #undef TARGET_SCHED_REORDER2
457 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
459 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
460 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
462 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
463 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
465 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
466 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
467 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
468 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
470 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
471 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
472 ia64_first_cycle_multipass_dfa_lookahead_guard
474 #undef TARGET_SCHED_DFA_NEW_CYCLE
475 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
477 #undef TARGET_SCHED_H_I_D_EXTENDED
478 #define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
480 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
481 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
483 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
484 #define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
486 #undef TARGET_SCHED_SET_SCHED_CONTEXT
487 #define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
489 #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
490 #define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
492 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
493 #define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
495 #undef TARGET_SCHED_SET_SCHED_FLAGS
496 #define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
498 #undef TARGET_SCHED_GET_INSN_SPEC_DS
499 #define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
501 #undef TARGET_SCHED_GET_INSN_CHECKED_DS
502 #define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
504 #undef TARGET_SCHED_SPECULATE_INSN
505 #define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
507 #undef TARGET_SCHED_NEEDS_BLOCK_P
508 #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
510 #undef TARGET_SCHED_GEN_SPEC_CHECK
511 #define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
513 #undef TARGET_SCHED_SKIP_RTX_P
514 #define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
516 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
517 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
518 #undef TARGET_ARG_PARTIAL_BYTES
519 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
520 #undef TARGET_FUNCTION_ARG
521 #define TARGET_FUNCTION_ARG ia64_function_arg
522 #undef TARGET_FUNCTION_INCOMING_ARG
523 #define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
524 #undef TARGET_FUNCTION_ARG_ADVANCE
525 #define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
526 #undef TARGET_FUNCTION_ARG_PADDING
527 #define TARGET_FUNCTION_ARG_PADDING ia64_function_arg_padding
528 #undef TARGET_FUNCTION_ARG_BOUNDARY
529 #define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary
531 #undef TARGET_ASM_OUTPUT_MI_THUNK
532 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
533 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
534 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
536 #undef TARGET_ASM_FILE_START
537 #define TARGET_ASM_FILE_START ia64_file_start
539 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
540 #define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
542 #undef TARGET_REGISTER_MOVE_COST
543 #define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
544 #undef TARGET_MEMORY_MOVE_COST
545 #define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
546 #undef TARGET_RTX_COSTS
547 #define TARGET_RTX_COSTS ia64_rtx_costs
548 #undef TARGET_ADDRESS_COST
549 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
551 #undef TARGET_UNSPEC_MAY_TRAP_P
552 #define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
554 #undef TARGET_MACHINE_DEPENDENT_REORG
555 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
557 #undef TARGET_ENCODE_SECTION_INFO
558 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
560 #undef TARGET_SECTION_TYPE_FLAGS
561 #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
563 #ifdef HAVE_AS_TLS
564 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
565 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
566 #endif
568 /* ??? Investigate. */
569 #if 0
570 #undef TARGET_PROMOTE_PROTOTYPES
571 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
572 #endif
574 #undef TARGET_FUNCTION_VALUE
575 #define TARGET_FUNCTION_VALUE ia64_function_value
576 #undef TARGET_LIBCALL_VALUE
577 #define TARGET_LIBCALL_VALUE ia64_libcall_value
578 #undef TARGET_FUNCTION_VALUE_REGNO_P
579 #define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p
581 #undef TARGET_STRUCT_VALUE_RTX
582 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
583 #undef TARGET_RETURN_IN_MEMORY
584 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
585 #undef TARGET_SETUP_INCOMING_VARARGS
586 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
587 #undef TARGET_STRICT_ARGUMENT_NAMING
588 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
589 #undef TARGET_MUST_PASS_IN_STACK
590 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
591 #undef TARGET_GET_RAW_RESULT_MODE
592 #define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
593 #undef TARGET_GET_RAW_ARG_MODE
594 #define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode
596 #undef TARGET_MEMBER_TYPE_FORCES_BLK
597 #define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk
599 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
600 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
602 #undef TARGET_ASM_UNWIND_EMIT
603 #define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
604 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
605 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
606 #undef TARGET_ASM_INIT_SECTIONS
607 #define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections
609 #undef TARGET_DEBUG_UNWIND_INFO
610 #define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info
612 #undef TARGET_SCALAR_MODE_SUPPORTED_P
613 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
614 #undef TARGET_VECTOR_MODE_SUPPORTED_P
615 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
617 #undef TARGET_LEGITIMATE_CONSTANT_P
618 #define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
619 #undef TARGET_LEGITIMATE_ADDRESS_P
620 #define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p
622 #undef TARGET_CANNOT_FORCE_CONST_MEM
623 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
625 #undef TARGET_MANGLE_TYPE
626 #define TARGET_MANGLE_TYPE ia64_mangle_type
628 #undef TARGET_INVALID_CONVERSION
629 #define TARGET_INVALID_CONVERSION ia64_invalid_conversion
630 #undef TARGET_INVALID_UNARY_OP
631 #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
632 #undef TARGET_INVALID_BINARY_OP
633 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
635 #undef TARGET_C_MODE_FOR_SUFFIX
636 #define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
638 #undef TARGET_C_MODE_FOR_FLOATING_TYPE
639 #define TARGET_C_MODE_FOR_FLOATING_TYPE ia64_c_mode_for_floating_type
641 #undef TARGET_CAN_ELIMINATE
642 #define TARGET_CAN_ELIMINATE ia64_can_eliminate
644 #undef TARGET_TRAMPOLINE_INIT
645 #define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
647 #undef TARGET_CAN_USE_DOLOOP_P
648 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
649 #undef TARGET_INVALID_WITHIN_DOLOOP
650 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
652 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
653 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
655 #undef TARGET_PREFERRED_RELOAD_CLASS
656 #define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class
658 #undef TARGET_DELAY_SCHED2
659 #define TARGET_DELAY_SCHED2 true
661 /* Variable tracking should be run after all optimizations which
662 change order of insns. It also needs a valid CFG. */
663 #undef TARGET_DELAY_VARTRACK
664 #define TARGET_DELAY_VARTRACK true
666 #undef TARGET_VECTORIZE_VEC_PERM_CONST
667 #define TARGET_VECTORIZE_VEC_PERM_CONST ia64_vectorize_vec_perm_const
669 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
670 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p
672 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
673 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 0
675 #undef TARGET_HARD_REGNO_NREGS
676 #define TARGET_HARD_REGNO_NREGS ia64_hard_regno_nregs
677 #undef TARGET_HARD_REGNO_MODE_OK
678 #define TARGET_HARD_REGNO_MODE_OK ia64_hard_regno_mode_ok
680 #undef TARGET_MODES_TIEABLE_P
681 #define TARGET_MODES_TIEABLE_P ia64_modes_tieable_p
683 #undef TARGET_CAN_CHANGE_MODE_CLASS
684 #define TARGET_CAN_CHANGE_MODE_CLASS ia64_can_change_mode_class
686 #undef TARGET_CONSTANT_ALIGNMENT
687 #define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
689 #undef TARGET_DOCUMENTATION_NAME
690 #define TARGET_DOCUMENTATION_NAME "IA-64"
692 struct gcc_target targetm = TARGET_INITIALIZER;
694 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
695 identifier as an argument, so the front end shouldn't look it up. */
697 static bool
698 ia64_attribute_takes_identifier_p (const_tree attr_id)
700 if (is_attribute_p ("model", attr_id))
701 return true;
702 #if TARGET_ABI_OPEN_VMS
703 if (is_attribute_p ("common_object", attr_id))
704 return true;
705 #endif
706 return false;
709 typedef enum
711 ADDR_AREA_NORMAL, /* normal address area */
712 ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
714 ia64_addr_area;
716 static GTY(()) tree small_ident1;
717 static GTY(()) tree small_ident2;
719 static void
720 init_idents (void)
722 if (small_ident1 == 0)
724 small_ident1 = get_identifier ("small");
725 small_ident2 = get_identifier ("__small__");
729 /* Retrieve the address area that has been chosen for the given decl. */
731 static ia64_addr_area
732 ia64_get_addr_area (tree decl)
734 tree model_attr;
736 model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
737 if (model_attr)
739 tree id;
741 init_idents ();
742 id = TREE_VALUE (TREE_VALUE (model_attr));
743 if (id == small_ident1 || id == small_ident2)
744 return ADDR_AREA_SMALL;
746 return ADDR_AREA_NORMAL;
749 static tree
750 ia64_handle_model_attribute (tree *node, tree name, tree args,
751 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
753 ia64_addr_area addr_area = ADDR_AREA_NORMAL;
754 ia64_addr_area area;
755 tree arg, decl = *node;
757 init_idents ();
758 arg = TREE_VALUE (args);
759 if (arg == small_ident1 || arg == small_ident2)
761 addr_area = ADDR_AREA_SMALL;
763 else
765 warning (OPT_Wattributes, "invalid argument of %qE attribute",
766 name);
767 *no_add_attrs = true;
770 switch (TREE_CODE (decl))
772 case VAR_DECL:
773 if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
774 == FUNCTION_DECL)
775 && !TREE_STATIC (decl))
777 error_at (DECL_SOURCE_LOCATION (decl),
778 "an address area attribute cannot be specified for "
779 "local variables");
780 *no_add_attrs = true;
782 area = ia64_get_addr_area (decl);
783 if (area != ADDR_AREA_NORMAL && addr_area != area)
785 error ("address area of %q+D conflicts with previous "
786 "declaration", decl);
787 *no_add_attrs = true;
789 break;
791 case FUNCTION_DECL:
792 error_at (DECL_SOURCE_LOCATION (decl),
793 "address area attribute cannot be specified for "
794 "functions");
795 *no_add_attrs = true;
796 break;
798 default:
799 warning (OPT_Wattributes, "%qE attribute ignored",
800 name);
801 *no_add_attrs = true;
802 break;
805 return NULL_TREE;
808 /* Part of the low level implementation of DEC Ada pragma Common_Object which
809 enables the shared use of variables stored in overlaid linker areas
810 corresponding to the use of Fortran COMMON. */
812 static tree
813 ia64_vms_common_object_attribute (tree *node, tree name, tree args,
814 int flags ATTRIBUTE_UNUSED,
815 bool *no_add_attrs)
817 tree decl = *node;
818 tree id;
820 gcc_assert (DECL_P (decl));
822 DECL_COMMON (decl) = 1;
823 id = TREE_VALUE (args);
824 if (TREE_CODE (id) != IDENTIFIER_NODE && TREE_CODE (id) != STRING_CST)
826 error ("%qE attribute requires a string constant argument", name);
827 *no_add_attrs = true;
828 return NULL_TREE;
830 return NULL_TREE;
833 /* Part of the low level implementation of DEC Ada pragma Common_Object. */
835 void
836 ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
837 unsigned HOST_WIDE_INT size,
838 unsigned int align)
840 tree attr = DECL_ATTRIBUTES (decl);
842 if (attr)
843 attr = lookup_attribute ("common_object", attr);
844 if (attr)
846 tree id = TREE_VALUE (TREE_VALUE (attr));
847 const char *name;
849 if (TREE_CODE (id) == IDENTIFIER_NODE)
850 name = IDENTIFIER_POINTER (id);
851 else if (TREE_CODE (id) == STRING_CST)
852 name = TREE_STRING_POINTER (id);
853 else
854 abort ();
856 fprintf (file, "\t.vms_common\t\"%s\",", name);
858 else
859 fprintf (file, "%s", COMMON_ASM_OP);
861 /* Code from elfos.h. */
862 assemble_name (file, name);
863 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED",%u",
864 size, align / BITS_PER_UNIT);
866 fputc ('\n', file);
869 static void
870 ia64_encode_addr_area (tree decl, rtx symbol)
872 int flags;
874 flags = SYMBOL_REF_FLAGS (symbol);
875 switch (ia64_get_addr_area (decl))
877 case ADDR_AREA_NORMAL: break;
878 case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
879 default: gcc_unreachable ();
881 SYMBOL_REF_FLAGS (symbol) = flags;
884 static void
885 ia64_encode_section_info (tree decl, rtx rtl, int first)
887 default_encode_section_info (decl, rtl, first);
889 /* Careful not to prod global register variables. */
890 if (VAR_P (decl)
891 && GET_CODE (DECL_RTL (decl)) == MEM
892 && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
893 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
894 ia64_encode_addr_area (decl, XEXP (rtl, 0));
897 /* Return 1 if the operands of a move are ok. */
900 ia64_move_ok (rtx dst, rtx src)
902 /* If we're under init_recog_no_volatile, we'll not be able to use
903 memory_operand. So check the code directly and don't worry about
904 the validity of the underlying address, which should have been
905 checked elsewhere anyway. */
906 if (GET_CODE (dst) != MEM)
907 return 1;
908 if (GET_CODE (src) == MEM)
909 return 0;
910 if (register_operand (src, VOIDmode))
911 return 1;
913 /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0. */
914 if (INTEGRAL_MODE_P (GET_MODE (dst)))
915 return src == const0_rtx;
916 else
917 return satisfies_constraint_G (src);
920 /* Return 1 if the operands are ok for a floating point load pair. */
923 ia64_load_pair_ok (rtx dst, rtx src)
925 /* ??? There is a thinko in the implementation of the "x" constraint and the
926 FP_REGS class. The constraint will also reject (reg f30:TI) so we must
927 also return false for it. */
928 if (GET_CODE (dst) != REG
929 || !(FP_REGNO_P (REGNO (dst)) && FP_REGNO_P (REGNO (dst) + 1)))
930 return 0;
931 if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
932 return 0;
933 switch (GET_CODE (XEXP (src, 0)))
935 case REG:
936 case POST_INC:
937 break;
938 case POST_DEC:
939 return 0;
940 case POST_MODIFY:
942 rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
944 if (GET_CODE (adjust) != CONST_INT
945 || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
946 return 0;
948 break;
949 default:
950 abort ();
952 return 1;
956 addp4_optimize_ok (rtx op1, rtx op2)
958 return (basereg_operand (op1, GET_MODE(op1)) !=
959 basereg_operand (op2, GET_MODE(op2)));
962 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
963 Return the length of the field, or <= 0 on failure. */
966 ia64_depz_field_mask (rtx rop, rtx rshift)
968 unsigned HOST_WIDE_INT op = INTVAL (rop);
969 unsigned HOST_WIDE_INT shift = INTVAL (rshift);
971 /* Get rid of the zero bits we're shifting in. */
972 op >>= shift;
974 /* We must now have a solid block of 1's at bit 0. */
975 return exact_log2 (op + 1);
978 /* Return the TLS model to use for ADDR. */
980 static enum tls_model
981 tls_symbolic_operand_type (rtx addr)
983 enum tls_model tls_kind = TLS_MODEL_NONE;
985 if (GET_CODE (addr) == CONST)
987 if (GET_CODE (XEXP (addr, 0)) == PLUS
988 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
989 tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
991 else if (GET_CODE (addr) == SYMBOL_REF)
992 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
994 return tls_kind;
997 /* Returns true if REG (assumed to be a `reg' RTX) is valid for use
998 as a base register. */
1000 static inline bool
1001 ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
1003 if (strict
1004 && REGNO_OK_FOR_BASE_P (REGNO (reg)))
1005 return true;
1006 else if (!strict
1007 && (GENERAL_REGNO_P (REGNO (reg))
1008 || !HARD_REGISTER_P (reg)))
1009 return true;
1010 else
1011 return false;
1014 static bool
1015 ia64_legitimate_address_reg (const_rtx reg, bool strict)
1017 if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
1018 || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
1019 && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
1020 return true;
1022 return false;
1025 static bool
1026 ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
1028 if (GET_CODE (disp) == PLUS
1029 && rtx_equal_p (reg, XEXP (disp, 0))
1030 && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
1031 || (CONST_INT_P (XEXP (disp, 1))
1032 && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
1033 return true;
1035 return false;
1038 /* Implement TARGET_LEGITIMATE_ADDRESS_P. */
1040 static bool
1041 ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x,
1042 bool strict, code_helper)
1044 if (ia64_legitimate_address_reg (x, strict))
1045 return true;
1046 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
1047 && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1048 && XEXP (x, 0) != arg_pointer_rtx)
1049 return true;
1050 else if (GET_CODE (x) == POST_MODIFY
1051 && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1052 && XEXP (x, 0) != arg_pointer_rtx
1053 && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
1054 return true;
1055 else
1056 return false;
1059 /* Return true if X is a constant that is valid for some immediate
1060 field in an instruction. */
1062 static bool
1063 ia64_legitimate_constant_p (machine_mode mode, rtx x)
1065 switch (GET_CODE (x))
1067 case CONST_INT:
1068 case LABEL_REF:
1069 return true;
1071 case CONST_DOUBLE:
1072 if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
1073 return true;
1074 return satisfies_constraint_G (x);
1076 case CONST:
1077 case SYMBOL_REF:
1078 /* ??? Short term workaround for PR 28490. We must make the code here
1079 match the code in ia64_expand_move and move_operand, even though they
1080 are both technically wrong. */
1081 if (tls_symbolic_operand_type (x) == 0)
1083 HOST_WIDE_INT addend = 0;
1084 rtx op = x;
1086 if (GET_CODE (op) == CONST
1087 && GET_CODE (XEXP (op, 0)) == PLUS
1088 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1090 addend = INTVAL (XEXP (XEXP (op, 0), 1));
1091 op = XEXP (XEXP (op, 0), 0);
1094 if (any_offset_symbol_operand (op, mode)
1095 || function_operand (op, mode))
1096 return true;
1097 if (aligned_offset_symbol_operand (op, mode))
1098 return (addend & 0x3fff) == 0;
1099 return false;
1101 return false;
1103 case CONST_VECTOR:
1104 if (mode == V2SFmode)
1105 return satisfies_constraint_Y (x);
1107 return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1108 && GET_MODE_SIZE (mode) <= 8);
1110 default:
1111 return false;
1115 /* Don't allow TLS addresses to get spilled to memory. */
1117 static bool
1118 ia64_cannot_force_const_mem (machine_mode mode, rtx x)
1120 if (mode == RFmode)
1121 return true;
1122 return tls_symbolic_operand_type (x) != 0;
1125 /* Expand a symbolic constant load. */
1127 bool
1128 ia64_expand_load_address (rtx dest, rtx src)
1130 gcc_assert (GET_CODE (dest) == REG);
1132 /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
1133 having to pointer-extend the value afterward. Other forms of address
1134 computation below are also more natural to compute as 64-bit quantities.
1135 If we've been given an SImode destination register, change it. */
1136 if (GET_MODE (dest) != Pmode)
1137 dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
1138 byte_lowpart_offset (Pmode, GET_MODE (dest)));
1140 if (TARGET_NO_PIC)
1141 return false;
1142 if (small_addr_symbolic_operand (src, VOIDmode))
1143 return false;
1145 if (TARGET_AUTO_PIC)
1146 emit_insn (gen_load_gprel64 (dest, src));
1147 else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
1148 emit_insn (gen_load_fptr (dest, src));
1149 else if (sdata_symbolic_operand (src, VOIDmode))
1150 emit_insn (gen_load_gprel (dest, src));
1151 else if (local_symbolic_operand64 (src, VOIDmode))
1153 /* We want to use @gprel rather than @ltoff relocations for local
1154 symbols:
1155 - @gprel does not require dynamic linker
1156 - and does not use .sdata section
1157 https://gcc.gnu.org/bugzilla/60465 */
1158 emit_insn (gen_load_gprel64 (dest, src));
1160 else
1162 HOST_WIDE_INT addend = 0;
1163 rtx tmp;
1165 /* We did split constant offsets in ia64_expand_move, and we did try
1166 to keep them split in move_operand, but we also allowed reload to
1167 rematerialize arbitrary constants rather than spill the value to
1168 the stack and reload it. So we have to be prepared here to split
1169 them apart again. */
1170 if (GET_CODE (src) == CONST)
1172 HOST_WIDE_INT hi, lo;
1174 hi = INTVAL (XEXP (XEXP (src, 0), 1));
1175 lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
1176 hi = hi - lo;
1178 if (lo != 0)
1180 addend = lo;
1181 src = plus_constant (Pmode, XEXP (XEXP (src, 0), 0), hi);
1185 tmp = gen_rtx_HIGH (Pmode, src);
1186 tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
1187 emit_insn (gen_rtx_SET (dest, tmp));
1189 tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
1190 emit_insn (gen_rtx_SET (dest, tmp));
1192 if (addend)
1194 tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
1195 emit_insn (gen_rtx_SET (dest, tmp));
1199 return true;
1202 static GTY(()) rtx gen_tls_tga;
1203 static rtx
1204 gen_tls_get_addr (void)
1206 if (!gen_tls_tga)
1207 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1208 return gen_tls_tga;
1211 static GTY(()) rtx thread_pointer_rtx;
1212 static rtx
1213 gen_thread_pointer (void)
1215 if (!thread_pointer_rtx)
1216 thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1217 return thread_pointer_rtx;
1220 static rtx
1221 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
1222 rtx orig_op1, HOST_WIDE_INT addend)
1224 rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp;
1225 rtx_insn *insns;
1226 rtx orig_op0 = op0;
1227 HOST_WIDE_INT addend_lo, addend_hi;
1229 switch (tls_kind)
1231 case TLS_MODEL_GLOBAL_DYNAMIC:
1232 start_sequence ();
1234 tga_op1 = gen_reg_rtx (Pmode);
1235 emit_insn (gen_load_dtpmod (tga_op1, op1));
1237 tga_op2 = gen_reg_rtx (Pmode);
1238 emit_insn (gen_load_dtprel (tga_op2, op1));
1240 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1241 LCT_CONST, Pmode,
1242 tga_op1, Pmode, tga_op2, Pmode);
1244 insns = get_insns ();
1245 end_sequence ();
1247 if (GET_MODE (op0) != Pmode)
1248 op0 = tga_ret;
1249 emit_libcall_block (insns, op0, tga_ret, op1);
1250 break;
1252 case TLS_MODEL_LOCAL_DYNAMIC:
1253 /* ??? This isn't the completely proper way to do local-dynamic
1254 If the call to __tls_get_addr is used only by a single symbol,
1255 then we should (somehow) move the dtprel to the second arg
1256 to avoid the extra add. */
1257 start_sequence ();
1259 tga_op1 = gen_reg_rtx (Pmode);
1260 emit_insn (gen_load_dtpmod (tga_op1, op1));
1262 tga_op2 = const0_rtx;
1264 tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1265 LCT_CONST, Pmode,
1266 tga_op1, Pmode, tga_op2, Pmode);
1268 insns = get_insns ();
1269 end_sequence ();
1271 tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1272 UNSPEC_LD_BASE);
1273 tmp = gen_reg_rtx (Pmode);
1274 emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1276 if (!register_operand (op0, Pmode))
1277 op0 = gen_reg_rtx (Pmode);
1278 if (TARGET_TLS64)
1280 emit_insn (gen_load_dtprel (op0, op1));
1281 emit_insn (gen_adddi3 (op0, tmp, op0));
1283 else
1284 emit_insn (gen_add_dtprel (op0, op1, tmp));
1285 break;
1287 case TLS_MODEL_INITIAL_EXEC:
1288 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1289 addend_hi = addend - addend_lo;
1291 op1 = plus_constant (Pmode, op1, addend_hi);
1292 addend = addend_lo;
1294 tmp = gen_reg_rtx (Pmode);
1295 emit_insn (gen_load_tprel (tmp, op1));
1297 if (!register_operand (op0, Pmode))
1298 op0 = gen_reg_rtx (Pmode);
1299 emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1300 break;
1302 case TLS_MODEL_LOCAL_EXEC:
1303 if (!register_operand (op0, Pmode))
1304 op0 = gen_reg_rtx (Pmode);
1306 op1 = orig_op1;
1307 addend = 0;
1308 if (TARGET_TLS64)
1310 emit_insn (gen_load_tprel (op0, op1));
1311 emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
1313 else
1314 emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
1315 break;
1317 default:
1318 gcc_unreachable ();
1321 if (addend)
1322 op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1323 orig_op0, 1, OPTAB_DIRECT);
1324 if (orig_op0 == op0)
1325 return NULL_RTX;
1326 if (GET_MODE (orig_op0) == Pmode)
1327 return op0;
1328 return gen_lowpart (GET_MODE (orig_op0), op0);
1332 ia64_expand_move (rtx op0, rtx op1)
1334 machine_mode mode = GET_MODE (op0);
1336 if (!lra_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1337 op1 = force_reg (mode, op1);
1339 if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1341 HOST_WIDE_INT addend = 0;
1342 enum tls_model tls_kind;
1343 rtx sym = op1;
1345 if (GET_CODE (op1) == CONST
1346 && GET_CODE (XEXP (op1, 0)) == PLUS
1347 && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1349 addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1350 sym = XEXP (XEXP (op1, 0), 0);
1353 tls_kind = tls_symbolic_operand_type (sym);
1354 if (tls_kind)
1355 return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1357 if (any_offset_symbol_operand (sym, mode))
1358 addend = 0;
1359 else if (aligned_offset_symbol_operand (sym, mode))
1361 HOST_WIDE_INT addend_lo, addend_hi;
1363 addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1364 addend_hi = addend - addend_lo;
1366 if (addend_lo != 0)
1368 op1 = plus_constant (mode, sym, addend_hi);
1369 addend = addend_lo;
1371 else
1372 addend = 0;
1374 else
1375 op1 = sym;
1377 if (reload_completed)
1379 /* We really should have taken care of this offset earlier. */
1380 gcc_assert (addend == 0);
1381 if (ia64_expand_load_address (op0, op1))
1382 return NULL_RTX;
1385 if (addend)
1387 rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
1389 emit_insn (gen_rtx_SET (subtarget, op1));
1391 op1 = expand_simple_binop (mode, PLUS, subtarget,
1392 GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1393 if (op0 == op1)
1394 return NULL_RTX;
1398 return op1;
1401 /* Split a move from OP1 to OP0 conditional on COND. */
1403 void
1404 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1406 rtx_insn *insn, *first = get_last_insn ();
1408 emit_move_insn (op0, op1);
1410 for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1411 if (INSN_P (insn))
1412 PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1413 PATTERN (insn));
1416 /* Split a post-reload TImode or TFmode reference into two DImode
1417 components. This is made extra difficult by the fact that we do
1418 not get any scratch registers to work with, because reload cannot
1419 be prevented from giving us a scratch that overlaps the register
1420 pair involved. So instead, when addressing memory, we tweak the
1421 pointer register up and back down with POST_INCs. Or up and not
1422 back down when we can get away with it.
1424 REVERSED is true when the loads must be done in reversed order
1425 (high word first) for correctness. DEAD is true when the pointer
1426 dies with the second insn we generate and therefore the second
1427 address must not carry a postmodify.
1429 May return an insn which is to be emitted after the moves. */
1431 static rtx
1432 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1434 rtx fixup = 0;
1436 switch (GET_CODE (in))
1438 case REG:
1439 out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1440 out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1441 break;
1443 case CONST_INT:
1444 case CONST_DOUBLE:
1445 /* Cannot occur reversed. */
1446 gcc_assert (!reversed);
1448 if (GET_MODE (in) != TFmode)
1449 split_double (in, &out[0], &out[1]);
1450 else
1451 /* split_double does not understand how to split a TFmode
1452 quantity into a pair of DImode constants. */
1454 unsigned HOST_WIDE_INT p[2];
1455 long l[4]; /* TFmode is 128 bits */
1457 real_to_target (l, CONST_DOUBLE_REAL_VALUE (in), TFmode);
1459 if (FLOAT_WORDS_BIG_ENDIAN)
1461 p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1462 p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1464 else
1466 p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1467 p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1469 out[0] = GEN_INT (p[0]);
1470 out[1] = GEN_INT (p[1]);
1472 break;
1474 case MEM:
1476 rtx base = XEXP (in, 0);
1477 rtx offset;
1479 switch (GET_CODE (base))
1481 case REG:
1482 if (!reversed)
1484 out[0] = adjust_automodify_address
1485 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1486 out[1] = adjust_automodify_address
1487 (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1489 else
1491 /* Reversal requires a pre-increment, which can only
1492 be done as a separate insn. */
1493 emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1494 out[0] = adjust_automodify_address
1495 (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1496 out[1] = adjust_address (in, DImode, 0);
1498 break;
1500 case POST_INC:
1501 gcc_assert (!reversed && !dead);
1503 /* Just do the increment in two steps. */
1504 out[0] = adjust_automodify_address (in, DImode, 0, 0);
1505 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1506 break;
1508 case POST_DEC:
1509 gcc_assert (!reversed && !dead);
1511 /* Add 8, subtract 24. */
1512 base = XEXP (base, 0);
1513 out[0] = adjust_automodify_address
1514 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1515 out[1] = adjust_automodify_address
1516 (in, DImode,
1517 gen_rtx_POST_MODIFY (Pmode, base,
1518 plus_constant (Pmode, base, -24)),
1520 break;
1522 case POST_MODIFY:
1523 gcc_assert (!reversed && !dead);
1525 /* Extract and adjust the modification. This case is
1526 trickier than the others, because we might have an
1527 index register, or we might have a combined offset that
1528 doesn't fit a signed 9-bit displacement field. We can
1529 assume the incoming expression is already legitimate. */
1530 offset = XEXP (base, 1);
1531 base = XEXP (base, 0);
1533 out[0] = adjust_automodify_address
1534 (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1536 if (GET_CODE (XEXP (offset, 1)) == REG)
1538 /* Can't adjust the postmodify to match. Emit the
1539 original, then a separate addition insn. */
1540 out[1] = adjust_automodify_address (in, DImode, 0, 8);
1541 fixup = gen_adddi3 (base, base, GEN_INT (-8));
1543 else
1545 gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1546 if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1548 /* Again the postmodify cannot be made to match,
1549 but in this case it's more efficient to get rid
1550 of the postmodify entirely and fix up with an
1551 add insn. */
1552 out[1] = adjust_automodify_address (in, DImode, base, 8);
1553 fixup = gen_adddi3
1554 (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1556 else
1558 /* Combined offset still fits in the displacement field.
1559 (We cannot overflow it at the high end.) */
1560 out[1] = adjust_automodify_address
1561 (in, DImode, gen_rtx_POST_MODIFY
1562 (Pmode, base, gen_rtx_PLUS
1563 (Pmode, base,
1564 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1568 break;
1570 default:
1571 gcc_unreachable ();
1573 break;
1576 default:
1577 gcc_unreachable ();
1580 return fixup;
1583 /* Split a TImode or TFmode move instruction after reload.
1584 This is used by *movtf_internal and *movti_internal. */
1585 void
1586 ia64_split_tmode_move (rtx operands[])
1588 rtx in[2], out[2], insn;
1589 rtx fixup[2];
1590 bool dead = false;
1591 bool reversed = false;
1593 /* It is possible for reload to decide to overwrite a pointer with
1594 the value it points to. In that case we have to do the loads in
1595 the appropriate order so that the pointer is not destroyed too
1596 early. Also we must not generate a postmodify for that second
1597 load, or rws_access_regno will die. And we must not generate a
1598 postmodify for the second load if the destination register
1599 overlaps with the base register. */
1600 if (GET_CODE (operands[1]) == MEM
1601 && reg_overlap_mentioned_p (operands[0], operands[1]))
1603 rtx base = XEXP (operands[1], 0);
1604 while (GET_CODE (base) != REG)
1605 base = XEXP (base, 0);
1607 if (REGNO (base) == REGNO (operands[0]))
1608 reversed = true;
1610 if (refers_to_regno_p (REGNO (operands[0]),
1611 REGNO (operands[0])+2,
1612 base, 0))
1613 dead = true;
1615 /* Another reason to do the moves in reversed order is if the first
1616 element of the target register pair is also the second element of
1617 the source register pair. */
1618 if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1619 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1620 reversed = true;
1622 fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1623 fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1625 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
1626 if (GET_CODE (EXP) == MEM \
1627 && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
1628 || GET_CODE (XEXP (EXP, 0)) == POST_INC \
1629 || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
1630 add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
1632 insn = emit_insn (gen_rtx_SET (out[0], in[0]));
1633 MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1634 MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1636 insn = emit_insn (gen_rtx_SET (out[1], in[1]));
1637 MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1638 MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1640 if (fixup[0])
1641 emit_insn (fixup[0]);
1642 if (fixup[1])
1643 emit_insn (fixup[1]);
1645 #undef MAYBE_ADD_REG_INC_NOTE
1648 /* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
1649 through memory plus an extra GR scratch register. Except that you can
1650 either get the first from TARGET_SECONDARY_MEMORY_NEEDED or the second
1651 from SECONDARY_RELOAD_CLASS, but not both.
1653 We got into problems in the first place by allowing a construct like
1654 (subreg:XF (reg:TI)), which we got from a union containing a long double.
1655 This solution attempts to prevent this situation from occurring. When
1656 we see something like the above, we spill the inner register to memory. */
1658 static rtx
1659 spill_xfmode_rfmode_operand (rtx in, int force, machine_mode mode)
1661 if (GET_CODE (in) == SUBREG
1662 && GET_MODE (SUBREG_REG (in)) == TImode
1663 && GET_CODE (SUBREG_REG (in)) == REG)
1665 rtx memt = assign_stack_temp (TImode, 16);
1666 emit_move_insn (memt, SUBREG_REG (in));
1667 return adjust_address (memt, mode, 0);
1669 else if (force && GET_CODE (in) == REG)
1671 rtx memx = assign_stack_temp (mode, 16);
1672 emit_move_insn (memx, in);
1673 return memx;
1675 else
1676 return in;
1679 /* Expand the movxf or movrf pattern (MODE says which) with the given
1680 OPERANDS, returning true if the pattern should then invoke
1681 DONE. */
1683 bool
1684 ia64_expand_movxf_movrf (machine_mode mode, rtx operands[])
1686 rtx op0 = operands[0];
1688 if (GET_CODE (op0) == SUBREG)
1689 op0 = SUBREG_REG (op0);
1691 /* We must support XFmode loads into general registers for stdarg/vararg,
1692 unprototyped calls, and a rare case where a long double is passed as
1693 an argument after a float HFA fills the FP registers. We split them into
1694 DImode loads for convenience. We also need to support XFmode stores
1695 for the last case. This case does not happen for stdarg/vararg routines,
1696 because we do a block store to memory of unnamed arguments. */
1698 if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1700 rtx out[2];
1702 /* We're hoping to transform everything that deals with XFmode
1703 quantities and GR registers early in the compiler. */
1704 gcc_assert (can_create_pseudo_p ());
1706 /* Struct to register can just use TImode instead. */
1707 if ((GET_CODE (operands[1]) == SUBREG
1708 && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1709 || (GET_CODE (operands[1]) == REG
1710 && GR_REGNO_P (REGNO (operands[1]))))
1712 rtx op1 = operands[1];
1714 if (GET_CODE (op1) == SUBREG)
1715 op1 = SUBREG_REG (op1);
1716 else
1717 op1 = gen_rtx_REG (TImode, REGNO (op1));
1719 emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1720 return true;
1723 if (GET_CODE (operands[1]) == CONST_DOUBLE)
1725 /* Don't word-swap when reading in the constant. */
1726 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1727 operand_subword (operands[1], WORDS_BIG_ENDIAN,
1728 0, mode));
1729 emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1730 operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1731 0, mode));
1732 return true;
1735 /* If the quantity is in a register not known to be GR, spill it. */
1736 if (register_operand (operands[1], mode))
1737 operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1739 gcc_assert (GET_CODE (operands[1]) == MEM);
1741 /* Don't word-swap when reading in the value. */
1742 out[0] = gen_rtx_REG (DImode, REGNO (op0));
1743 out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1745 emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1746 emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1747 return true;
1750 if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1752 /* We're hoping to transform everything that deals with XFmode
1753 quantities and GR registers early in the compiler. */
1754 gcc_assert (can_create_pseudo_p ());
1756 /* Op0 can't be a GR_REG here, as that case is handled above.
1757 If op0 is a register, then we spill op1, so that we now have a
1758 MEM operand. This requires creating an XFmode subreg of a TImode reg
1759 to force the spill. */
1760 if (register_operand (operands[0], mode))
1762 rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1763 op1 = gen_rtx_SUBREG (mode, op1, 0);
1764 operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1767 else
1769 rtx in[2];
1771 gcc_assert (GET_CODE (operands[0]) == MEM);
1773 /* Don't word-swap when writing out the value. */
1774 in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1775 in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1777 emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1778 emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1779 return true;
1783 if (!lra_in_progress && !reload_completed)
1785 operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1787 if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1789 rtx memt, memx, in = operands[1];
1790 if (CONSTANT_P (in))
1791 in = validize_mem (force_const_mem (mode, in));
1792 if (GET_CODE (in) == MEM)
1793 memt = adjust_address (in, TImode, 0);
1794 else
1796 memt = assign_stack_temp (TImode, 16);
1797 memx = adjust_address (memt, mode, 0);
1798 emit_move_insn (memx, in);
1800 emit_move_insn (op0, memt);
1801 return true;
1804 if (!ia64_move_ok (operands[0], operands[1]))
1805 operands[1] = force_reg (mode, operands[1]);
1808 return false;
1811 /* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1812 with the expression that holds the compare result (in VOIDmode). */
1814 static GTY(()) rtx cmptf_libfunc;
1816 void
1817 ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
1819 enum rtx_code code = GET_CODE (*expr);
1820 rtx cmp;
1822 /* If we have a BImode input, then we already have a compare result, and
1823 do not need to emit another comparison. */
1824 if (GET_MODE (*op0) == BImode)
1826 gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1827 cmp = *op0;
1829 /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1830 magic number as its third argument, that indicates what to do.
1831 The return value is an integer to be compared against zero. */
1832 else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
1834 enum qfcmp_magic {
1835 QCMP_INV = 1, /* Raise FP_INVALID on NaNs as a side effect. */
1836 QCMP_UNORD = 2,
1837 QCMP_EQ = 4,
1838 QCMP_LT = 8,
1839 QCMP_GT = 16
1841 int magic;
1842 enum rtx_code ncode;
1843 rtx ret;
1845 gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
1846 switch (code)
1848 /* 1 = equal, 0 = not equal. Equality operators do
1849 not raise FP_INVALID when given a NaN operand. */
1850 case EQ: magic = QCMP_EQ; ncode = NE; break;
1851 case NE: magic = QCMP_EQ; ncode = EQ; break;
1852 /* isunordered() from C99. */
1853 case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
1854 case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
1855 /* Relational operators raise FP_INVALID when given
1856 a NaN operand. */
1857 case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
1858 case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1859 case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
1860 case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1861 /* Unordered relational operators do not raise FP_INVALID
1862 when given a NaN operand. */
1863 case UNLT: magic = QCMP_LT |QCMP_UNORD; ncode = NE; break;
1864 case UNLE: magic = QCMP_LT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1865 case UNGT: magic = QCMP_GT |QCMP_UNORD; ncode = NE; break;
1866 case UNGE: magic = QCMP_GT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1867 /* Not supported. */
1868 case UNEQ:
1869 case LTGT:
1870 default: gcc_unreachable ();
1873 start_sequence ();
1875 ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode,
1876 *op0, TFmode, *op1, TFmode,
1877 GEN_INT (magic), DImode);
1878 cmp = gen_reg_rtx (BImode);
1879 emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (ncode, BImode,
1880 ret, const0_rtx)));
1882 rtx_insn *insns = get_insns ();
1883 end_sequence ();
1885 emit_libcall_block (insns, cmp, cmp,
1886 gen_rtx_fmt_ee (code, BImode, *op0, *op1));
1887 code = NE;
1889 else
1891 cmp = gen_reg_rtx (BImode);
1892 emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
1893 code = NE;
1896 *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
1897 *op0 = cmp;
1898 *op1 = const0_rtx;
1901 /* Generate an integral vector comparison. Return true if the condition has
1902 been reversed, and so the sense of the comparison should be inverted. */
1904 static bool
1905 ia64_expand_vecint_compare (enum rtx_code code, machine_mode mode,
1906 rtx dest, rtx op0, rtx op1)
1908 bool negate = false;
1909 rtx x;
1911 /* Canonicalize the comparison to EQ, GT, GTU. */
1912 switch (code)
1914 case EQ:
1915 case GT:
1916 case GTU:
1917 break;
1919 case NE:
1920 case LE:
1921 case LEU:
1922 code = reverse_condition (code);
1923 negate = true;
1924 break;
1926 case GE:
1927 case GEU:
1928 code = reverse_condition (code);
1929 negate = true;
1930 /* FALLTHRU */
1932 case LT:
1933 case LTU:
1934 code = swap_condition (code);
1935 x = op0, op0 = op1, op1 = x;
1936 break;
1938 default:
1939 gcc_unreachable ();
1942 /* Unsigned parallel compare is not supported by the hardware. Play some
1943 tricks to turn this into a signed comparison against 0. */
1944 if (code == GTU)
1946 switch (mode)
1948 case E_V2SImode:
1950 rtx t1, t2, mask;
1952 /* Subtract (-(INT MAX) - 1) from both operands to make
1953 them signed. */
1954 mask = gen_int_mode (0x80000000, SImode);
1955 mask = gen_const_vec_duplicate (V2SImode, mask);
1956 mask = force_reg (mode, mask);
1957 t1 = gen_reg_rtx (mode);
1958 emit_insn (gen_subv2si3 (t1, op0, mask));
1959 t2 = gen_reg_rtx (mode);
1960 emit_insn (gen_subv2si3 (t2, op1, mask));
1961 op0 = t1;
1962 op1 = t2;
1963 code = GT;
1965 break;
1967 case E_V8QImode:
1968 case E_V4HImode:
1969 /* Perform a parallel unsigned saturating subtraction. */
1970 x = gen_reg_rtx (mode);
1971 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, op0, op1)));
1973 code = EQ;
1974 op0 = x;
1975 op1 = CONST0_RTX (mode);
1976 negate = !negate;
1977 break;
1979 default:
1980 gcc_unreachable ();
1984 x = gen_rtx_fmt_ee (code, mode, op0, op1);
1985 emit_insn (gen_rtx_SET (dest, x));
1987 return negate;
1990 /* Emit an integral vector conditional move. */
1992 void
1993 ia64_expand_vecint_cmov (rtx operands[])
1995 machine_mode mode = GET_MODE (operands[0]);
1996 enum rtx_code code = GET_CODE (operands[3]);
1997 bool negate;
1998 rtx cmp, x, ot, of;
2000 cmp = gen_reg_rtx (mode);
2001 negate = ia64_expand_vecint_compare (code, mode, cmp,
2002 operands[4], operands[5]);
2004 ot = operands[1+negate];
2005 of = operands[2-negate];
2007 if (ot == CONST0_RTX (mode))
2009 if (of == CONST0_RTX (mode))
2011 emit_move_insn (operands[0], ot);
2012 return;
2015 x = gen_rtx_NOT (mode, cmp);
2016 x = gen_rtx_AND (mode, x, of);
2017 emit_insn (gen_rtx_SET (operands[0], x));
2019 else if (of == CONST0_RTX (mode))
2021 x = gen_rtx_AND (mode, cmp, ot);
2022 emit_insn (gen_rtx_SET (operands[0], x));
2024 else
2026 rtx t, f;
2028 t = gen_reg_rtx (mode);
2029 x = gen_rtx_AND (mode, cmp, operands[1+negate]);
2030 emit_insn (gen_rtx_SET (t, x));
2032 f = gen_reg_rtx (mode);
2033 x = gen_rtx_NOT (mode, cmp);
2034 x = gen_rtx_AND (mode, x, operands[2-negate]);
2035 emit_insn (gen_rtx_SET (f, x));
2037 x = gen_rtx_IOR (mode, t, f);
2038 emit_insn (gen_rtx_SET (operands[0], x));
2042 /* Emit an integral vector min or max operation. Return true if all done. */
2044 bool
2045 ia64_expand_vecint_minmax (enum rtx_code code, machine_mode mode,
2046 rtx operands[])
2048 rtx xops[6];
2050 /* These four combinations are supported directly. */
2051 if (mode == V8QImode && (code == UMIN || code == UMAX))
2052 return false;
2053 if (mode == V4HImode && (code == SMIN || code == SMAX))
2054 return false;
2056 /* This combination can be implemented with only saturating subtraction. */
2057 if (mode == V4HImode && code == UMAX)
2059 rtx x, tmp = gen_reg_rtx (mode);
2061 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
2062 emit_insn (gen_rtx_SET (tmp, x));
2064 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
2065 return true;
2068 /* Everything else implemented via vector comparisons. */
2069 xops[0] = operands[0];
2070 xops[4] = xops[1] = operands[1];
2071 xops[5] = xops[2] = operands[2];
2073 switch (code)
2075 case UMIN:
2076 code = LTU;
2077 break;
2078 case UMAX:
2079 code = GTU;
2080 break;
2081 case SMIN:
2082 code = LT;
2083 break;
2084 case SMAX:
2085 code = GT;
2086 break;
2087 default:
2088 gcc_unreachable ();
2090 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
2092 ia64_expand_vecint_cmov (xops);
2093 return true;
2096 /* The vectors LO and HI each contain N halves of a double-wide vector.
2097 Reassemble either the first N/2 or the second N/2 elements. */
2099 void
2100 ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
2102 machine_mode vmode = GET_MODE (lo);
2103 unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
2104 struct expand_vec_perm_d d;
2105 bool ok;
2107 d.target = gen_lowpart (vmode, out);
2108 d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
2109 d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
2110 d.vmode = vmode;
2111 d.nelt = nelt;
2112 d.one_operand_p = false;
2113 d.testing_p = false;
2115 high = (highp ? nelt / 2 : 0);
2116 for (i = 0; i < nelt / 2; ++i)
2118 d.perm[i * 2] = i + high;
2119 d.perm[i * 2 + 1] = i + high + nelt;
2122 ok = ia64_expand_vec_perm_const_1 (&d);
2123 gcc_assert (ok);
2126 /* Return a vector of the sign-extension of VEC. */
2128 static rtx
2129 ia64_unpack_sign (rtx vec, bool unsignedp)
2131 machine_mode mode = GET_MODE (vec);
2132 rtx zero = CONST0_RTX (mode);
2134 if (unsignedp)
2135 return zero;
2136 else
2138 rtx sign = gen_reg_rtx (mode);
2139 bool neg;
2141 neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
2142 gcc_assert (!neg);
2144 return sign;
2148 /* Emit an integral vector unpack operation. */
2150 void
2151 ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
2153 rtx sign = ia64_unpack_sign (operands[1], unsignedp);
2154 ia64_unpack_assemble (operands[0], operands[1], sign, highp);
2157 /* Emit an integral vector widening sum operations. */
2159 void
2160 ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
2162 machine_mode wmode;
2163 rtx l, h, t, sign;
2165 sign = ia64_unpack_sign (operands[1], unsignedp);
2167 wmode = GET_MODE (operands[0]);
2168 l = gen_reg_rtx (wmode);
2169 h = gen_reg_rtx (wmode);
2171 ia64_unpack_assemble (l, operands[1], sign, false);
2172 ia64_unpack_assemble (h, operands[1], sign, true);
2174 t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
2175 t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
2176 if (t != operands[0])
2177 emit_move_insn (operands[0], t);
2180 /* Emit the appropriate sequence for a call. */
2182 void
2183 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
2184 int sibcall_p)
2186 rtx insn, b0;
2188 addr = XEXP (addr, 0);
2189 addr = convert_memory_address (DImode, addr);
2190 b0 = gen_rtx_REG (DImode, R_BR (0));
2192 /* ??? Should do this for functions known to bind local too. */
2193 if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2195 if (sibcall_p)
2196 insn = gen_sibcall_nogp (addr);
2197 else if (! retval)
2198 insn = gen_call_nogp (addr, b0);
2199 else
2200 insn = gen_call_value_nogp (retval, addr, b0);
2201 insn = emit_call_insn (insn);
2203 else
2205 if (sibcall_p)
2206 insn = gen_sibcall_gp (addr);
2207 else if (! retval)
2208 insn = gen_call_gp (addr, b0);
2209 else
2210 insn = gen_call_value_gp (retval, addr, b0);
2211 insn = emit_call_insn (insn);
2213 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2216 if (sibcall_p)
2217 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
2219 if (TARGET_ABI_OPEN_VMS)
2220 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2221 gen_rtx_REG (DImode, GR_REG (25)));
2224 static void
2225 reg_emitted (enum ia64_frame_regs r)
2227 if (emitted_frame_related_regs[r] == 0)
2228 emitted_frame_related_regs[r] = current_frame_info.r[r];
2229 else
2230 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2233 static int
2234 get_reg (enum ia64_frame_regs r)
2236 reg_emitted (r);
2237 return current_frame_info.r[r];
2240 static bool
2241 is_emitted (int regno)
2243 unsigned int r;
2245 for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2246 if (emitted_frame_related_regs[r] == regno)
2247 return true;
2248 return false;
2251 void
2252 ia64_reload_gp (void)
2254 rtx tmp;
2256 if (current_frame_info.r[reg_save_gp])
2258 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2260 else
2262 HOST_WIDE_INT offset;
2263 rtx offset_r;
2265 offset = (current_frame_info.spill_cfa_off
2266 + current_frame_info.spill_size);
2267 if (frame_pointer_needed)
2269 tmp = hard_frame_pointer_rtx;
2270 offset = -offset;
2272 else
2274 tmp = stack_pointer_rtx;
2275 offset = current_frame_info.total_size - offset;
2278 offset_r = GEN_INT (offset);
2279 if (satisfies_constraint_I (offset_r))
2280 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
2281 else
2283 emit_move_insn (pic_offset_table_rtx, offset_r);
2284 emit_insn (gen_adddi3 (pic_offset_table_rtx,
2285 pic_offset_table_rtx, tmp));
2288 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2291 emit_move_insn (pic_offset_table_rtx, tmp);
2294 void
2295 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2296 rtx scratch_b, int noreturn_p, int sibcall_p)
2298 rtx insn;
2299 bool is_desc = false;
2301 /* If we find we're calling through a register, then we're actually
2302 calling through a descriptor, so load up the values. */
2303 if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2305 rtx tmp;
2306 bool addr_dead_p;
2308 /* ??? We are currently constrained to *not* use peep2, because
2309 we can legitimately change the global lifetime of the GP
2310 (in the form of killing where previously live). This is
2311 because a call through a descriptor doesn't use the previous
2312 value of the GP, while a direct call does, and we do not
2313 commit to either form until the split here.
2315 That said, this means that we lack precise life info for
2316 whether ADDR is dead after this call. This is not terribly
2317 important, since we can fix things up essentially for free
2318 with the POST_DEC below, but it's nice to not use it when we
2319 can immediately tell it's not necessary. */
2320 addr_dead_p = ((noreturn_p || sibcall_p
2321 || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2322 REGNO (addr)))
2323 && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2325 /* Load the code address into scratch_b. */
2326 tmp = gen_rtx_POST_INC (Pmode, addr);
2327 tmp = gen_rtx_MEM (Pmode, tmp);
2328 emit_move_insn (scratch_r, tmp);
2329 emit_move_insn (scratch_b, scratch_r);
2331 /* Load the GP address. If ADDR is not dead here, then we must
2332 revert the change made above via the POST_INCREMENT. */
2333 if (!addr_dead_p)
2334 tmp = gen_rtx_POST_DEC (Pmode, addr);
2335 else
2336 tmp = addr;
2337 tmp = gen_rtx_MEM (Pmode, tmp);
2338 emit_move_insn (pic_offset_table_rtx, tmp);
2340 is_desc = true;
2341 addr = scratch_b;
2344 if (sibcall_p)
2345 insn = gen_sibcall_nogp (addr);
2346 else if (retval)
2347 insn = gen_call_value_nogp (retval, addr, retaddr);
2348 else
2349 insn = gen_call_nogp (addr, retaddr);
2350 emit_call_insn (insn);
2352 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2353 ia64_reload_gp ();
2356 /* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
2358 This differs from the generic code in that we know about the zero-extending
2359 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
2360 also know that ld.acq+cmpxchg.rel equals a full barrier.
2362 The loop we want to generate looks like
2364 cmp_reg = mem;
2365 label:
2366 old_reg = cmp_reg;
2367 new_reg = cmp_reg op val;
2368 cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2369 if (cmp_reg != old_reg)
2370 goto label;
2372 Note that we only do the plain load from memory once. Subsequent
2373 iterations use the value loaded by the compare-and-swap pattern. */
2375 void
2376 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2377 rtx old_dst, rtx new_dst, enum memmodel model)
2379 machine_mode mode = GET_MODE (mem);
2380 rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2381 enum insn_code icode;
2383 /* Special case for using fetchadd. */
2384 if ((mode == SImode || mode == DImode)
2385 && (code == PLUS || code == MINUS)
2386 && fetchadd_operand (val, mode))
2388 if (code == MINUS)
2389 val = GEN_INT (-INTVAL (val));
2391 if (!old_dst)
2392 old_dst = gen_reg_rtx (mode);
2394 switch (model)
2396 case MEMMODEL_ACQ_REL:
2397 case MEMMODEL_SEQ_CST:
2398 case MEMMODEL_SYNC_SEQ_CST:
2399 emit_insn (gen_memory_barrier ());
2400 /* FALLTHRU */
2401 case MEMMODEL_RELAXED:
2402 case MEMMODEL_ACQUIRE:
2403 case MEMMODEL_SYNC_ACQUIRE:
2404 case MEMMODEL_CONSUME:
2405 if (mode == SImode)
2406 icode = CODE_FOR_fetchadd_acq_si;
2407 else
2408 icode = CODE_FOR_fetchadd_acq_di;
2409 break;
2410 case MEMMODEL_RELEASE:
2411 case MEMMODEL_SYNC_RELEASE:
2412 if (mode == SImode)
2413 icode = CODE_FOR_fetchadd_rel_si;
2414 else
2415 icode = CODE_FOR_fetchadd_rel_di;
2416 break;
2418 default:
2419 gcc_unreachable ();
2422 emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2424 if (new_dst)
2426 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2427 true, OPTAB_WIDEN);
2428 if (new_reg != new_dst)
2429 emit_move_insn (new_dst, new_reg);
2431 return;
2434 /* Because of the volatile mem read, we get an ld.acq, which is the
2435 front half of the full barrier. The end half is the cmpxchg.rel.
2436 For relaxed and release memory models, we don't need this. But we
2437 also don't bother trying to prevent it either. */
2438 gcc_assert (is_mm_relaxed (model) || is_mm_release (model)
2439 || MEM_VOLATILE_P (mem));
2441 old_reg = gen_reg_rtx (DImode);
2442 cmp_reg = gen_reg_rtx (DImode);
2443 label = gen_label_rtx ();
2445 if (mode != DImode)
2447 val = simplify_gen_subreg (DImode, val, mode, 0);
2448 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2450 else
2451 emit_move_insn (cmp_reg, mem);
2453 emit_label (label);
2455 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2456 emit_move_insn (old_reg, cmp_reg);
2457 emit_move_insn (ar_ccv, cmp_reg);
2459 if (old_dst)
2460 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2462 new_reg = cmp_reg;
2463 if (code == NOT)
2465 new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2466 true, OPTAB_DIRECT);
2467 new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
2469 else
2470 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2471 true, OPTAB_DIRECT);
2473 if (mode != DImode)
2474 new_reg = gen_lowpart (mode, new_reg);
2475 if (new_dst)
2476 emit_move_insn (new_dst, new_reg);
2478 switch (model)
2480 case MEMMODEL_RELAXED:
2481 case MEMMODEL_ACQUIRE:
2482 case MEMMODEL_SYNC_ACQUIRE:
2483 case MEMMODEL_CONSUME:
2484 switch (mode)
2486 case E_QImode: icode = CODE_FOR_cmpxchg_acq_qi; break;
2487 case E_HImode: icode = CODE_FOR_cmpxchg_acq_hi; break;
2488 case E_SImode: icode = CODE_FOR_cmpxchg_acq_si; break;
2489 case E_DImode: icode = CODE_FOR_cmpxchg_acq_di; break;
2490 default:
2491 gcc_unreachable ();
2493 break;
2495 case MEMMODEL_RELEASE:
2496 case MEMMODEL_SYNC_RELEASE:
2497 case MEMMODEL_ACQ_REL:
2498 case MEMMODEL_SEQ_CST:
2499 case MEMMODEL_SYNC_SEQ_CST:
2500 switch (mode)
2502 case E_QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
2503 case E_HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
2504 case E_SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
2505 case E_DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
2506 default:
2507 gcc_unreachable ();
2509 break;
2511 default:
2512 gcc_unreachable ();
2515 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2517 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
2520 /* Begin the assembly file. */
2522 static void
2523 ia64_file_start (void)
2525 default_file_start ();
2526 emit_safe_across_calls ();
2529 void
2530 emit_safe_across_calls (void)
2532 unsigned int rs, re;
2533 int out_state;
2535 rs = 1;
2536 out_state = 0;
2537 while (1)
2539 while (rs < 64 && call_used_or_fixed_reg_p (PR_REG (rs)))
2540 rs++;
2541 if (rs >= 64)
2542 break;
2543 for (re = rs + 1;
2544 re < 64 && ! call_used_or_fixed_reg_p (PR_REG (re)); re++)
2545 continue;
2546 if (out_state == 0)
2548 fputs ("\t.pred.safe_across_calls ", asm_out_file);
2549 out_state = 1;
2551 else
2552 fputc (',', asm_out_file);
2553 if (re == rs + 1)
2554 fprintf (asm_out_file, "p%u", rs);
2555 else
2556 fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2557 rs = re + 1;
2559 if (out_state)
2560 fputc ('\n', asm_out_file);
2563 /* Globalize a declaration. */
2565 static void
2566 ia64_globalize_decl_name (FILE * stream, tree decl)
2568 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2569 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2570 if (version_attr)
2572 tree v = TREE_VALUE (TREE_VALUE (version_attr));
2573 const char *p = TREE_STRING_POINTER (v);
2574 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2576 targetm.asm_out.globalize_label (stream, name);
2577 if (TREE_CODE (decl) == FUNCTION_DECL)
2578 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2581 /* Helper function for ia64_compute_frame_size: find an appropriate general
2582 register to spill some special register to. SPECIAL_SPILL_MASK contains
2583 bits in GR0 to GR31 that have already been allocated by this routine.
2584 TRY_LOCALS is true if we should attempt to locate a local regnum. */
2586 static int
2587 find_gr_spill (enum ia64_frame_regs r, int try_locals)
2589 int regno;
2591 if (emitted_frame_related_regs[r] != 0)
2593 regno = emitted_frame_related_regs[r];
2594 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2595 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
2596 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2597 else if (crtl->is_leaf
2598 && regno >= GR_REG (1) && regno <= GR_REG (31))
2599 current_frame_info.gr_used_mask |= 1 << regno;
2601 return regno;
2604 /* If this is a leaf function, first try an otherwise unused
2605 call-clobbered register. */
2606 if (crtl->is_leaf)
2608 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2609 if (! df_regs_ever_live_p (regno)
2610 && call_used_or_fixed_reg_p (regno)
2611 && ! fixed_regs[regno]
2612 && ! global_regs[regno]
2613 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2614 && ! is_emitted (regno))
2616 current_frame_info.gr_used_mask |= 1 << regno;
2617 return regno;
2621 if (try_locals)
2623 regno = current_frame_info.n_local_regs;
2624 /* If there is a frame pointer, then we can't use loc79, because
2625 that is HARD_FRAME_POINTER_REGNUM. In particular, see the
2626 reg_name switching code in ia64_expand_prologue. */
2627 while (regno < (80 - frame_pointer_needed))
2628 if (! is_emitted (LOC_REG (regno++)))
2630 current_frame_info.n_local_regs = regno;
2631 return LOC_REG (regno - 1);
2635 /* Failed to find a general register to spill to. Must use stack. */
2636 return 0;
2639 /* In order to make for nice schedules, we try to allocate every temporary
2640 to a different register. We must of course stay away from call-saved,
2641 fixed, and global registers. We must also stay away from registers
2642 allocated in current_frame_info.gr_used_mask, since those include regs
2643 used all through the prologue.
2645 Any register allocated here must be used immediately. The idea is to
2646 aid scheduling, not to solve data flow problems. */
2648 static int last_scratch_gr_reg;
2650 static int
2651 next_scratch_gr_reg (void)
2653 int i, regno;
2655 for (i = 0; i < 32; ++i)
2657 regno = (last_scratch_gr_reg + i + 1) & 31;
2658 if (call_used_or_fixed_reg_p (regno)
2659 && ! fixed_regs[regno]
2660 && ! global_regs[regno]
2661 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2663 last_scratch_gr_reg = regno;
2664 return regno;
2668 /* There must be _something_ available. */
2669 gcc_unreachable ();
2672 /* Helper function for ia64_compute_frame_size, called through
2673 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
2675 static void
2676 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2678 unsigned int regno = REGNO (reg);
2679 if (regno < 32)
2681 unsigned int i, n = REG_NREGS (reg);
2682 for (i = 0; i < n; ++i)
2683 current_frame_info.gr_used_mask |= 1 << (regno + i);
2688 /* Returns the number of bytes offset between the frame pointer and the stack
2689 pointer for the current function. SIZE is the number of bytes of space
2690 needed for local variables. */
2692 static void
2693 ia64_compute_frame_size (HOST_WIDE_INT size)
2695 HOST_WIDE_INT total_size;
2696 HOST_WIDE_INT spill_size = 0;
2697 HOST_WIDE_INT extra_spill_size = 0;
2698 HOST_WIDE_INT pretend_args_size;
2699 HARD_REG_SET mask;
2700 int n_spilled = 0;
2701 int spilled_gr_p = 0;
2702 int spilled_fr_p = 0;
2703 unsigned int regno;
2704 int min_regno;
2705 int max_regno;
2706 int i;
2708 if (current_frame_info.initialized)
2709 return;
2711 memset (&current_frame_info, 0, sizeof current_frame_info);
2712 CLEAR_HARD_REG_SET (mask);
2714 /* Don't allocate scratches to the return register. */
2715 diddle_return_value (mark_reg_gr_used_mask, NULL);
2717 /* Don't allocate scratches to the EH scratch registers. */
2718 if (cfun->machine->ia64_eh_epilogue_sp)
2719 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2720 if (cfun->machine->ia64_eh_epilogue_bsp)
2721 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2723 /* Static stack checking uses r2 and r3. */
2724 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
2725 || flag_stack_clash_protection)
2726 current_frame_info.gr_used_mask |= 0xc;
2728 /* Find the size of the register stack frame. We have only 80 local
2729 registers, because we reserve 8 for the inputs and 8 for the
2730 outputs. */
2732 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2733 since we'll be adjusting that down later. */
2734 regno = LOC_REG (78) + ! frame_pointer_needed;
2735 for (; regno >= LOC_REG (0); regno--)
2736 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2737 break;
2738 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2740 /* For functions marked with the syscall_linkage attribute, we must mark
2741 all eight input registers as in use, so that locals aren't visible to
2742 the caller. */
2744 if (cfun->machine->n_varargs > 0
2745 || lookup_attribute ("syscall_linkage",
2746 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2747 current_frame_info.n_input_regs = 8;
2748 else
2750 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2751 if (df_regs_ever_live_p (regno))
2752 break;
2753 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2756 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2757 if (df_regs_ever_live_p (regno))
2758 break;
2759 i = regno - OUT_REG (0) + 1;
2761 #ifndef PROFILE_HOOK
2762 /* When -p profiling, we need one output register for the mcount argument.
2763 Likewise for -a profiling for the bb_init_func argument. For -ax
2764 profiling, we need two output registers for the two bb_init_trace_func
2765 arguments. */
2766 if (crtl->profile)
2767 i = MAX (i, 1);
2768 #endif
2769 current_frame_info.n_output_regs = i;
2771 /* ??? No rotating register support yet. */
2772 current_frame_info.n_rotate_regs = 0;
2774 /* Discover which registers need spilling, and how much room that
2775 will take. Begin with floating point and general registers,
2776 which will always wind up on the stack. */
2778 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2779 if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
2781 SET_HARD_REG_BIT (mask, regno);
2782 spill_size += 16;
2783 n_spilled += 1;
2784 spilled_fr_p = 1;
2787 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2788 if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
2790 SET_HARD_REG_BIT (mask, regno);
2791 spill_size += 8;
2792 n_spilled += 1;
2793 spilled_gr_p = 1;
2796 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2797 if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
2799 SET_HARD_REG_BIT (mask, regno);
2800 spill_size += 8;
2801 n_spilled += 1;
2804 /* Now come all special registers that might get saved in other
2805 general registers. */
2807 if (frame_pointer_needed)
2809 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
2810 /* If we did not get a register, then we take LOC79. This is guaranteed
2811 to be free, even if regs_ever_live is already set, because this is
2812 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
2813 as we don't count loc79 above. */
2814 if (current_frame_info.r[reg_fp] == 0)
2816 current_frame_info.r[reg_fp] = LOC_REG (79);
2817 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
2821 if (! crtl->is_leaf)
2823 /* Emit a save of BR0 if we call other functions. Do this even
2824 if this function doesn't return, as EH depends on this to be
2825 able to unwind the stack. */
2826 SET_HARD_REG_BIT (mask, BR_REG (0));
2828 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2829 if (current_frame_info.r[reg_save_b0] == 0)
2831 extra_spill_size += 8;
2832 n_spilled += 1;
2835 /* Similarly for ar.pfs. */
2836 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2837 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2838 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2840 extra_spill_size += 8;
2841 n_spilled += 1;
2844 /* Similarly for gp. Note that if we're calling setjmp, the stacked
2845 registers are clobbered, so we fall back to the stack. */
2846 current_frame_info.r[reg_save_gp]
2847 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2848 if (current_frame_info.r[reg_save_gp] == 0)
2850 SET_HARD_REG_BIT (mask, GR_REG (1));
2851 spill_size += 8;
2852 n_spilled += 1;
2855 else
2857 if (df_regs_ever_live_p (BR_REG (0))
2858 && ! call_used_or_fixed_reg_p (BR_REG (0)))
2860 SET_HARD_REG_BIT (mask, BR_REG (0));
2861 extra_spill_size += 8;
2862 n_spilled += 1;
2865 if (df_regs_ever_live_p (AR_PFS_REGNUM))
2867 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2868 current_frame_info.r[reg_save_ar_pfs]
2869 = find_gr_spill (reg_save_ar_pfs, 1);
2870 if (current_frame_info.r[reg_save_ar_pfs] == 0)
2872 extra_spill_size += 8;
2873 n_spilled += 1;
2878 /* Unwind descriptor hackery: things are most efficient if we allocate
2879 consecutive GR save registers for RP, PFS, FP in that order. However,
2880 it is absolutely critical that FP get the only hard register that's
2881 guaranteed to be free, so we allocated it first. If all three did
2882 happen to be allocated hard regs, and are consecutive, rearrange them
2883 into the preferred order now.
2885 If we have already emitted code for any of those registers,
2886 then it's already too late to change. */
2887 min_regno = MIN (current_frame_info.r[reg_fp],
2888 MIN (current_frame_info.r[reg_save_b0],
2889 current_frame_info.r[reg_save_ar_pfs]));
2890 max_regno = MAX (current_frame_info.r[reg_fp],
2891 MAX (current_frame_info.r[reg_save_b0],
2892 current_frame_info.r[reg_save_ar_pfs]));
2893 if (min_regno > 0
2894 && min_regno + 2 == max_regno
2895 && (current_frame_info.r[reg_fp] == min_regno + 1
2896 || current_frame_info.r[reg_save_b0] == min_regno + 1
2897 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2898 && (emitted_frame_related_regs[reg_save_b0] == 0
2899 || emitted_frame_related_regs[reg_save_b0] == min_regno)
2900 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2901 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2902 && (emitted_frame_related_regs[reg_fp] == 0
2903 || emitted_frame_related_regs[reg_fp] == min_regno + 2))
2905 current_frame_info.r[reg_save_b0] = min_regno;
2906 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2907 current_frame_info.r[reg_fp] = min_regno + 2;
2910 /* See if we need to store the predicate register block. */
2911 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2912 if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
2913 break;
2914 if (regno <= PR_REG (63))
2916 SET_HARD_REG_BIT (mask, PR_REG (0));
2917 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2918 if (current_frame_info.r[reg_save_pr] == 0)
2920 extra_spill_size += 8;
2921 n_spilled += 1;
2924 /* ??? Mark them all as used so that register renaming and such
2925 are free to use them. */
2926 for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2927 df_set_regs_ever_live (regno, true);
2930 /* If we're forced to use st8.spill, we're forced to save and restore
2931 ar.unat as well. The check for existing liveness allows inline asm
2932 to touch ar.unat. */
2933 if (spilled_gr_p || cfun->machine->n_varargs
2934 || df_regs_ever_live_p (AR_UNAT_REGNUM))
2936 df_set_regs_ever_live (AR_UNAT_REGNUM, true);
2937 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2938 current_frame_info.r[reg_save_ar_unat]
2939 = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2940 if (current_frame_info.r[reg_save_ar_unat] == 0)
2942 extra_spill_size += 8;
2943 n_spilled += 1;
2947 if (df_regs_ever_live_p (AR_LC_REGNUM))
2949 SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2950 current_frame_info.r[reg_save_ar_lc]
2951 = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2952 if (current_frame_info.r[reg_save_ar_lc] == 0)
2954 extra_spill_size += 8;
2955 n_spilled += 1;
2959 /* If we have an odd number of words of pretend arguments written to
2960 the stack, then the FR save area will be unaligned. We round the
2961 size of this area up to keep things 16 byte aligned. */
2962 if (spilled_fr_p)
2963 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
2964 else
2965 pretend_args_size = crtl->args.pretend_args_size;
2967 total_size = (spill_size + extra_spill_size + size + pretend_args_size
2968 + crtl->outgoing_args_size);
2969 total_size = IA64_STACK_ALIGN (total_size);
2971 /* We always use the 16-byte scratch area provided by the caller, but
2972 if we are a leaf function, there's no one to which we need to provide
2973 a scratch area. However, if the function allocates dynamic stack space,
2974 the dynamic offset is computed early and contains STACK_POINTER_OFFSET,
2975 so we need to cope. */
2976 if (crtl->is_leaf && !cfun->calls_alloca)
2977 total_size = MAX (0, total_size - 16);
2979 current_frame_info.total_size = total_size;
2980 current_frame_info.spill_cfa_off = pretend_args_size - 16;
2981 current_frame_info.spill_size = spill_size;
2982 current_frame_info.extra_spill_size = extra_spill_size;
2983 current_frame_info.mask = mask;
2984 current_frame_info.n_spilled = n_spilled;
2985 current_frame_info.initialized = reload_completed;
2988 /* Worker function for TARGET_CAN_ELIMINATE. */
2990 bool
2991 ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2993 return (to == BR_REG (0) ? crtl->is_leaf : true);
2996 /* Compute the initial difference between the specified pair of registers. */
2998 HOST_WIDE_INT
2999 ia64_initial_elimination_offset (int from, int to)
3001 HOST_WIDE_INT offset;
3003 ia64_compute_frame_size (get_frame_size ());
3004 switch (from)
3006 case FRAME_POINTER_REGNUM:
3007 switch (to)
3009 case HARD_FRAME_POINTER_REGNUM:
3010 offset = -current_frame_info.total_size;
3011 if (!crtl->is_leaf || cfun->calls_alloca)
3012 offset += 16 + crtl->outgoing_args_size;
3013 break;
3015 case STACK_POINTER_REGNUM:
3016 offset = 0;
3017 if (!crtl->is_leaf || cfun->calls_alloca)
3018 offset += 16 + crtl->outgoing_args_size;
3019 break;
3021 default:
3022 gcc_unreachable ();
3024 break;
3026 case ARG_POINTER_REGNUM:
3027 /* Arguments start above the 16 byte save area, unless stdarg
3028 in which case we store through the 16 byte save area. */
3029 switch (to)
3031 case HARD_FRAME_POINTER_REGNUM:
3032 offset = 16 - crtl->args.pretend_args_size;
3033 break;
3035 case STACK_POINTER_REGNUM:
3036 offset = (current_frame_info.total_size
3037 + 16 - crtl->args.pretend_args_size);
3038 break;
3040 default:
3041 gcc_unreachable ();
3043 break;
3045 default:
3046 gcc_unreachable ();
3049 return offset;
3052 /* If there are more than a trivial number of register spills, we use
3053 two interleaved iterators so that we can get two memory references
3054 per insn group.
3056 In order to simplify things in the prologue and epilogue expanders,
3057 we use helper functions to fix up the memory references after the
3058 fact with the appropriate offsets to a POST_MODIFY memory mode.
3059 The following data structure tracks the state of the two iterators
3060 while insns are being emitted. */
3062 struct spill_fill_data
3064 rtx_insn *init_after; /* point at which to emit initializations */
3065 rtx init_reg[2]; /* initial base register */
3066 rtx iter_reg[2]; /* the iterator registers */
3067 rtx *prev_addr[2]; /* address of last memory use */
3068 rtx_insn *prev_insn[2]; /* the insn corresponding to prev_addr */
3069 HOST_WIDE_INT prev_off[2]; /* last offset */
3070 int n_iter; /* number of iterators in use */
3071 int next_iter; /* next iterator to use */
3072 unsigned int save_gr_used_mask;
3075 static struct spill_fill_data spill_fill_data;
3077 static void
3078 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
3080 int i;
3082 spill_fill_data.init_after = get_last_insn ();
3083 spill_fill_data.init_reg[0] = init_reg;
3084 spill_fill_data.init_reg[1] = init_reg;
3085 spill_fill_data.prev_addr[0] = NULL;
3086 spill_fill_data.prev_addr[1] = NULL;
3087 spill_fill_data.prev_insn[0] = NULL;
3088 spill_fill_data.prev_insn[1] = NULL;
3089 spill_fill_data.prev_off[0] = cfa_off;
3090 spill_fill_data.prev_off[1] = cfa_off;
3091 spill_fill_data.next_iter = 0;
3092 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
3094 spill_fill_data.n_iter = 1 + (n_spills > 2);
3095 for (i = 0; i < spill_fill_data.n_iter; ++i)
3097 int regno = next_scratch_gr_reg ();
3098 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
3099 current_frame_info.gr_used_mask |= 1 << regno;
3103 static void
3104 finish_spill_pointers (void)
3106 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
3109 static rtx
3110 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
3112 int iter = spill_fill_data.next_iter;
3113 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
3114 rtx disp_rtx = GEN_INT (disp);
3115 rtx mem;
3117 if (spill_fill_data.prev_addr[iter])
3119 if (satisfies_constraint_N (disp_rtx))
3121 *spill_fill_data.prev_addr[iter]
3122 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
3123 gen_rtx_PLUS (DImode,
3124 spill_fill_data.iter_reg[iter],
3125 disp_rtx));
3126 add_reg_note (spill_fill_data.prev_insn[iter],
3127 REG_INC, spill_fill_data.iter_reg[iter]);
3129 else
3131 /* ??? Could use register post_modify for loads. */
3132 if (!satisfies_constraint_I (disp_rtx))
3134 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3135 emit_move_insn (tmp, disp_rtx);
3136 disp_rtx = tmp;
3138 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3139 spill_fill_data.iter_reg[iter], disp_rtx));
3142 /* Micro-optimization: if we've created a frame pointer, it's at
3143 CFA 0, which may allow the real iterator to be initialized lower,
3144 slightly increasing parallelism. Also, if there are few saves
3145 it may eliminate the iterator entirely. */
3146 else if (disp == 0
3147 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
3148 && frame_pointer_needed)
3150 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
3151 set_mem_alias_set (mem, get_varargs_alias_set ());
3152 return mem;
3154 else
3156 rtx seq;
3157 rtx_insn *insn;
3159 if (disp == 0)
3160 seq = gen_movdi (spill_fill_data.iter_reg[iter],
3161 spill_fill_data.init_reg[iter]);
3162 else
3164 start_sequence ();
3166 if (!satisfies_constraint_I (disp_rtx))
3168 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3169 emit_move_insn (tmp, disp_rtx);
3170 disp_rtx = tmp;
3173 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3174 spill_fill_data.init_reg[iter],
3175 disp_rtx));
3177 seq = get_insns ();
3178 end_sequence ();
3181 /* Careful for being the first insn in a sequence. */
3182 if (spill_fill_data.init_after)
3183 insn = emit_insn_after (seq, spill_fill_data.init_after);
3184 else
3186 rtx_insn *first = get_insns ();
3187 if (first)
3188 insn = emit_insn_before (seq, first);
3189 else
3190 insn = emit_insn (seq);
3192 spill_fill_data.init_after = insn;
3195 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
3197 /* ??? Not all of the spills are for varargs, but some of them are.
3198 The rest of the spills belong in an alias set of their own. But
3199 it doesn't actually hurt to include them here. */
3200 set_mem_alias_set (mem, get_varargs_alias_set ());
3202 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
3203 spill_fill_data.prev_off[iter] = cfa_off;
3205 if (++iter >= spill_fill_data.n_iter)
3206 iter = 0;
3207 spill_fill_data.next_iter = iter;
3209 return mem;
3212 static void
3213 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
3214 rtx frame_reg)
3216 int iter = spill_fill_data.next_iter;
3217 rtx mem;
3218 rtx_insn *insn;
3220 mem = spill_restore_mem (reg, cfa_off);
3221 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
3222 spill_fill_data.prev_insn[iter] = insn;
3224 if (frame_reg)
3226 rtx base;
3227 HOST_WIDE_INT off;
3229 RTX_FRAME_RELATED_P (insn) = 1;
3231 /* Don't even pretend that the unwind code can intuit its way
3232 through a pair of interleaved post_modify iterators. Just
3233 provide the correct answer. */
3235 if (frame_pointer_needed)
3237 base = hard_frame_pointer_rtx;
3238 off = - cfa_off;
3240 else
3242 base = stack_pointer_rtx;
3243 off = current_frame_info.total_size - cfa_off;
3246 add_reg_note (insn, REG_CFA_OFFSET,
3247 gen_rtx_SET (gen_rtx_MEM (GET_MODE (reg),
3248 plus_constant (Pmode,
3249 base, off)),
3250 frame_reg));
3254 static void
3255 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
3257 int iter = spill_fill_data.next_iter;
3258 rtx_insn *insn;
3260 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3261 GEN_INT (cfa_off)));
3262 spill_fill_data.prev_insn[iter] = insn;
3265 /* Wrapper functions that discards the CONST_INT spill offset. These
3266 exist so that we can give gr_spill/gr_fill the offset they need and
3267 use a consistent function interface. */
3269 static rtx
3270 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3272 return gen_movdi (dest, src);
3275 static rtx
3276 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3278 return gen_fr_spill (dest, src);
3281 static rtx
3282 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3284 return gen_fr_restore (dest, src);
3287 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
3289 /* See Table 6.2 of the IA-64 Software Developer Manual, Volume 2. */
3290 #define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0)
3292 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
3293 inclusive. These are offsets from the current stack pointer. BS_SIZE
3294 is the size of the backing store. ??? This clobbers r2 and r3. */
3296 static void
3297 ia64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
3298 int bs_size)
3300 rtx r2 = gen_rtx_REG (Pmode, GR_REG (2));
3301 rtx r3 = gen_rtx_REG (Pmode, GR_REG (3));
3302 rtx p6 = gen_rtx_REG (BImode, PR_REG (6));
3304 /* On the IA-64 there is a second stack in memory, namely the Backing Store
3305 of the Register Stack Engine. We also need to probe it after checking
3306 that the 2 stacks don't overlap. */
3307 emit_insn (gen_bsp_value (r3));
3308 emit_move_insn (r2, GEN_INT (-(first + size)));
3310 /* Compare current value of BSP and SP registers. */
3311 emit_insn (gen_rtx_SET (p6, gen_rtx_fmt_ee (LTU, BImode,
3312 r3, stack_pointer_rtx)));
3314 /* Compute the address of the probe for the Backing Store (which grows
3315 towards higher addresses). We probe only at the first offset of
3316 the next page because some OS (eg Linux/ia64) only extend the
3317 backing store when this specific address is hit (but generate a SEGV
3318 on other address). Page size is the worst case (4KB). The reserve
3319 size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough.
3320 Also compute the address of the last probe for the memory stack
3321 (which grows towards lower addresses). */
3322 emit_insn (gen_rtx_SET (r3, plus_constant (Pmode, r3, 4095)));
3323 emit_insn (gen_rtx_SET (r2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3325 /* Compare them and raise SEGV if the former has topped the latter. */
3326 emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3327 gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
3328 gen_rtx_SET (p6, gen_rtx_fmt_ee (GEU, BImode,
3329 r3, r2))));
3330 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, r3, GEN_INT (12),
3331 const0_rtx),
3332 const0_rtx));
3333 emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3334 gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
3335 gen_rtx_TRAP_IF (VOIDmode, const1_rtx,
3336 GEN_INT (11))));
3338 /* Probe the Backing Store if necessary. */
3339 if (bs_size > 0)
3340 emit_stack_probe (r3);
3342 /* Probe the memory stack if necessary. */
3343 if (size == 0)
3346 /* See if we have a constant small number of probes to generate. If so,
3347 that's the easy case. */
3348 else if (size <= PROBE_INTERVAL)
3349 emit_stack_probe (r2);
3351 /* The run-time loop is made up of 9 insns in the generic case while this
3352 compile-time loop is made up of 5+2*(n-2) insns for n # of intervals. */
3353 else if (size <= 4 * PROBE_INTERVAL)
3355 HOST_WIDE_INT i;
3357 emit_move_insn (r2, GEN_INT (-(first + PROBE_INTERVAL)));
3358 emit_insn (gen_rtx_SET (r2,
3359 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3360 emit_stack_probe (r2);
3362 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
3363 it exceeds SIZE. If only two probes are needed, this will not
3364 generate any code. Then probe at FIRST + SIZE. */
3365 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
3367 emit_insn (gen_rtx_SET (r2,
3368 plus_constant (Pmode, r2, -PROBE_INTERVAL)));
3369 emit_stack_probe (r2);
3372 emit_insn (gen_rtx_SET (r2,
3373 plus_constant (Pmode, r2,
3374 (i - PROBE_INTERVAL) - size)));
3375 emit_stack_probe (r2);
3378 /* Otherwise, do the same as above, but in a loop. Note that we must be
3379 extra careful with variables wrapping around because we might be at
3380 the very top (or the very bottom) of the address space and we have
3381 to be able to handle this case properly; in particular, we use an
3382 equality test for the loop condition. */
3383 else
3385 HOST_WIDE_INT rounded_size;
3387 emit_move_insn (r2, GEN_INT (-first));
3390 /* Step 1: round SIZE to the previous multiple of the interval. */
3392 rounded_size = size & -PROBE_INTERVAL;
3395 /* Step 2: compute initial and final value of the loop counter. */
3397 /* TEST_ADDR = SP + FIRST. */
3398 emit_insn (gen_rtx_SET (r2,
3399 gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3401 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
3402 if (rounded_size > (1 << 21))
3404 emit_move_insn (r3, GEN_INT (-rounded_size));
3405 emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2, r3)));
3407 else
3408 emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2,
3409 GEN_INT (-rounded_size))));
3412 /* Step 3: the loop
3416 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
3417 probe at TEST_ADDR
3419 while (TEST_ADDR != LAST_ADDR)
3421 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
3422 until it is equal to ROUNDED_SIZE. */
3424 emit_insn (gen_probe_stack_range (r2, r2, r3));
3427 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
3428 that SIZE is equal to ROUNDED_SIZE. */
3430 /* TEMP = SIZE - ROUNDED_SIZE. */
3431 if (size != rounded_size)
3433 emit_insn (gen_rtx_SET (r2, plus_constant (Pmode, r2,
3434 rounded_size - size)));
3435 emit_stack_probe (r2);
3439 /* Make sure nothing is scheduled before we are done. */
3440 emit_insn (gen_blockage ());
3443 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
3444 absolute addresses. */
3446 const char *
3447 output_probe_stack_range (rtx reg1, rtx reg2)
3449 static int labelno = 0;
3450 char loop_lab[32];
3451 rtx xops[3];
3453 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
3455 /* Loop. */
3456 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
3458 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
3459 xops[0] = reg1;
3460 xops[1] = GEN_INT (-PROBE_INTERVAL);
3461 output_asm_insn ("addl %0 = %1, %0", xops);
3462 fputs ("\t;;\n", asm_out_file);
3464 /* Probe at TEST_ADDR. */
3465 output_asm_insn ("probe.w.fault %0, 0", xops);
3467 /* Test if TEST_ADDR == LAST_ADDR. */
3468 xops[1] = reg2;
3469 xops[2] = gen_rtx_REG (BImode, PR_REG (6));
3470 output_asm_insn ("cmp.eq %2, %I2 = %0, %1", xops);
3472 /* Branch. */
3473 fprintf (asm_out_file, "\t(%s) br.cond.dpnt ", reg_names [PR_REG (7)]);
3474 assemble_name_raw (asm_out_file, loop_lab);
3475 fputc ('\n', asm_out_file);
3477 return "";
3480 /* Called after register allocation to add any instructions needed for the
3481 prologue. Using a prologue insn is favored compared to putting all of the
3482 instructions in output_function_prologue(), since it allows the scheduler
3483 to intermix instructions with the saves of the caller saved registers. In
3484 some cases, it might be necessary to emit a barrier instruction as the last
3485 insn to prevent such scheduling.
3487 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3488 so that the debug info generation code can handle them properly.
3490 The register save area is laid out like so:
3491 cfa+16
3492 [ varargs spill area ]
3493 [ fr register spill area ]
3494 [ br register spill area ]
3495 [ ar register spill area ]
3496 [ pr register spill area ]
3497 [ gr register spill area ] */
3499 /* ??? Get inefficient code when the frame size is larger than can fit in an
3500 adds instruction. */
3502 void
3503 ia64_expand_prologue (void)
3505 rtx_insn *insn;
3506 rtx ar_pfs_save_reg, ar_unat_save_reg;
3507 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3508 rtx reg, alt_reg;
3510 ia64_compute_frame_size (get_frame_size ());
3511 last_scratch_gr_reg = 15;
3513 if (flag_stack_usage_info)
3514 current_function_static_stack_size = current_frame_info.total_size;
3516 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
3517 || flag_stack_clash_protection)
3519 HOST_WIDE_INT size = current_frame_info.total_size;
3520 int bs_size = BACKING_STORE_SIZE (current_frame_info.n_input_regs
3521 + current_frame_info.n_local_regs);
3523 if (crtl->is_leaf && !cfun->calls_alloca)
3525 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
3526 ia64_emit_probe_stack_range (get_stack_check_protect (),
3527 size - get_stack_check_protect (),
3528 bs_size);
3529 else if (size + bs_size > get_stack_check_protect ())
3530 ia64_emit_probe_stack_range (get_stack_check_protect (),
3531 0, bs_size);
3533 else if (size + bs_size > 0)
3534 ia64_emit_probe_stack_range (get_stack_check_protect (), size, bs_size);
3537 if (dump_file)
3539 fprintf (dump_file, "ia64 frame related registers "
3540 "recorded in current_frame_info.r[]:\n");
3541 #define PRINTREG(a) if (current_frame_info.r[a]) \
3542 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3543 PRINTREG(reg_fp);
3544 PRINTREG(reg_save_b0);
3545 PRINTREG(reg_save_pr);
3546 PRINTREG(reg_save_ar_pfs);
3547 PRINTREG(reg_save_ar_unat);
3548 PRINTREG(reg_save_ar_lc);
3549 PRINTREG(reg_save_gp);
3550 #undef PRINTREG
3553 /* If there is no epilogue, then we don't need some prologue insns.
3554 We need to avoid emitting the dead prologue insns, because flow
3555 will complain about them. */
3556 if (optimize)
3558 edge e;
3559 edge_iterator ei;
3561 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
3562 if ((e->flags & EDGE_FAKE) == 0
3563 && (e->flags & EDGE_FALLTHRU) != 0)
3564 break;
3565 epilogue_p = (e != NULL);
3567 else
3568 epilogue_p = 1;
3570 /* Set the local, input, and output register names. We need to do this
3571 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3572 half. If we use in/loc/out register names, then we get assembler errors
3573 in crtn.S because there is no alloc insn or regstk directive in there. */
3574 if (! TARGET_REG_NAMES)
3576 int inputs = current_frame_info.n_input_regs;
3577 int locals = current_frame_info.n_local_regs;
3578 int outputs = current_frame_info.n_output_regs;
3580 for (i = 0; i < inputs; i++)
3581 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3582 for (i = 0; i < locals; i++)
3583 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3584 for (i = 0; i < outputs; i++)
3585 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3588 /* Set the frame pointer register name. The regnum is logically loc79,
3589 but of course we'll not have allocated that many locals. Rather than
3590 worrying about renumbering the existing rtxs, we adjust the name. */
3591 /* ??? This code means that we can never use one local register when
3592 there is a frame pointer. loc79 gets wasted in this case, as it is
3593 renamed to a register that will never be used. See also the try_locals
3594 code in find_gr_spill. */
3595 if (current_frame_info.r[reg_fp])
3597 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3598 reg_names[HARD_FRAME_POINTER_REGNUM]
3599 = reg_names[current_frame_info.r[reg_fp]];
3600 reg_names[current_frame_info.r[reg_fp]] = tmp;
3603 /* We don't need an alloc instruction if we've used no outputs or locals. */
3604 if (current_frame_info.n_local_regs == 0
3605 && current_frame_info.n_output_regs == 0
3606 && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3607 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3609 /* If there is no alloc, but there are input registers used, then we
3610 need a .regstk directive. */
3611 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3612 ar_pfs_save_reg = NULL_RTX;
3614 else
3616 current_frame_info.need_regstk = 0;
3618 if (current_frame_info.r[reg_save_ar_pfs])
3620 regno = current_frame_info.r[reg_save_ar_pfs];
3621 reg_emitted (reg_save_ar_pfs);
3623 else
3624 regno = next_scratch_gr_reg ();
3625 ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3627 insn = emit_insn (gen_alloc (ar_pfs_save_reg,
3628 GEN_INT (current_frame_info.n_input_regs),
3629 GEN_INT (current_frame_info.n_local_regs),
3630 GEN_INT (current_frame_info.n_output_regs),
3631 GEN_INT (current_frame_info.n_rotate_regs)));
3632 if (current_frame_info.r[reg_save_ar_pfs])
3634 RTX_FRAME_RELATED_P (insn) = 1;
3635 add_reg_note (insn, REG_CFA_REGISTER,
3636 gen_rtx_SET (ar_pfs_save_reg,
3637 gen_rtx_REG (DImode, AR_PFS_REGNUM)));
3641 /* Set up frame pointer, stack pointer, and spill iterators. */
3643 n_varargs = cfun->machine->n_varargs;
3644 setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3645 stack_pointer_rtx, 0);
3647 if (frame_pointer_needed)
3649 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3650 RTX_FRAME_RELATED_P (insn) = 1;
3652 /* Force the unwind info to recognize this as defining a new CFA,
3653 rather than some temp register setup. */
3654 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
3657 if (current_frame_info.total_size != 0)
3659 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3660 rtx offset;
3662 if (satisfies_constraint_I (frame_size_rtx))
3663 offset = frame_size_rtx;
3664 else
3666 regno = next_scratch_gr_reg ();
3667 offset = gen_rtx_REG (DImode, regno);
3668 emit_move_insn (offset, frame_size_rtx);
3671 insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3672 stack_pointer_rtx, offset));
3674 if (! frame_pointer_needed)
3676 RTX_FRAME_RELATED_P (insn) = 1;
3677 add_reg_note (insn, REG_CFA_ADJUST_CFA,
3678 gen_rtx_SET (stack_pointer_rtx,
3679 gen_rtx_PLUS (DImode,
3680 stack_pointer_rtx,
3681 frame_size_rtx)));
3684 /* ??? At this point we must generate a magic insn that appears to
3685 modify the stack pointer, the frame pointer, and all spill
3686 iterators. This would allow the most scheduling freedom. For
3687 now, just hard stop. */
3688 emit_insn (gen_blockage ());
3691 /* Must copy out ar.unat before doing any integer spills. */
3692 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3694 if (current_frame_info.r[reg_save_ar_unat])
3696 ar_unat_save_reg
3697 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3698 reg_emitted (reg_save_ar_unat);
3700 else
3702 alt_regno = next_scratch_gr_reg ();
3703 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3704 current_frame_info.gr_used_mask |= 1 << alt_regno;
3707 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3708 insn = emit_move_insn (ar_unat_save_reg, reg);
3709 if (current_frame_info.r[reg_save_ar_unat])
3711 RTX_FRAME_RELATED_P (insn) = 1;
3712 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3715 /* Even if we're not going to generate an epilogue, we still
3716 need to save the register so that EH works. */
3717 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
3718 emit_insn (gen_prologue_use (ar_unat_save_reg));
3720 else
3721 ar_unat_save_reg = NULL_RTX;
3723 /* Spill all varargs registers. Do this before spilling any GR registers,
3724 since we want the UNAT bits for the GR registers to override the UNAT
3725 bits from varargs, which we don't care about. */
3727 cfa_off = -16;
3728 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3730 reg = gen_rtx_REG (DImode, regno);
3731 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3734 /* Locate the bottom of the register save area. */
3735 cfa_off = (current_frame_info.spill_cfa_off
3736 + current_frame_info.spill_size
3737 + current_frame_info.extra_spill_size);
3739 /* Save the predicate register block either in a register or in memory. */
3740 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3742 reg = gen_rtx_REG (DImode, PR_REG (0));
3743 if (current_frame_info.r[reg_save_pr] != 0)
3745 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3746 reg_emitted (reg_save_pr);
3747 insn = emit_move_insn (alt_reg, reg);
3749 /* ??? Denote pr spill/fill by a DImode move that modifies all
3750 64 hard registers. */
3751 RTX_FRAME_RELATED_P (insn) = 1;
3752 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3754 /* Even if we're not going to generate an epilogue, we still
3755 need to save the register so that EH works. */
3756 if (! epilogue_p)
3757 emit_insn (gen_prologue_use (alt_reg));
3759 else
3761 alt_regno = next_scratch_gr_reg ();
3762 alt_reg = gen_rtx_REG (DImode, alt_regno);
3763 insn = emit_move_insn (alt_reg, reg);
3764 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3765 cfa_off -= 8;
3769 /* Handle AR regs in numerical order. All of them get special handling. */
3770 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3771 && current_frame_info.r[reg_save_ar_unat] == 0)
3773 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3774 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3775 cfa_off -= 8;
3778 /* The alloc insn already copied ar.pfs into a general register. The
3779 only thing we have to do now is copy that register to a stack slot
3780 if we'd not allocated a local register for the job. */
3781 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3782 && current_frame_info.r[reg_save_ar_pfs] == 0)
3784 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3785 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3786 cfa_off -= 8;
3789 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3791 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3792 if (current_frame_info.r[reg_save_ar_lc] != 0)
3794 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3795 reg_emitted (reg_save_ar_lc);
3796 insn = emit_move_insn (alt_reg, reg);
3797 RTX_FRAME_RELATED_P (insn) = 1;
3798 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3800 /* Even if we're not going to generate an epilogue, we still
3801 need to save the register so that EH works. */
3802 if (! epilogue_p)
3803 emit_insn (gen_prologue_use (alt_reg));
3805 else
3807 alt_regno = next_scratch_gr_reg ();
3808 alt_reg = gen_rtx_REG (DImode, alt_regno);
3809 emit_move_insn (alt_reg, reg);
3810 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3811 cfa_off -= 8;
3815 /* Save the return pointer. */
3816 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3818 reg = gen_rtx_REG (DImode, BR_REG (0));
3819 if (current_frame_info.r[reg_save_b0] != 0)
3821 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3822 reg_emitted (reg_save_b0);
3823 insn = emit_move_insn (alt_reg, reg);
3824 RTX_FRAME_RELATED_P (insn) = 1;
3825 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (alt_reg, pc_rtx));
3827 /* Even if we're not going to generate an epilogue, we still
3828 need to save the register so that EH works. */
3829 if (! epilogue_p)
3830 emit_insn (gen_prologue_use (alt_reg));
3832 else
3834 alt_regno = next_scratch_gr_reg ();
3835 alt_reg = gen_rtx_REG (DImode, alt_regno);
3836 emit_move_insn (alt_reg, reg);
3837 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3838 cfa_off -= 8;
3842 if (current_frame_info.r[reg_save_gp])
3844 reg_emitted (reg_save_gp);
3845 insn = emit_move_insn (gen_rtx_REG (DImode,
3846 current_frame_info.r[reg_save_gp]),
3847 pic_offset_table_rtx);
3850 /* We should now be at the base of the gr/br/fr spill area. */
3851 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3852 + current_frame_info.spill_size));
3854 /* Spill all general registers. */
3855 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3856 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3858 reg = gen_rtx_REG (DImode, regno);
3859 do_spill (gen_gr_spill, reg, cfa_off, reg);
3860 cfa_off -= 8;
3863 /* Spill the rest of the BR registers. */
3864 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3865 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3867 alt_regno = next_scratch_gr_reg ();
3868 alt_reg = gen_rtx_REG (DImode, alt_regno);
3869 reg = gen_rtx_REG (DImode, regno);
3870 emit_move_insn (alt_reg, reg);
3871 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3872 cfa_off -= 8;
3875 /* Align the frame and spill all FR registers. */
3876 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3877 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3879 gcc_assert (!(cfa_off & 15));
3880 reg = gen_rtx_REG (XFmode, regno);
3881 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3882 cfa_off -= 16;
3885 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3887 finish_spill_pointers ();
3890 /* Output the textual info surrounding the prologue. */
3892 void
3893 ia64_start_function (FILE *file, const char *fnname,
3894 tree decl ATTRIBUTE_UNUSED)
3896 #if TARGET_ABI_OPEN_VMS
3897 vms_start_function (fnname);
3898 #endif
3900 fputs ("\t.proc ", file);
3901 assemble_name (file, fnname);
3902 fputc ('\n', file);
3903 ASM_OUTPUT_FUNCTION_LABEL (file, fnname, decl);
3906 /* Called after register allocation to add any instructions needed for the
3907 epilogue. Using an epilogue insn is favored compared to putting all of the
3908 instructions in output_function_prologue(), since it allows the scheduler
3909 to intermix instructions with the saves of the caller saved registers. In
3910 some cases, it might be necessary to emit a barrier instruction as the last
3911 insn to prevent such scheduling. */
3913 void
3914 ia64_expand_epilogue (int sibcall_p)
3916 rtx_insn *insn;
3917 rtx reg, alt_reg, ar_unat_save_reg;
3918 int regno, alt_regno, cfa_off;
3920 ia64_compute_frame_size (get_frame_size ());
3922 /* If there is a frame pointer, then we use it instead of the stack
3923 pointer, so that the stack pointer does not need to be valid when
3924 the epilogue starts. See EXIT_IGNORE_STACK. */
3925 if (frame_pointer_needed)
3926 setup_spill_pointers (current_frame_info.n_spilled,
3927 hard_frame_pointer_rtx, 0);
3928 else
3929 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3930 current_frame_info.total_size);
3932 if (current_frame_info.total_size != 0)
3934 /* ??? At this point we must generate a magic insn that appears to
3935 modify the spill iterators and the frame pointer. This would
3936 allow the most scheduling freedom. For now, just hard stop. */
3937 emit_insn (gen_blockage ());
3940 /* Locate the bottom of the register save area. */
3941 cfa_off = (current_frame_info.spill_cfa_off
3942 + current_frame_info.spill_size
3943 + current_frame_info.extra_spill_size);
3945 /* Restore the predicate registers. */
3946 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3948 if (current_frame_info.r[reg_save_pr] != 0)
3950 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3951 reg_emitted (reg_save_pr);
3953 else
3955 alt_regno = next_scratch_gr_reg ();
3956 alt_reg = gen_rtx_REG (DImode, alt_regno);
3957 do_restore (gen_movdi_x, alt_reg, cfa_off);
3958 cfa_off -= 8;
3960 reg = gen_rtx_REG (DImode, PR_REG (0));
3961 emit_move_insn (reg, alt_reg);
3964 /* Restore the application registers. */
3966 /* Load the saved unat from the stack, but do not restore it until
3967 after the GRs have been restored. */
3968 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3970 if (current_frame_info.r[reg_save_ar_unat] != 0)
3972 ar_unat_save_reg
3973 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3974 reg_emitted (reg_save_ar_unat);
3976 else
3978 alt_regno = next_scratch_gr_reg ();
3979 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3980 current_frame_info.gr_used_mask |= 1 << alt_regno;
3981 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3982 cfa_off -= 8;
3985 else
3986 ar_unat_save_reg = NULL_RTX;
3988 if (current_frame_info.r[reg_save_ar_pfs] != 0)
3990 reg_emitted (reg_save_ar_pfs);
3991 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
3992 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3993 emit_move_insn (reg, alt_reg);
3995 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3997 alt_regno = next_scratch_gr_reg ();
3998 alt_reg = gen_rtx_REG (DImode, alt_regno);
3999 do_restore (gen_movdi_x, alt_reg, cfa_off);
4000 cfa_off -= 8;
4001 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
4002 emit_move_insn (reg, alt_reg);
4005 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
4007 if (current_frame_info.r[reg_save_ar_lc] != 0)
4009 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
4010 reg_emitted (reg_save_ar_lc);
4012 else
4014 alt_regno = next_scratch_gr_reg ();
4015 alt_reg = gen_rtx_REG (DImode, alt_regno);
4016 do_restore (gen_movdi_x, alt_reg, cfa_off);
4017 cfa_off -= 8;
4019 reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
4020 emit_move_insn (reg, alt_reg);
4023 /* Restore the return pointer. */
4024 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4026 if (current_frame_info.r[reg_save_b0] != 0)
4028 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4029 reg_emitted (reg_save_b0);
4031 else
4033 alt_regno = next_scratch_gr_reg ();
4034 alt_reg = gen_rtx_REG (DImode, alt_regno);
4035 do_restore (gen_movdi_x, alt_reg, cfa_off);
4036 cfa_off -= 8;
4038 reg = gen_rtx_REG (DImode, BR_REG (0));
4039 emit_move_insn (reg, alt_reg);
4042 /* We should now be at the base of the gr/br/fr spill area. */
4043 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
4044 + current_frame_info.spill_size));
4046 /* The GP may be stored on the stack in the prologue, but it's
4047 never restored in the epilogue. Skip the stack slot. */
4048 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
4049 cfa_off -= 8;
4051 /* Restore all general registers. */
4052 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
4053 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4055 reg = gen_rtx_REG (DImode, regno);
4056 do_restore (gen_gr_restore, reg, cfa_off);
4057 cfa_off -= 8;
4060 /* Restore the branch registers. */
4061 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
4062 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4064 alt_regno = next_scratch_gr_reg ();
4065 alt_reg = gen_rtx_REG (DImode, alt_regno);
4066 do_restore (gen_movdi_x, alt_reg, cfa_off);
4067 cfa_off -= 8;
4068 reg = gen_rtx_REG (DImode, regno);
4069 emit_move_insn (reg, alt_reg);
4072 /* Restore floating point registers. */
4073 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
4074 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4076 gcc_assert (!(cfa_off & 15));
4077 reg = gen_rtx_REG (XFmode, regno);
4078 do_restore (gen_fr_restore_x, reg, cfa_off);
4079 cfa_off -= 16;
4082 /* Restore ar.unat for real. */
4083 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
4085 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
4086 emit_move_insn (reg, ar_unat_save_reg);
4089 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
4091 finish_spill_pointers ();
4093 if (current_frame_info.total_size
4094 || cfun->machine->ia64_eh_epilogue_sp
4095 || frame_pointer_needed)
4097 /* ??? At this point we must generate a magic insn that appears to
4098 modify the spill iterators, the stack pointer, and the frame
4099 pointer. This would allow the most scheduling freedom. For now,
4100 just hard stop. */
4101 emit_insn (gen_blockage ());
4104 if (cfun->machine->ia64_eh_epilogue_sp)
4105 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
4106 else if (frame_pointer_needed)
4108 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
4109 RTX_FRAME_RELATED_P (insn) = 1;
4110 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
4112 else if (current_frame_info.total_size)
4114 rtx offset, frame_size_rtx;
4116 frame_size_rtx = GEN_INT (current_frame_info.total_size);
4117 if (satisfies_constraint_I (frame_size_rtx))
4118 offset = frame_size_rtx;
4119 else
4121 regno = next_scratch_gr_reg ();
4122 offset = gen_rtx_REG (DImode, regno);
4123 emit_move_insn (offset, frame_size_rtx);
4126 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
4127 offset));
4129 RTX_FRAME_RELATED_P (insn) = 1;
4130 add_reg_note (insn, REG_CFA_ADJUST_CFA,
4131 gen_rtx_SET (stack_pointer_rtx,
4132 gen_rtx_PLUS (DImode,
4133 stack_pointer_rtx,
4134 frame_size_rtx)));
4137 if (cfun->machine->ia64_eh_epilogue_bsp)
4138 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
4140 if (! sibcall_p)
4141 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
4142 else
4144 int fp = GR_REG (2);
4145 /* We need a throw away register here, r0 and r1 are reserved,
4146 so r2 is the first available call clobbered register. If
4147 there was a frame_pointer register, we may have swapped the
4148 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
4149 sure we're using the string "r2" when emitting the register
4150 name for the assembler. */
4151 if (current_frame_info.r[reg_fp]
4152 && current_frame_info.r[reg_fp] == GR_REG (2))
4153 fp = HARD_FRAME_POINTER_REGNUM;
4155 /* We must emit an alloc to force the input registers to become output
4156 registers. Otherwise, if the callee tries to pass its parameters
4157 through to another call without an intervening alloc, then these
4158 values get lost. */
4159 /* ??? We don't need to preserve all input registers. We only need to
4160 preserve those input registers used as arguments to the sibling call.
4161 It is unclear how to compute that number here. */
4162 if (current_frame_info.n_input_regs != 0)
4164 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
4166 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
4167 const0_rtx, const0_rtx,
4168 n_inputs, const0_rtx));
4169 RTX_FRAME_RELATED_P (insn) = 1;
4171 /* ??? We need to mark the alloc as frame-related so that it gets
4172 passed into ia64_asm_unwind_emit for ia64-specific unwinding.
4173 But there's nothing dwarf2 related to be done wrt the register
4174 windows. If we do nothing, dwarf2out will abort on the UNSPEC;
4175 the empty parallel means dwarf2out will not see anything. */
4176 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4177 gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0)));
4182 /* Return 1 if br.ret can do all the work required to return from a
4183 function. */
4186 ia64_direct_return (void)
4188 if (reload_completed && ! frame_pointer_needed)
4190 ia64_compute_frame_size (get_frame_size ());
4192 return (current_frame_info.total_size == 0
4193 && current_frame_info.n_spilled == 0
4194 && current_frame_info.r[reg_save_b0] == 0
4195 && current_frame_info.r[reg_save_pr] == 0
4196 && current_frame_info.r[reg_save_ar_pfs] == 0
4197 && current_frame_info.r[reg_save_ar_unat] == 0
4198 && current_frame_info.r[reg_save_ar_lc] == 0);
4200 return 0;
4203 /* Return the magic cookie that we use to hold the return address
4204 during early compilation. */
4207 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
4209 if (count != 0)
4210 return NULL;
4211 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
4214 /* Split this value after reload, now that we know where the return
4215 address is saved. */
4217 void
4218 ia64_split_return_addr_rtx (rtx dest)
4220 rtx src;
4222 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4224 if (current_frame_info.r[reg_save_b0] != 0)
4226 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4227 reg_emitted (reg_save_b0);
4229 else
4231 HOST_WIDE_INT off;
4232 unsigned int regno;
4233 rtx off_r;
4235 /* Compute offset from CFA for BR0. */
4236 /* ??? Must be kept in sync with ia64_expand_prologue. */
4237 off = (current_frame_info.spill_cfa_off
4238 + current_frame_info.spill_size);
4239 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
4240 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4241 off -= 8;
4243 /* Convert CFA offset to a register based offset. */
4244 if (frame_pointer_needed)
4245 src = hard_frame_pointer_rtx;
4246 else
4248 src = stack_pointer_rtx;
4249 off += current_frame_info.total_size;
4252 /* Load address into scratch register. */
4253 off_r = GEN_INT (off);
4254 if (satisfies_constraint_I (off_r))
4255 emit_insn (gen_adddi3 (dest, src, off_r));
4256 else
4258 emit_move_insn (dest, off_r);
4259 emit_insn (gen_adddi3 (dest, src, dest));
4262 src = gen_rtx_MEM (Pmode, dest);
4265 else
4266 src = gen_rtx_REG (DImode, BR_REG (0));
4268 emit_move_insn (dest, src);
4272 ia64_hard_regno_rename_ok (int from, int to)
4274 /* Don't clobber any of the registers we reserved for the prologue. */
4275 unsigned int r;
4277 for (r = reg_fp; r <= reg_save_ar_lc; r++)
4278 if (to == current_frame_info.r[r]
4279 || from == current_frame_info.r[r]
4280 || to == emitted_frame_related_regs[r]
4281 || from == emitted_frame_related_regs[r])
4282 return 0;
4284 /* Don't use output registers outside the register frame. */
4285 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
4286 return 0;
4288 /* Retain even/oddness on predicate register pairs. */
4289 if (PR_REGNO_P (from) && PR_REGNO_P (to))
4290 return (from & 1) == (to & 1);
4292 return 1;
4295 /* Implement TARGET_HARD_REGNO_NREGS.
4297 ??? We say that BImode PR values require two registers. This allows us to
4298 easily store the normal and inverted values. We use CCImode to indicate
4299 a single predicate register. */
4301 static unsigned int
4302 ia64_hard_regno_nregs (unsigned int regno, machine_mode mode)
4304 if (regno == PR_REG (0) && mode == DImode)
4305 return 64;
4306 if (PR_REGNO_P (regno) && (mode) == BImode)
4307 return 2;
4308 if ((PR_REGNO_P (regno) || GR_REGNO_P (regno)) && mode == CCImode)
4309 return 1;
4310 if (FR_REGNO_P (regno) && mode == XFmode)
4311 return 1;
4312 if (FR_REGNO_P (regno) && mode == RFmode)
4313 return 1;
4314 if (FR_REGNO_P (regno) && mode == XCmode)
4315 return 2;
4316 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
4319 /* Implement TARGET_HARD_REGNO_MODE_OK. */
4321 static bool
4322 ia64_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
4324 if (FR_REGNO_P (regno))
4325 return (GET_MODE_CLASS (mode) != MODE_CC
4326 && mode != BImode
4327 && mode != TFmode);
4329 if (PR_REGNO_P (regno))
4330 return mode == BImode || GET_MODE_CLASS (mode) == MODE_CC;
4332 if (GR_REGNO_P (regno))
4333 return mode != XFmode && mode != XCmode && mode != RFmode;
4335 if (AR_REGNO_P (regno))
4336 return mode == DImode;
4338 if (BR_REGNO_P (regno))
4339 return mode == DImode;
4341 return false;
4344 /* Implement TARGET_MODES_TIEABLE_P.
4346 Don't tie integer and FP modes, as that causes us to get integer registers
4347 allocated for FP instructions. XFmode only supported in FP registers so
4348 we can't tie it with any other modes. */
4350 static bool
4351 ia64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
4353 return (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)
4354 && ((mode1 == XFmode || mode1 == XCmode || mode1 == RFmode)
4355 == (mode2 == XFmode || mode2 == XCmode || mode2 == RFmode))
4356 && (mode1 == BImode) == (mode2 == BImode));
4359 /* Target hook for assembling integer objects. Handle word-sized
4360 aligned objects and detect the cases when @fptr is needed. */
4362 static bool
4363 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
4365 if (size == POINTER_SIZE / BITS_PER_UNIT
4366 && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
4367 && GET_CODE (x) == SYMBOL_REF
4368 && SYMBOL_REF_FUNCTION_P (x))
4370 static const char * const directive[2][2] = {
4371 /* 64-bit pointer */ /* 32-bit pointer */
4372 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
4373 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
4375 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
4376 output_addr_const (asm_out_file, x);
4377 fputs (")\n", asm_out_file);
4378 return true;
4380 return default_assemble_integer (x, size, aligned_p);
4383 /* Emit the function prologue. */
4385 static void
4386 ia64_output_function_prologue (FILE *file)
4388 int mask, grsave, grsave_prev;
4390 if (current_frame_info.need_regstk)
4391 fprintf (file, "\t.regstk %d, %d, %d, %d\n",
4392 current_frame_info.n_input_regs,
4393 current_frame_info.n_local_regs,
4394 current_frame_info.n_output_regs,
4395 current_frame_info.n_rotate_regs);
4397 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4398 return;
4400 /* Emit the .prologue directive. */
4402 mask = 0;
4403 grsave = grsave_prev = 0;
4404 if (current_frame_info.r[reg_save_b0] != 0)
4406 mask |= 8;
4407 grsave = grsave_prev = current_frame_info.r[reg_save_b0];
4409 if (current_frame_info.r[reg_save_ar_pfs] != 0
4410 && (grsave_prev == 0
4411 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
4413 mask |= 4;
4414 if (grsave_prev == 0)
4415 grsave = current_frame_info.r[reg_save_ar_pfs];
4416 grsave_prev = current_frame_info.r[reg_save_ar_pfs];
4418 if (current_frame_info.r[reg_fp] != 0
4419 && (grsave_prev == 0
4420 || current_frame_info.r[reg_fp] == grsave_prev + 1))
4422 mask |= 2;
4423 if (grsave_prev == 0)
4424 grsave = HARD_FRAME_POINTER_REGNUM;
4425 grsave_prev = current_frame_info.r[reg_fp];
4427 if (current_frame_info.r[reg_save_pr] != 0
4428 && (grsave_prev == 0
4429 || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
4431 mask |= 1;
4432 if (grsave_prev == 0)
4433 grsave = current_frame_info.r[reg_save_pr];
4436 if (mask && TARGET_GNU_AS)
4437 fprintf (file, "\t.prologue %d, %d\n", mask,
4438 ia64_debugger_regno (grsave));
4439 else
4440 fputs ("\t.prologue\n", file);
4442 /* Emit a .spill directive, if necessary, to relocate the base of
4443 the register spill area. */
4444 if (current_frame_info.spill_cfa_off != -16)
4445 fprintf (file, "\t.spill %ld\n",
4446 (long) (current_frame_info.spill_cfa_off
4447 + current_frame_info.spill_size));
4450 /* Emit the .body directive at the scheduled end of the prologue. */
4452 static void
4453 ia64_output_function_end_prologue (FILE *file)
4455 if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4456 return;
4458 fputs ("\t.body\n", file);
4461 /* Emit the function epilogue. */
4463 static void
4464 ia64_output_function_epilogue (FILE *)
4466 int i;
4468 if (current_frame_info.r[reg_fp])
4470 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
4471 reg_names[HARD_FRAME_POINTER_REGNUM]
4472 = reg_names[current_frame_info.r[reg_fp]];
4473 reg_names[current_frame_info.r[reg_fp]] = tmp;
4474 reg_emitted (reg_fp);
4476 if (! TARGET_REG_NAMES)
4478 for (i = 0; i < current_frame_info.n_input_regs; i++)
4479 reg_names[IN_REG (i)] = ia64_input_reg_names[i];
4480 for (i = 0; i < current_frame_info.n_local_regs; i++)
4481 reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
4482 for (i = 0; i < current_frame_info.n_output_regs; i++)
4483 reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
4486 current_frame_info.initialized = 0;
4490 ia64_debugger_regno (int regno)
4492 /* In ia64_expand_prologue we quite literally renamed the frame pointer
4493 from its home at loc79 to something inside the register frame. We
4494 must perform the same renumbering here for the debug info. */
4495 if (current_frame_info.r[reg_fp])
4497 if (regno == HARD_FRAME_POINTER_REGNUM)
4498 regno = current_frame_info.r[reg_fp];
4499 else if (regno == current_frame_info.r[reg_fp])
4500 regno = HARD_FRAME_POINTER_REGNUM;
4503 if (IN_REGNO_P (regno))
4504 return 32 + regno - IN_REG (0);
4505 else if (LOC_REGNO_P (regno))
4506 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
4507 else if (OUT_REGNO_P (regno))
4508 return (32 + current_frame_info.n_input_regs
4509 + current_frame_info.n_local_regs + regno - OUT_REG (0));
4510 else
4511 return regno;
4514 /* Implement TARGET_TRAMPOLINE_INIT.
4516 The trampoline should set the static chain pointer to value placed
4517 into the trampoline and should branch to the specified routine.
4518 To make the normal indirect-subroutine calling convention work,
4519 the trampoline must look like a function descriptor; the first
4520 word being the target address and the second being the target's
4521 global pointer.
4523 We abuse the concept of a global pointer by arranging for it
4524 to point to the data we need to load. The complete trampoline
4525 has the following form:
4527 +-------------------+ \
4528 TRAMP: | __ia64_trampoline | |
4529 +-------------------+ > fake function descriptor
4530 | TRAMP+16 | |
4531 +-------------------+ /
4532 | target descriptor |
4533 +-------------------+
4534 | static link |
4535 +-------------------+
4538 static void
4539 ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
4541 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4542 rtx addr, addr_reg, tramp, eight = GEN_INT (8);
4544 /* The Intel assembler requires that the global __ia64_trampoline symbol
4545 be declared explicitly */
4546 if (!TARGET_GNU_AS)
4548 static bool declared_ia64_trampoline = false;
4550 if (!declared_ia64_trampoline)
4552 declared_ia64_trampoline = true;
4553 (*targetm.asm_out.globalize_label) (asm_out_file,
4554 "__ia64_trampoline");
4558 /* Make sure addresses are Pmode even if we are in ILP32 mode. */
4559 addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
4560 fnaddr = convert_memory_address (Pmode, fnaddr);
4561 static_chain = convert_memory_address (Pmode, static_chain);
4563 /* Load up our iterator. */
4564 addr_reg = copy_to_reg (addr);
4565 m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
4567 /* The first two words are the fake descriptor:
4568 __ia64_trampoline, ADDR+16. */
4569 tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4570 if (TARGET_ABI_OPEN_VMS)
4572 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4573 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4574 relocation against function symbols to make it identical to the
4575 LTOFF_FPTR22 relocation. Emit the latter directly to stay within
4576 strict ELF and dereference to get the bare code address. */
4577 rtx reg = gen_reg_rtx (Pmode);
4578 SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4579 emit_move_insn (reg, tramp);
4580 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4581 tramp = reg;
4583 emit_move_insn (m_tramp, tramp);
4584 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4585 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4587 emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (Pmode, addr, 16)));
4588 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4589 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4591 /* The third word is the target descriptor. */
4592 emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
4593 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4594 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4596 /* The fourth word is the static chain. */
4597 emit_move_insn (m_tramp, static_chain);
4600 /* Do any needed setup for a variadic function. CUM has not been updated
4601 for the last named argument, which is given by ARG.
4603 We generate the actual spill instructions during prologue generation. */
4605 static void
4606 ia64_setup_incoming_varargs (cumulative_args_t cum,
4607 const function_arg_info &arg,
4608 int *pretend_size,
4609 int second_time ATTRIBUTE_UNUSED)
4611 CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum);
4613 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl)))
4614 /* Skip the current argument. */
4615 ia64_function_arg_advance (pack_cumulative_args (&next_cum), arg);
4617 if (next_cum.words < MAX_ARGUMENT_SLOTS)
4619 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
4620 *pretend_size = n * UNITS_PER_WORD;
4621 cfun->machine->n_varargs = n;
4625 /* Check whether TYPE is a homogeneous floating point aggregate. If
4626 it is, return the mode of the floating point type that appears
4627 in all leafs. If it is not, return VOIDmode.
4629 An aggregate is a homogeneous floating point aggregate is if all
4630 fields/elements in it have the same floating point type (e.g,
4631 SFmode). 128-bit quad-precision floats are excluded.
4633 Variable sized aggregates should never arrive here, since we should
4634 have already decided to pass them by reference. Top-level zero-sized
4635 aggregates are excluded because our parallels crash the middle-end. */
4637 static machine_mode
4638 hfa_element_mode (const_tree type, bool nested)
4640 machine_mode element_mode = VOIDmode;
4641 machine_mode mode;
4642 enum tree_code code = TREE_CODE (type);
4643 int know_element_mode = 0;
4644 tree t;
4646 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4647 return VOIDmode;
4649 switch (code)
4651 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
4652 case BOOLEAN_TYPE: case POINTER_TYPE:
4653 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
4654 case LANG_TYPE: case FUNCTION_TYPE:
4655 return VOIDmode;
4657 /* Fortran complex types are supposed to be HFAs, so we need to handle
4658 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
4659 types though. */
4660 case COMPLEX_TYPE:
4661 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
4662 && TYPE_MODE (type) != TCmode)
4663 return GET_MODE_INNER (TYPE_MODE (type));
4664 else
4665 return VOIDmode;
4667 case REAL_TYPE:
4668 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4669 mode if this is contained within an aggregate. */
4670 if (nested && TYPE_MODE (type) != TFmode)
4671 return TYPE_MODE (type);
4672 else
4673 return VOIDmode;
4675 case ARRAY_TYPE:
4676 return hfa_element_mode (TREE_TYPE (type), 1);
4678 case RECORD_TYPE:
4679 case UNION_TYPE:
4680 case QUAL_UNION_TYPE:
4681 for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
4683 if (TREE_CODE (t) != FIELD_DECL || DECL_FIELD_ABI_IGNORED (t))
4684 continue;
4686 mode = hfa_element_mode (TREE_TYPE (t), 1);
4687 if (know_element_mode)
4689 if (mode != element_mode)
4690 return VOIDmode;
4692 else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4693 return VOIDmode;
4694 else
4696 know_element_mode = 1;
4697 element_mode = mode;
4700 return element_mode;
4702 default:
4703 /* If we reach here, we probably have some front-end specific type
4704 that the backend doesn't know about. This can happen via the
4705 aggregate_value_p call in init_function_start. All we can do is
4706 ignore unknown tree types. */
4707 return VOIDmode;
4710 return VOIDmode;
4713 /* Return the number of words required to hold a quantity of TYPE and MODE
4714 when passed as an argument. */
4715 static int
4716 ia64_function_arg_words (const_tree type, machine_mode mode)
4718 int words;
4720 if (mode == BLKmode)
4721 words = int_size_in_bytes (type);
4722 else
4723 words = GET_MODE_SIZE (mode);
4725 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
4728 /* Return the number of registers that should be skipped so the current
4729 argument (described by TYPE and WORDS) will be properly aligned.
4731 Integer and float arguments larger than 8 bytes start at the next
4732 even boundary. Aggregates larger than 8 bytes start at the next
4733 even boundary if the aggregate has 16 byte alignment. Note that
4734 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4735 but are still to be aligned in registers.
4737 ??? The ABI does not specify how to handle aggregates with
4738 alignment from 9 to 15 bytes, or greater than 16. We handle them
4739 all as if they had 16 byte alignment. Such aggregates can occur
4740 only if gcc extensions are used. */
4741 static int
4742 ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
4743 const_tree type, int words)
4745 /* No registers are skipped on VMS. */
4746 if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
4747 return 0;
4749 if (type
4750 && TREE_CODE (type) != INTEGER_TYPE
4751 && TREE_CODE (type) != REAL_TYPE)
4752 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4753 else
4754 return words > 1;
4757 /* Return rtx for register where argument is passed, or zero if it is passed
4758 on the stack. */
4759 /* ??? 128-bit quad-precision floats are always passed in general
4760 registers. */
4762 static rtx
4763 ia64_function_arg_1 (cumulative_args_t cum_v, const function_arg_info &arg,
4764 bool incoming)
4766 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4768 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
4769 int words = ia64_function_arg_words (arg.type, arg.mode);
4770 int offset = ia64_function_arg_offset (cum, arg.type, words);
4771 machine_mode hfa_mode = VOIDmode;
4773 /* For OPEN VMS, emit the instruction setting up the argument register here,
4774 when we know this will be together with the other arguments setup related
4775 insns. This is not the conceptually best place to do this, but this is
4776 the easiest as we have convenient access to cumulative args info. */
4778 if (TARGET_ABI_OPEN_VMS && arg.end_marker_p ())
4780 unsigned HOST_WIDE_INT regval = cum->words;
4781 int i;
4783 for (i = 0; i < 8; i++)
4784 regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4786 emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4787 GEN_INT (regval));
4790 /* If all argument slots are used, then it must go on the stack. */
4791 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4792 return 0;
4794 /* On OpenVMS argument is either in Rn or Fn. */
4795 if (TARGET_ABI_OPEN_VMS)
4797 if (FLOAT_MODE_P (arg.mode))
4798 return gen_rtx_REG (arg.mode, FR_ARG_FIRST + cum->words);
4799 else
4800 return gen_rtx_REG (arg.mode, basereg + cum->words);
4803 /* Check for and handle homogeneous FP aggregates. */
4804 if (arg.type)
4805 hfa_mode = hfa_element_mode (arg.type, 0);
4807 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
4808 and unprototyped hfas are passed specially. */
4809 if (hfa_mode != VOIDmode && (! cum->prototype || arg.named))
4811 rtx loc[16];
4812 int i = 0;
4813 int fp_regs = cum->fp_regs;
4814 int int_regs = cum->words + offset;
4815 int hfa_size = GET_MODE_SIZE (hfa_mode);
4816 int byte_size;
4817 int args_byte_size;
4819 /* If prototyped, pass it in FR regs then GR regs.
4820 If not prototyped, pass it in both FR and GR regs.
4822 If this is an SFmode aggregate, then it is possible to run out of
4823 FR regs while GR regs are still left. In that case, we pass the
4824 remaining part in the GR regs. */
4826 /* Fill the FP regs. We do this always. We stop if we reach the end
4827 of the argument, the last FP register, or the last argument slot. */
4829 byte_size = arg.promoted_size_in_bytes ();
4830 args_byte_size = int_regs * UNITS_PER_WORD;
4831 offset = 0;
4832 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4833 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4835 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4836 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4837 + fp_regs)),
4838 GEN_INT (offset));
4839 offset += hfa_size;
4840 args_byte_size += hfa_size;
4841 fp_regs++;
4844 /* If no prototype, then the whole thing must go in GR regs. */
4845 if (! cum->prototype)
4846 offset = 0;
4847 /* If this is an SFmode aggregate, then we might have some left over
4848 that needs to go in GR regs. */
4849 else if (byte_size != offset)
4850 int_regs += offset / UNITS_PER_WORD;
4852 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
4854 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4856 machine_mode gr_mode = DImode;
4857 unsigned int gr_size;
4859 /* If we have an odd 4 byte hunk because we ran out of FR regs,
4860 then this goes in a GR reg left adjusted/little endian, right
4861 adjusted/big endian. */
4862 /* ??? Currently this is handled wrong, because 4-byte hunks are
4863 always right adjusted/little endian. */
4864 if (offset & 0x4)
4865 gr_mode = SImode;
4866 /* If we have an even 4 byte hunk because the aggregate is a
4867 multiple of 4 bytes in size, then this goes in a GR reg right
4868 adjusted/little endian. */
4869 else if (byte_size - offset == 4)
4870 gr_mode = SImode;
4872 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4873 gen_rtx_REG (gr_mode, (basereg
4874 + int_regs)),
4875 GEN_INT (offset));
4877 gr_size = GET_MODE_SIZE (gr_mode);
4878 offset += gr_size;
4879 if (gr_size == UNITS_PER_WORD
4880 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4881 int_regs++;
4882 else if (gr_size > UNITS_PER_WORD)
4883 int_regs += gr_size / UNITS_PER_WORD;
4885 return gen_rtx_PARALLEL (arg.mode, gen_rtvec_v (i, loc));
4888 /* Integral and aggregates go in general registers. If we have run out of
4889 FR registers, then FP values must also go in general registers. This can
4890 happen when we have a SFmode HFA. */
4891 else if (arg.mode == TFmode || arg.mode == TCmode
4892 || !FLOAT_MODE_P (arg.mode)
4893 || cum->fp_regs == MAX_ARGUMENT_SLOTS)
4895 int byte_size = arg.promoted_size_in_bytes ();
4896 if (BYTES_BIG_ENDIAN
4897 && (arg.mode == BLKmode || arg.aggregate_type_p ())
4898 && byte_size < UNITS_PER_WORD
4899 && byte_size > 0)
4901 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4902 gen_rtx_REG (DImode,
4903 (basereg + cum->words
4904 + offset)),
4905 const0_rtx);
4906 return gen_rtx_PARALLEL (arg.mode, gen_rtvec (1, gr_reg));
4908 else
4909 return gen_rtx_REG (arg.mode, basereg + cum->words + offset);
4913 /* If there is a prototype, then FP values go in a FR register when
4914 named, and in a GR register when unnamed. */
4915 else if (cum->prototype)
4917 if (arg.named)
4918 return gen_rtx_REG (arg.mode, FR_ARG_FIRST + cum->fp_regs);
4919 /* In big-endian mode, an anonymous SFmode value must be represented
4920 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4921 the value into the high half of the general register. */
4922 else if (BYTES_BIG_ENDIAN && arg.mode == SFmode)
4923 return gen_rtx_PARALLEL (arg.mode,
4924 gen_rtvec (1,
4925 gen_rtx_EXPR_LIST (VOIDmode,
4926 gen_rtx_REG (DImode, basereg + cum->words + offset),
4927 const0_rtx)));
4928 else
4929 return gen_rtx_REG (arg.mode, basereg + cum->words + offset);
4931 /* If there is no prototype, then FP values go in both FR and GR
4932 registers. */
4933 else
4935 /* See comment above. */
4936 machine_mode inner_mode =
4937 (BYTES_BIG_ENDIAN && arg.mode == SFmode) ? DImode : arg.mode;
4939 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4940 gen_rtx_REG (arg.mode, (FR_ARG_FIRST
4941 + cum->fp_regs)),
4942 const0_rtx);
4943 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4944 gen_rtx_REG (inner_mode,
4945 (basereg + cum->words
4946 + offset)),
4947 const0_rtx);
4949 return gen_rtx_PARALLEL (arg.mode, gen_rtvec (2, fp_reg, gr_reg));
4953 /* Implement TARGET_FUNCION_ARG target hook. */
4955 static rtx
4956 ia64_function_arg (cumulative_args_t cum, const function_arg_info &arg)
4958 return ia64_function_arg_1 (cum, arg, false);
4961 /* Implement TARGET_FUNCION_INCOMING_ARG target hook. */
4963 static rtx
4964 ia64_function_incoming_arg (cumulative_args_t cum,
4965 const function_arg_info &arg)
4967 return ia64_function_arg_1 (cum, arg, true);
4970 /* Return number of bytes, at the beginning of the argument, that must be
4971 put in registers. 0 is the argument is entirely in registers or entirely
4972 in memory. */
4974 static int
4975 ia64_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
4977 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4979 int words = ia64_function_arg_words (arg.type, arg.mode);
4980 int offset = ia64_function_arg_offset (cum, arg.type, words);
4982 /* If all argument slots are used, then it must go on the stack. */
4983 if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4984 return 0;
4986 /* It doesn't matter whether the argument goes in FR or GR regs. If
4987 it fits within the 8 argument slots, then it goes entirely in
4988 registers. If it extends past the last argument slot, then the rest
4989 goes on the stack. */
4991 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4992 return 0;
4994 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
4997 /* Return ivms_arg_type based on machine_mode. */
4999 static enum ivms_arg_type
5000 ia64_arg_type (machine_mode mode)
5002 switch (mode)
5004 case E_SFmode:
5005 return FS;
5006 case E_DFmode:
5007 return FT;
5008 default:
5009 return I64;
5013 /* Update CUM to point after this argument. This is patterned after
5014 ia64_function_arg. */
5016 static void
5017 ia64_function_arg_advance (cumulative_args_t cum_v,
5018 const function_arg_info &arg)
5020 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5021 int words = ia64_function_arg_words (arg.type, arg.mode);
5022 int offset = ia64_function_arg_offset (cum, arg.type, words);
5023 machine_mode hfa_mode = VOIDmode;
5025 /* If all arg slots are already full, then there is nothing to do. */
5026 if (cum->words >= MAX_ARGUMENT_SLOTS)
5028 cum->words += words + offset;
5029 return;
5032 cum->atypes[cum->words] = ia64_arg_type (arg.mode);
5033 cum->words += words + offset;
5035 /* On OpenVMS argument is either in Rn or Fn. */
5036 if (TARGET_ABI_OPEN_VMS)
5038 cum->int_regs = cum->words;
5039 cum->fp_regs = cum->words;
5040 return;
5043 /* Check for and handle homogeneous FP aggregates. */
5044 if (arg.type)
5045 hfa_mode = hfa_element_mode (arg.type, 0);
5047 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
5048 and unprototyped hfas are passed specially. */
5049 if (hfa_mode != VOIDmode && (! cum->prototype || arg.named))
5051 int fp_regs = cum->fp_regs;
5052 /* This is the original value of cum->words + offset. */
5053 int int_regs = cum->words - words;
5054 int hfa_size = GET_MODE_SIZE (hfa_mode);
5055 int byte_size;
5056 int args_byte_size;
5058 /* If prototyped, pass it in FR regs then GR regs.
5059 If not prototyped, pass it in both FR and GR regs.
5061 If this is an SFmode aggregate, then it is possible to run out of
5062 FR regs while GR regs are still left. In that case, we pass the
5063 remaining part in the GR regs. */
5065 /* Fill the FP regs. We do this always. We stop if we reach the end
5066 of the argument, the last FP register, or the last argument slot. */
5068 byte_size = arg.promoted_size_in_bytes ();
5069 args_byte_size = int_regs * UNITS_PER_WORD;
5070 offset = 0;
5071 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
5072 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
5074 offset += hfa_size;
5075 args_byte_size += hfa_size;
5076 fp_regs++;
5079 cum->fp_regs = fp_regs;
5082 /* Integral and aggregates go in general registers. So do TFmode FP values.
5083 If we have run out of FR registers, then other FP values must also go in
5084 general registers. This can happen when we have a SFmode HFA. */
5085 else if (arg.mode == TFmode || arg.mode == TCmode
5086 || !FLOAT_MODE_P (arg.mode)
5087 || cum->fp_regs == MAX_ARGUMENT_SLOTS)
5088 cum->int_regs = cum->words;
5090 /* If there is a prototype, then FP values go in a FR register when
5091 named, and in a GR register when unnamed. */
5092 else if (cum->prototype)
5094 if (! arg.named)
5095 cum->int_regs = cum->words;
5096 else
5097 /* ??? Complex types should not reach here. */
5098 cum->fp_regs
5099 += (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5101 /* If there is no prototype, then FP values go in both FR and GR
5102 registers. */
5103 else
5105 /* ??? Complex types should not reach here. */
5106 cum->fp_regs
5107 += (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5108 cum->int_regs = cum->words;
5112 /* Arguments with alignment larger than 8 bytes start at the next even
5113 boundary. On ILP32 HPUX, TFmode arguments start on next even boundary
5114 even though their normal alignment is 8 bytes. See ia64_function_arg. */
5116 static unsigned int
5117 ia64_function_arg_boundary (machine_mode mode, const_tree type)
5119 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
5120 return PARM_BOUNDARY * 2;
5122 if (type)
5124 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
5125 return PARM_BOUNDARY * 2;
5126 else
5127 return PARM_BOUNDARY;
5130 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
5131 return PARM_BOUNDARY * 2;
5132 else
5133 return PARM_BOUNDARY;
5136 /* True if it is OK to do sibling call optimization for the specified
5137 call expression EXP. DECL will be the called function, or NULL if
5138 this is an indirect call. */
5139 static bool
5140 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5142 /* We can't perform a sibcall if the current function has the syscall_linkage
5143 attribute. */
5144 if (lookup_attribute ("syscall_linkage",
5145 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
5146 return false;
5148 /* We must always return with our current GP. This means we can
5149 only sibcall to functions defined in the current module unless
5150 TARGET_CONST_GP is set to true. */
5151 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
5155 /* Implement va_arg. */
5157 static tree
5158 ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
5159 gimple_seq *post_p)
5161 /* Variable sized types are passed by reference. */
5162 if (pass_va_arg_by_reference (type))
5164 tree ptrtype = build_pointer_type (type);
5165 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
5166 return build_va_arg_indirect_ref (addr);
5169 /* Aggregate arguments with alignment larger than 8 bytes start at
5170 the next even boundary. Integer and floating point arguments
5171 do so if they are larger than 8 bytes, whether or not they are
5172 also aligned larger than 8 bytes. */
5173 if ((SCALAR_FLOAT_TYPE_P (type) || TREE_CODE (type) == INTEGER_TYPE)
5174 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
5176 tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1);
5177 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5178 build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD));
5179 gimplify_assign (unshare_expr (valist), t, pre_p);
5182 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5185 /* Return 1 if function return value returned in memory. Return 0 if it is
5186 in a register. */
5188 static bool
5189 ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
5191 machine_mode mode;
5192 machine_mode hfa_mode;
5193 HOST_WIDE_INT byte_size;
5195 mode = TYPE_MODE (valtype);
5196 byte_size = GET_MODE_SIZE (mode);
5197 if (mode == BLKmode)
5199 byte_size = int_size_in_bytes (valtype);
5200 if (byte_size < 0)
5201 return true;
5204 /* Hfa's with up to 8 elements are returned in the FP argument registers. */
5206 hfa_mode = hfa_element_mode (valtype, 0);
5207 if (hfa_mode != VOIDmode)
5209 int hfa_size = GET_MODE_SIZE (hfa_mode);
5211 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
5212 return true;
5213 else
5214 return false;
5216 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
5217 return true;
5218 else
5219 return false;
5222 /* Return rtx for register that holds the function return value. */
5224 static rtx
5225 ia64_function_value (const_tree valtype,
5226 const_tree fn_decl_or_type,
5227 bool outgoing ATTRIBUTE_UNUSED)
5229 machine_mode mode;
5230 machine_mode hfa_mode;
5231 int unsignedp;
5232 const_tree func = fn_decl_or_type;
5234 if (fn_decl_or_type
5235 && !DECL_P (fn_decl_or_type))
5236 func = NULL;
5238 mode = TYPE_MODE (valtype);
5239 hfa_mode = hfa_element_mode (valtype, 0);
5241 if (hfa_mode != VOIDmode)
5243 rtx loc[8];
5244 int i;
5245 int hfa_size;
5246 int byte_size;
5247 int offset;
5249 hfa_size = GET_MODE_SIZE (hfa_mode);
5250 byte_size = ((mode == BLKmode)
5251 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
5252 offset = 0;
5253 for (i = 0; offset < byte_size; i++)
5255 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5256 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
5257 GEN_INT (offset));
5258 offset += hfa_size;
5260 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5262 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
5263 return gen_rtx_REG (mode, FR_ARG_FIRST);
5264 else
5266 bool need_parallel = false;
5268 /* In big-endian mode, we need to manage the layout of aggregates
5269 in the registers so that we get the bits properly aligned in
5270 the highpart of the registers. */
5271 if (BYTES_BIG_ENDIAN
5272 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
5273 need_parallel = true;
5275 /* Something like struct S { long double x; char a[0] } is not an
5276 HFA structure, and therefore doesn't go in fp registers. But
5277 the middle-end will give it XFmode anyway, and XFmode values
5278 don't normally fit in integer registers. So we need to smuggle
5279 the value inside a parallel. */
5280 else if (mode == XFmode || mode == XCmode || mode == RFmode)
5281 need_parallel = true;
5283 if (need_parallel)
5285 rtx loc[8];
5286 int offset;
5287 int bytesize;
5288 int i;
5290 offset = 0;
5291 bytesize = int_size_in_bytes (valtype);
5292 /* An empty PARALLEL is invalid here, but the return value
5293 doesn't matter for empty structs. */
5294 if (bytesize == 0)
5295 return gen_rtx_REG (mode, GR_RET_FIRST);
5296 for (i = 0; offset < bytesize; i++)
5298 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5299 gen_rtx_REG (DImode,
5300 GR_RET_FIRST + i),
5301 GEN_INT (offset));
5302 offset += UNITS_PER_WORD;
5304 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5307 mode = promote_function_mode (valtype, mode, &unsignedp,
5308 func ? TREE_TYPE (func) : NULL_TREE,
5309 true);
5311 return gen_rtx_REG (mode, GR_RET_FIRST);
5315 /* Worker function for TARGET_LIBCALL_VALUE. */
5317 static rtx
5318 ia64_libcall_value (machine_mode mode,
5319 const_rtx fun ATTRIBUTE_UNUSED)
5321 return gen_rtx_REG (mode,
5322 (((GET_MODE_CLASS (mode) == MODE_FLOAT
5323 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5324 && (mode) != TFmode)
5325 ? FR_RET_FIRST : GR_RET_FIRST));
5328 /* Worker function for FUNCTION_VALUE_REGNO_P. */
5330 static bool
5331 ia64_function_value_regno_p (const unsigned int regno)
5333 return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
5334 || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
5337 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
5338 We need to emit DTP-relative relocations. */
5340 static void
5341 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
5343 gcc_assert (size == 4 || size == 8);
5344 if (size == 4)
5345 fputs ("\tdata4.ua\t@dtprel(", file);
5346 else
5347 fputs ("\tdata8.ua\t@dtprel(", file);
5348 output_addr_const (file, x);
5349 fputs (")", file);
5352 /* Print a memory address as an operand to reference that memory location. */
5354 /* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
5355 also call this from ia64_print_operand for memory addresses. */
5357 static void
5358 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
5359 machine_mode /*mode*/,
5360 rtx address ATTRIBUTE_UNUSED)
5364 /* Print an operand to an assembler instruction.
5365 C Swap and print a comparison operator.
5366 D Print an FP comparison operator.
5367 E Print 32 - constant, for SImode shifts as extract.
5368 e Print 64 - constant, for DImode rotates.
5369 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
5370 a floating point register emitted normally.
5371 G A floating point constant.
5372 I Invert a predicate register by adding 1.
5373 J Select the proper predicate register for a condition.
5374 j Select the inverse predicate register for a condition.
5375 O Append .acq for volatile load.
5376 P Postincrement of a MEM.
5377 Q Append .rel for volatile store.
5378 R Print .s .d or nothing for a single, double or no truncation.
5379 S Shift amount for shladd instruction.
5380 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
5381 for Intel assembler.
5382 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
5383 for Intel assembler.
5384 X A pair of floating point registers.
5385 r Print register name, or constant 0 as r0. HP compatibility for
5386 Linux kernel.
5387 v Print vector constant value as an 8-byte integer value. */
5389 static void
5390 ia64_print_operand (FILE * file, rtx x, int code)
5392 const char *str;
5394 switch (code)
5396 case 0:
5397 /* Handled below. */
5398 break;
5400 case 'C':
5402 enum rtx_code c = swap_condition (GET_CODE (x));
5403 fputs (GET_RTX_NAME (c), file);
5404 return;
5407 case 'D':
5408 switch (GET_CODE (x))
5410 case NE:
5411 str = "neq";
5412 break;
5413 case UNORDERED:
5414 str = "unord";
5415 break;
5416 case ORDERED:
5417 str = "ord";
5418 break;
5419 case UNLT:
5420 str = "nge";
5421 break;
5422 case UNLE:
5423 str = "ngt";
5424 break;
5425 case UNGT:
5426 str = "nle";
5427 break;
5428 case UNGE:
5429 str = "nlt";
5430 break;
5431 case UNEQ:
5432 case LTGT:
5433 gcc_unreachable ();
5434 default:
5435 str = GET_RTX_NAME (GET_CODE (x));
5436 break;
5438 fputs (str, file);
5439 return;
5441 case 'E':
5442 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
5443 return;
5445 case 'e':
5446 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
5447 return;
5449 case 'F':
5450 if (x == CONST0_RTX (GET_MODE (x)))
5451 str = reg_names [FR_REG (0)];
5452 else if (x == CONST1_RTX (GET_MODE (x)))
5453 str = reg_names [FR_REG (1)];
5454 else
5456 gcc_assert (GET_CODE (x) == REG);
5457 str = reg_names [REGNO (x)];
5459 fputs (str, file);
5460 return;
5462 case 'G':
5464 long val[4];
5465 real_to_target (val, CONST_DOUBLE_REAL_VALUE (x), GET_MODE (x));
5466 if (GET_MODE (x) == SFmode)
5467 fprintf (file, "0x%08lx", val[0] & 0xffffffff);
5468 else if (GET_MODE (x) == DFmode)
5469 fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
5470 & 0xffffffff,
5471 (WORDS_BIG_ENDIAN ? val[1] : val[0])
5472 & 0xffffffff);
5473 else
5474 output_operand_lossage ("invalid %%G mode");
5476 return;
5478 case 'I':
5479 fputs (reg_names [REGNO (x) + 1], file);
5480 return;
5482 case 'J':
5483 case 'j':
5485 unsigned int regno = REGNO (XEXP (x, 0));
5486 if (GET_CODE (x) == EQ)
5487 regno += 1;
5488 if (code == 'j')
5489 regno ^= 1;
5490 fputs (reg_names [regno], file);
5492 return;
5494 case 'O':
5495 if (MEM_VOLATILE_P (x))
5496 fputs(".acq", file);
5497 return;
5499 case 'P':
5501 HOST_WIDE_INT value;
5503 switch (GET_CODE (XEXP (x, 0)))
5505 default:
5506 return;
5508 case POST_MODIFY:
5509 x = XEXP (XEXP (XEXP (x, 0), 1), 1);
5510 if (GET_CODE (x) == CONST_INT)
5511 value = INTVAL (x);
5512 else
5514 gcc_assert (GET_CODE (x) == REG);
5515 fprintf (file, ", %s", reg_names[REGNO (x)]);
5516 return;
5518 break;
5520 case POST_INC:
5521 value = GET_MODE_SIZE (GET_MODE (x));
5522 break;
5524 case POST_DEC:
5525 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
5526 break;
5529 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
5530 return;
5533 case 'Q':
5534 if (MEM_VOLATILE_P (x))
5535 fputs(".rel", file);
5536 return;
5538 case 'R':
5539 if (x == CONST0_RTX (GET_MODE (x)))
5540 fputs(".s", file);
5541 else if (x == CONST1_RTX (GET_MODE (x)))
5542 fputs(".d", file);
5543 else if (x == CONST2_RTX (GET_MODE (x)))
5545 else
5546 output_operand_lossage ("invalid %%R value");
5547 return;
5549 case 'S':
5550 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5551 return;
5553 case 'T':
5554 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5556 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
5557 return;
5559 break;
5561 case 'U':
5562 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5564 const char *prefix = "0x";
5565 if (INTVAL (x) & 0x80000000)
5567 fprintf (file, "0xffffffff");
5568 prefix = "";
5570 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
5571 return;
5573 break;
5575 case 'X':
5577 unsigned int regno = REGNO (x);
5578 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
5580 return;
5582 case 'r':
5583 /* If this operand is the constant zero, write it as register zero.
5584 Any register, zero, or CONST_INT value is OK here. */
5585 if (GET_CODE (x) == REG)
5586 fputs (reg_names[REGNO (x)], file);
5587 else if (x == CONST0_RTX (GET_MODE (x)))
5588 fputs ("r0", file);
5589 else if (GET_CODE (x) == CONST_INT)
5590 output_addr_const (file, x);
5591 else
5592 output_operand_lossage ("invalid %%r value");
5593 return;
5595 case 'v':
5596 gcc_assert (GET_CODE (x) == CONST_VECTOR);
5597 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
5598 break;
5600 case '+':
5602 const char *which;
5604 /* For conditional branches, returns or calls, substitute
5605 sptk, dptk, dpnt, or spnt for %s. */
5606 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5607 if (x)
5609 int pred_val = profile_probability::from_reg_br_prob_note
5610 (XINT (x, 0)).to_reg_br_prob_base ();
5612 /* Guess top and bottom 10% statically predicted. */
5613 if (pred_val < REG_BR_PROB_BASE / 50
5614 && br_prob_note_reliable_p (x))
5615 which = ".spnt";
5616 else if (pred_val < REG_BR_PROB_BASE / 2)
5617 which = ".dpnt";
5618 else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5619 || !br_prob_note_reliable_p (x))
5620 which = ".dptk";
5621 else
5622 which = ".sptk";
5624 else if (CALL_P (current_output_insn))
5625 which = ".sptk";
5626 else
5627 which = ".dptk";
5629 fputs (which, file);
5630 return;
5633 case ',':
5634 x = current_insn_predicate;
5635 if (x)
5637 unsigned int regno = REGNO (XEXP (x, 0));
5638 if (GET_CODE (x) == EQ)
5639 regno += 1;
5640 fprintf (file, "(%s) ", reg_names [regno]);
5642 return;
5644 default:
5645 output_operand_lossage ("ia64_print_operand: unknown code");
5646 return;
5649 switch (GET_CODE (x))
5651 /* This happens for the spill/restore instructions. */
5652 case POST_INC:
5653 case POST_DEC:
5654 case POST_MODIFY:
5655 x = XEXP (x, 0);
5656 /* fall through */
5658 case REG:
5659 fputs (reg_names [REGNO (x)], file);
5660 break;
5662 case MEM:
5664 rtx addr = XEXP (x, 0);
5665 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
5666 addr = XEXP (addr, 0);
5667 fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5668 break;
5671 default:
5672 output_addr_const (file, x);
5673 break;
5676 return;
5679 /* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
5681 static bool
5682 ia64_print_operand_punct_valid_p (unsigned char code)
5684 return (code == '+' || code == ',');
5687 /* Compute a (partial) cost for rtx X. Return true if the complete
5688 cost has been computed, and false if subexpressions should be
5689 scanned. In either case, *TOTAL contains the cost result. */
5690 /* ??? This is incomplete. */
5692 static bool
5693 ia64_rtx_costs (rtx x, machine_mode mode, int outer_code,
5694 int opno ATTRIBUTE_UNUSED,
5695 int *total, bool speed ATTRIBUTE_UNUSED)
5697 int code = GET_CODE (x);
5699 switch (code)
5701 case CONST_INT:
5702 switch (outer_code)
5704 case SET:
5705 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
5706 return true;
5707 case PLUS:
5708 if (satisfies_constraint_I (x))
5709 *total = 0;
5710 else if (satisfies_constraint_J (x))
5711 *total = 1;
5712 else
5713 *total = COSTS_N_INSNS (1);
5714 return true;
5715 default:
5716 if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
5717 *total = 0;
5718 else
5719 *total = COSTS_N_INSNS (1);
5720 return true;
5723 case CONST_DOUBLE:
5724 *total = COSTS_N_INSNS (1);
5725 return true;
5727 case CONST:
5728 case SYMBOL_REF:
5729 case LABEL_REF:
5730 *total = COSTS_N_INSNS (3);
5731 return true;
5733 case FMA:
5734 *total = COSTS_N_INSNS (4);
5735 return true;
5737 case MULT:
5738 /* For multiplies wider than HImode, we have to go to the FPU,
5739 which normally involves copies. Plus there's the latency
5740 of the multiply itself, and the latency of the instructions to
5741 transfer integer regs to FP regs. */
5742 if (FLOAT_MODE_P (mode))
5743 *total = COSTS_N_INSNS (4);
5744 else if (GET_MODE_SIZE (mode) > 2)
5745 *total = COSTS_N_INSNS (10);
5746 else
5747 *total = COSTS_N_INSNS (2);
5748 return true;
5750 case PLUS:
5751 case MINUS:
5752 if (FLOAT_MODE_P (mode))
5754 *total = COSTS_N_INSNS (4);
5755 return true;
5757 /* FALLTHRU */
5759 case ASHIFT:
5760 case ASHIFTRT:
5761 case LSHIFTRT:
5762 *total = COSTS_N_INSNS (1);
5763 return true;
5765 case DIV:
5766 case UDIV:
5767 case MOD:
5768 case UMOD:
5769 /* We make divide expensive, so that divide-by-constant will be
5770 optimized to a multiply. */
5771 *total = COSTS_N_INSNS (60);
5772 return true;
5774 default:
5775 return false;
5779 /* Calculate the cost of moving data from a register in class FROM to
5780 one in class TO, using MODE. */
5782 static int
5783 ia64_register_move_cost (machine_mode mode, reg_class_t from,
5784 reg_class_t to)
5786 /* ADDL_REGS is the same as GR_REGS for movement purposes. */
5787 if (to == ADDL_REGS)
5788 to = GR_REGS;
5789 if (from == ADDL_REGS)
5790 from = GR_REGS;
5792 /* All costs are symmetric, so reduce cases by putting the
5793 lower number class as the destination. */
5794 if (from < to)
5796 reg_class_t tmp = to;
5797 to = from, from = tmp;
5800 /* Moving from FR<->GR in XFmode must be more expensive than 2,
5801 so that we get secondary memory reloads. Between FR_REGS,
5802 we have to make this at least as expensive as memory_move_cost
5803 to avoid spectacularly poor register class preferencing. */
5804 if (mode == XFmode || mode == RFmode)
5806 if (to != GR_REGS || from != GR_REGS)
5807 return memory_move_cost (mode, to, false);
5808 else
5809 return 3;
5812 switch (to)
5814 case PR_REGS:
5815 /* Moving between PR registers takes two insns. */
5816 if (from == PR_REGS)
5817 return 3;
5818 /* Moving between PR and anything but GR is impossible. */
5819 if (from != GR_REGS)
5820 return memory_move_cost (mode, to, false);
5821 break;
5823 case BR_REGS:
5824 /* Moving between BR and anything but GR is impossible. */
5825 if (from != GR_REGS && from != GR_AND_BR_REGS)
5826 return memory_move_cost (mode, to, false);
5827 break;
5829 case AR_I_REGS:
5830 case AR_M_REGS:
5831 /* Moving between AR and anything but GR is impossible. */
5832 if (from != GR_REGS)
5833 return memory_move_cost (mode, to, false);
5834 break;
5836 case GR_REGS:
5837 case FR_REGS:
5838 case FP_REGS:
5839 case GR_AND_FR_REGS:
5840 case GR_AND_BR_REGS:
5841 case ALL_REGS:
5842 break;
5844 default:
5845 gcc_unreachable ();
5848 return 2;
5851 /* Calculate the cost of moving data of MODE from a register to or from
5852 memory. */
5854 static int
5855 ia64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
5856 reg_class_t rclass,
5857 bool in ATTRIBUTE_UNUSED)
5859 if (rclass == GENERAL_REGS
5860 || rclass == FR_REGS
5861 || rclass == FP_REGS
5862 || rclass == GR_AND_FR_REGS)
5863 return 4;
5864 else
5865 return 10;
5868 /* Implement TARGET_PREFERRED_RELOAD_CLASS. Place additional restrictions
5869 on RCLASS to use when copying X into that class. */
5871 static reg_class_t
5872 ia64_preferred_reload_class (rtx x, reg_class_t rclass)
5874 switch (rclass)
5876 case FR_REGS:
5877 case FP_REGS:
5878 /* Don't allow volatile mem reloads into floating point registers.
5879 This is defined to force reload to choose the r/m case instead
5880 of the f/f case when reloading (set (reg fX) (mem/v)). */
5881 if (MEM_P (x) && MEM_VOLATILE_P (x))
5882 return NO_REGS;
5884 /* Force all unrecognized constants into the constant pool. */
5885 if (CONSTANT_P (x))
5886 return NO_REGS;
5887 break;
5889 case AR_M_REGS:
5890 case AR_I_REGS:
5891 if (!OBJECT_P (x))
5892 return NO_REGS;
5893 break;
5895 default:
5896 break;
5899 return rclass;
5902 /* This function returns the register class required for a secondary
5903 register when copying between one of the registers in RCLASS, and X,
5904 using MODE. A return value of NO_REGS means that no secondary register
5905 is required. */
5907 enum reg_class
5908 ia64_secondary_reload_class (enum reg_class rclass,
5909 machine_mode mode ATTRIBUTE_UNUSED, rtx x)
5911 int regno = -1;
5913 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5914 regno = true_regnum (x);
5916 switch (rclass)
5918 case BR_REGS:
5919 case AR_M_REGS:
5920 case AR_I_REGS:
5921 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5922 interaction. We end up with two pseudos with overlapping lifetimes
5923 both of which are equiv to the same constant, and both which need
5924 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
5925 changes depending on the path length, which means the qty_first_reg
5926 check in make_regs_eqv can give different answers at different times.
5927 At some point I'll probably need a reload_indi pattern to handle
5928 this.
5930 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5931 wound up with a FP register from GR_AND_FR_REGS. Extend that to all
5932 non-general registers for good measure. */
5933 if (regno >= 0 && ! GENERAL_REGNO_P (regno))
5934 return GR_REGS;
5936 /* This is needed if a pseudo used as a call_operand gets spilled to a
5937 stack slot. */
5938 if (GET_CODE (x) == MEM)
5939 return GR_REGS;
5940 break;
5942 case FR_REGS:
5943 case FP_REGS:
5944 /* Need to go through general registers to get to other class regs. */
5945 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5946 return GR_REGS;
5948 /* This can happen when a paradoxical subreg is an operand to the
5949 muldi3 pattern. */
5950 /* ??? This shouldn't be necessary after instruction scheduling is
5951 enabled, because paradoxical subregs are not accepted by
5952 register_operand when INSN_SCHEDULING is defined. Or alternatively,
5953 stop the paradoxical subreg stupidity in the *_operand functions
5954 in recog.cc. */
5955 if (GET_CODE (x) == MEM
5956 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5957 || GET_MODE (x) == QImode))
5958 return GR_REGS;
5960 /* This can happen because of the ior/and/etc patterns that accept FP
5961 registers as operands. If the third operand is a constant, then it
5962 needs to be reloaded into a FP register. */
5963 if (GET_CODE (x) == CONST_INT)
5964 return GR_REGS;
5966 /* This can happen because of register elimination in a muldi3 insn.
5967 E.g. `26107 * (unsigned long)&u'. */
5968 if (GET_CODE (x) == PLUS)
5969 return GR_REGS;
5970 break;
5972 case PR_REGS:
5973 /* ??? This happens if we cse/gcse a BImode value across a call,
5974 and the function has a nonlocal goto. This is because global
5975 does not allocate call crossing pseudos to hard registers when
5976 crtl->has_nonlocal_goto is true. This is relatively
5977 common for C++ programs that use exceptions. To reproduce,
5978 return NO_REGS and compile libstdc++. */
5979 if (GET_CODE (x) == MEM)
5980 return GR_REGS;
5982 /* This can happen when we take a BImode subreg of a DImode value,
5983 and that DImode value winds up in some non-GR register. */
5984 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5985 return GR_REGS;
5986 break;
5988 default:
5989 break;
5992 return NO_REGS;
5996 /* Implement targetm.unspec_may_trap_p hook. */
5997 static int
5998 ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
6000 switch (XINT (x, 1))
6002 case UNSPEC_LDA:
6003 case UNSPEC_LDS:
6004 case UNSPEC_LDSA:
6005 case UNSPEC_LDCCLR:
6006 case UNSPEC_CHKACLR:
6007 case UNSPEC_CHKS:
6008 /* These unspecs are just wrappers. */
6009 return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
6012 return default_unspec_may_trap_p (x, flags);
6016 /* Parse the -mfixed-range= option string. */
6018 static void
6019 fix_range (const char *const_str)
6021 int i, first, last;
6022 char *str, *dash, *comma;
6024 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
6025 REG2 are either register names or register numbers. The effect
6026 of this option is to mark the registers in the range from REG1 to
6027 REG2 as ``fixed'' so they won't be used by the compiler. This is
6028 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
6030 i = strlen (const_str);
6031 str = (char *) alloca (i + 1);
6032 memcpy (str, const_str, i + 1);
6034 while (1)
6036 dash = strchr (str, '-');
6037 if (!dash)
6039 warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
6040 return;
6042 *dash = '\0';
6044 comma = strchr (dash + 1, ',');
6045 if (comma)
6046 *comma = '\0';
6048 first = decode_reg_name (str);
6049 if (first < 0)
6051 warning (0, "unknown register name: %s", str);
6052 return;
6055 last = decode_reg_name (dash + 1);
6056 if (last < 0)
6058 warning (0, "unknown register name: %s", dash + 1);
6059 return;
6062 *dash = '-';
6064 if (first > last)
6066 warning (0, "%s-%s is an empty range", str, dash + 1);
6067 return;
6070 for (i = first; i <= last; ++i)
6071 fixed_regs[i] = 1;
6073 if (!comma)
6074 break;
6076 *comma = ',';
6077 str = comma + 1;
6081 /* Implement TARGET_OPTION_OVERRIDE. */
6083 static void
6084 ia64_option_override (void)
6086 unsigned int i;
6087 cl_deferred_option *opt;
6088 vec<cl_deferred_option> *v
6089 = (vec<cl_deferred_option> *) ia64_deferred_options;
6091 if (v)
6092 FOR_EACH_VEC_ELT (*v, i, opt)
6094 switch (opt->opt_index)
6096 case OPT_mfixed_range_:
6097 fix_range (opt->arg);
6098 break;
6100 default:
6101 gcc_unreachable ();
6105 if (TARGET_AUTO_PIC)
6106 target_flags |= MASK_CONST_GP;
6108 /* Numerous experiment shows that IRA based loop pressure
6109 calculation works better for RTL loop invariant motion on targets
6110 with enough (>= 32) registers. It is an expensive optimization.
6111 So it is on only for peak performance. */
6112 if (optimize >= 3)
6113 flag_ira_loop_pressure = 1;
6116 ia64_section_threshold = (OPTION_SET_P (g_switch_value)
6117 ? g_switch_value
6118 : IA64_DEFAULT_GVALUE);
6120 init_machine_status = ia64_init_machine_status;
6122 if (flag_align_functions && !str_align_functions)
6123 str_align_functions = "64";
6124 if (flag_align_loops && !str_align_loops)
6125 str_align_loops = "32";
6126 if (TARGET_ABI_OPEN_VMS)
6127 flag_no_common = 1;
6129 ia64_override_options_after_change();
6132 /* Implement targetm.override_options_after_change. */
6134 static void
6135 ia64_override_options_after_change (void)
6137 if (optimize >= 3
6138 && !OPTION_SET_P (flag_selective_scheduling)
6139 && !OPTION_SET_P (flag_selective_scheduling2))
6141 flag_selective_scheduling2 = 1;
6142 flag_sel_sched_pipelining = 1;
6144 if (mflag_sched_control_spec == 2)
6146 /* Control speculation is on by default for the selective scheduler,
6147 but not for the Haifa scheduler. */
6148 mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
6150 if (flag_sel_sched_pipelining && flag_auto_inc_dec)
6152 /* FIXME: remove this when we'd implement breaking autoinsns as
6153 a transformation. */
6154 flag_auto_inc_dec = 0;
6158 /* Initialize the record of emitted frame related registers. */
6160 void ia64_init_expanders (void)
6162 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
6165 static struct machine_function *
6166 ia64_init_machine_status (void)
6168 return ggc_cleared_alloc<machine_function> ();
6171 static enum attr_itanium_class ia64_safe_itanium_class (rtx_insn *);
6172 static enum attr_type ia64_safe_type (rtx_insn *);
6174 static enum attr_itanium_class
6175 ia64_safe_itanium_class (rtx_insn *insn)
6177 if (recog_memoized (insn) >= 0)
6178 return get_attr_itanium_class (insn);
6179 else if (DEBUG_INSN_P (insn))
6180 return ITANIUM_CLASS_IGNORE;
6181 else
6182 return ITANIUM_CLASS_UNKNOWN;
6185 static enum attr_type
6186 ia64_safe_type (rtx_insn *insn)
6188 if (recog_memoized (insn) >= 0)
6189 return get_attr_type (insn);
6190 else
6191 return TYPE_UNKNOWN;
6194 /* The following collection of routines emit instruction group stop bits as
6195 necessary to avoid dependencies. */
6197 /* Need to track some additional registers as far as serialization is
6198 concerned so we can properly handle br.call and br.ret. We could
6199 make these registers visible to gcc, but since these registers are
6200 never explicitly used in gcc generated code, it seems wasteful to
6201 do so (plus it would make the call and return patterns needlessly
6202 complex). */
6203 #define REG_RP (BR_REG (0))
6204 #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
6205 /* This is used for volatile asms which may require a stop bit immediately
6206 before and after them. */
6207 #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
6208 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
6209 #define NUM_REGS (AR_UNAT_BIT_0 + 64)
6211 /* For each register, we keep track of how it has been written in the
6212 current instruction group.
6214 If a register is written unconditionally (no qualifying predicate),
6215 WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
6217 If a register is written if its qualifying predicate P is true, we
6218 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
6219 may be written again by the complement of P (P^1) and when this happens,
6220 WRITE_COUNT gets set to 2.
6222 The result of this is that whenever an insn attempts to write a register
6223 whose WRITE_COUNT is two, we need to issue an insn group barrier first.
6225 If a predicate register is written by a floating-point insn, we set
6226 WRITTEN_BY_FP to true.
6228 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
6229 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
6231 #if GCC_VERSION >= 4000
6232 #define RWS_FIELD_TYPE __extension__ unsigned short
6233 #else
6234 #define RWS_FIELD_TYPE unsigned int
6235 #endif
6236 struct reg_write_state
6238 RWS_FIELD_TYPE write_count : 2;
6239 RWS_FIELD_TYPE first_pred : 10;
6240 RWS_FIELD_TYPE written_by_fp : 1;
6241 RWS_FIELD_TYPE written_by_and : 1;
6242 RWS_FIELD_TYPE written_by_or : 1;
6245 /* Cumulative info for the current instruction group. */
6246 struct reg_write_state rws_sum[NUM_REGS];
6247 #if CHECKING_P
6248 /* Bitmap whether a register has been written in the current insn. */
6249 unsigned HOST_WIDEST_FAST_INT rws_insn
6250 [(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
6251 / HOST_BITS_PER_WIDEST_FAST_INT];
6253 static inline void
6254 rws_insn_set (unsigned int regno)
6256 unsigned int elt = regno / HOST_BITS_PER_WIDEST_FAST_INT;
6257 unsigned int bit = regno % HOST_BITS_PER_WIDEST_FAST_INT;
6258 gcc_assert (!((rws_insn[elt] >> bit) & 1));
6259 rws_insn[elt] |= (unsigned HOST_WIDEST_FAST_INT) 1 << bit;
6262 static inline int
6263 rws_insn_test (unsigned int regno)
6265 unsigned int elt = regno / HOST_BITS_PER_WIDEST_FAST_INT;
6266 unsigned int bit = regno % HOST_BITS_PER_WIDEST_FAST_INT;
6267 return (rws_insn[elt] >> bit) & 1;
6269 #else
6270 /* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
6271 unsigned char rws_insn[2];
6273 static inline void
6274 rws_insn_set (int regno)
6276 if (regno == REG_AR_CFM)
6277 rws_insn[0] = 1;
6278 else if (regno == REG_VOLATILE)
6279 rws_insn[1] = 1;
6282 static inline int
6283 rws_insn_test (int regno)
6285 if (regno == REG_AR_CFM)
6286 return rws_insn[0];
6287 if (regno == REG_VOLATILE)
6288 return rws_insn[1];
6289 return 0;
6291 #endif
6293 /* Indicates whether this is the first instruction after a stop bit,
6294 in which case we don't need another stop bit. Without this,
6295 ia64_variable_issue will die when scheduling an alloc. */
6296 static int first_instruction;
6298 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
6299 RTL for one instruction. */
6300 struct reg_flags
6302 unsigned int is_write : 1; /* Is register being written? */
6303 unsigned int is_fp : 1; /* Is register used as part of an fp op? */
6304 unsigned int is_branch : 1; /* Is register used as part of a branch? */
6305 unsigned int is_and : 1; /* Is register used as part of and.orcm? */
6306 unsigned int is_or : 1; /* Is register used as part of or.andcm? */
6307 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
6310 static void rws_update (int, struct reg_flags, int);
6311 static int rws_access_regno (int, struct reg_flags, int);
6312 static int rws_access_reg (rtx, struct reg_flags, int);
6313 static void update_set_flags (rtx, struct reg_flags *);
6314 static int set_src_needs_barrier (rtx, struct reg_flags, int);
6315 static int rtx_needs_barrier (rtx, struct reg_flags, int);
6316 static void init_insn_group_barriers (void);
6317 static int group_barrier_needed (rtx_insn *);
6318 static int safe_group_barrier_needed (rtx_insn *);
6319 static int in_safe_group_barrier;
6321 /* Update *RWS for REGNO, which is being written by the current instruction,
6322 with predicate PRED, and associated register flags in FLAGS. */
6324 static void
6325 rws_update (int regno, struct reg_flags flags, int pred)
6327 if (pred)
6328 rws_sum[regno].write_count++;
6329 else
6330 rws_sum[regno].write_count = 2;
6331 rws_sum[regno].written_by_fp |= flags.is_fp;
6332 /* ??? Not tracking and/or across differing predicates. */
6333 rws_sum[regno].written_by_and = flags.is_and;
6334 rws_sum[regno].written_by_or = flags.is_or;
6335 rws_sum[regno].first_pred = pred;
6338 /* Handle an access to register REGNO of type FLAGS using predicate register
6339 PRED. Update rws_sum array. Return 1 if this access creates
6340 a dependency with an earlier instruction in the same group. */
6342 static int
6343 rws_access_regno (int regno, struct reg_flags flags, int pred)
6345 int need_barrier = 0;
6347 gcc_assert (regno < NUM_REGS);
6349 if (! PR_REGNO_P (regno))
6350 flags.is_and = flags.is_or = 0;
6352 if (flags.is_write)
6354 int write_count;
6356 rws_insn_set (regno);
6357 write_count = rws_sum[regno].write_count;
6359 switch (write_count)
6361 case 0:
6362 /* The register has not been written yet. */
6363 if (!in_safe_group_barrier)
6364 rws_update (regno, flags, pred);
6365 break;
6367 case 1:
6368 /* The register has been written via a predicate. Treat
6369 it like a unconditional write and do not try to check
6370 for complementary pred reg in earlier write. */
6371 if (flags.is_and && rws_sum[regno].written_by_and)
6373 else if (flags.is_or && rws_sum[regno].written_by_or)
6375 else
6376 need_barrier = 1;
6377 if (!in_safe_group_barrier)
6378 rws_update (regno, flags, pred);
6379 break;
6381 case 2:
6382 /* The register has been unconditionally written already. We
6383 need a barrier. */
6384 if (flags.is_and && rws_sum[regno].written_by_and)
6386 else if (flags.is_or && rws_sum[regno].written_by_or)
6388 else
6389 need_barrier = 1;
6390 if (!in_safe_group_barrier)
6392 rws_sum[regno].written_by_and = flags.is_and;
6393 rws_sum[regno].written_by_or = flags.is_or;
6395 break;
6397 default:
6398 gcc_unreachable ();
6401 else
6403 if (flags.is_branch)
6405 /* Branches have several RAW exceptions that allow to avoid
6406 barriers. */
6408 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
6409 /* RAW dependencies on branch regs are permissible as long
6410 as the writer is a non-branch instruction. Since we
6411 never generate code that uses a branch register written
6412 by a branch instruction, handling this case is
6413 easy. */
6414 return 0;
6416 if (REGNO_REG_CLASS (regno) == PR_REGS
6417 && ! rws_sum[regno].written_by_fp)
6418 /* The predicates of a branch are available within the
6419 same insn group as long as the predicate was written by
6420 something other than a floating-point instruction. */
6421 return 0;
6424 if (flags.is_and && rws_sum[regno].written_by_and)
6425 return 0;
6426 if (flags.is_or && rws_sum[regno].written_by_or)
6427 return 0;
6429 switch (rws_sum[regno].write_count)
6431 case 0:
6432 /* The register has not been written yet. */
6433 break;
6435 case 1:
6436 /* The register has been written via a predicate, assume we
6437 need a barrier (don't check for complementary regs). */
6438 need_barrier = 1;
6439 break;
6441 case 2:
6442 /* The register has been unconditionally written already. We
6443 need a barrier. */
6444 need_barrier = 1;
6445 break;
6447 default:
6448 gcc_unreachable ();
6452 return need_barrier;
6455 static int
6456 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
6458 int regno = REGNO (reg);
6459 int n = REG_NREGS (reg);
6461 if (n == 1)
6462 return rws_access_regno (regno, flags, pred);
6463 else
6465 int need_barrier = 0;
6466 while (--n >= 0)
6467 need_barrier |= rws_access_regno (regno + n, flags, pred);
6468 return need_barrier;
6472 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
6473 the condition, stored in *PFLAGS, *PPRED and *PCOND. */
6475 static void
6476 update_set_flags (rtx x, struct reg_flags *pflags)
6478 rtx src = SET_SRC (x);
6480 switch (GET_CODE (src))
6482 case CALL:
6483 return;
6485 case IF_THEN_ELSE:
6486 /* There are four cases here:
6487 (1) The destination is (pc), in which case this is a branch,
6488 nothing here applies.
6489 (2) The destination is ar.lc, in which case this is a
6490 doloop_end_internal,
6491 (3) The destination is an fp register, in which case this is
6492 an fselect instruction.
6493 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
6494 this is a check load.
6495 In all cases, nothing we do in this function applies. */
6496 return;
6498 default:
6499 if (COMPARISON_P (src)
6500 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
6501 /* Set pflags->is_fp to 1 so that we know we're dealing
6502 with a floating point comparison when processing the
6503 destination of the SET. */
6504 pflags->is_fp = 1;
6506 /* Discover if this is a parallel comparison. We only handle
6507 and.orcm and or.andcm at present, since we must retain a
6508 strict inverse on the predicate pair. */
6509 else if (GET_CODE (src) == AND)
6510 pflags->is_and = 1;
6511 else if (GET_CODE (src) == IOR)
6512 pflags->is_or = 1;
6514 break;
6518 /* Subroutine of rtx_needs_barrier; this function determines whether the
6519 source of a given SET rtx found in X needs a barrier. FLAGS and PRED
6520 are as in rtx_needs_barrier. COND is an rtx that holds the condition
6521 for this insn. */
6523 static int
6524 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
6526 int need_barrier = 0;
6527 rtx dst;
6528 rtx src = SET_SRC (x);
6530 if (GET_CODE (src) == CALL)
6531 /* We don't need to worry about the result registers that
6532 get written by subroutine call. */
6533 return rtx_needs_barrier (src, flags, pred);
6534 else if (SET_DEST (x) == pc_rtx)
6536 /* X is a conditional branch. */
6537 /* ??? This seems redundant, as the caller sets this bit for
6538 all JUMP_INSNs. */
6539 if (!ia64_spec_check_src_p (src))
6540 flags.is_branch = 1;
6541 return rtx_needs_barrier (src, flags, pred);
6544 if (ia64_spec_check_src_p (src))
6545 /* Avoid checking one register twice (in condition
6546 and in 'then' section) for ldc pattern. */
6548 gcc_assert (REG_P (XEXP (src, 2)));
6549 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
6551 /* We process MEM below. */
6552 src = XEXP (src, 1);
6555 need_barrier |= rtx_needs_barrier (src, flags, pred);
6557 dst = SET_DEST (x);
6558 if (GET_CODE (dst) == ZERO_EXTRACT)
6560 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
6561 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
6563 return need_barrier;
6566 /* Handle an access to rtx X of type FLAGS using predicate register
6567 PRED. Return 1 if this access creates a dependency with an earlier
6568 instruction in the same group. */
6570 static int
6571 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
6573 int i, j;
6574 int is_complemented = 0;
6575 int need_barrier = 0;
6576 const char *format_ptr;
6577 struct reg_flags new_flags;
6578 rtx cond;
6580 if (! x)
6581 return 0;
6583 new_flags = flags;
6585 switch (GET_CODE (x))
6587 case SET:
6588 update_set_flags (x, &new_flags);
6589 need_barrier = set_src_needs_barrier (x, new_flags, pred);
6590 if (GET_CODE (SET_SRC (x)) != CALL)
6592 new_flags.is_write = 1;
6593 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
6595 break;
6597 case CALL:
6598 new_flags.is_write = 0;
6599 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6601 /* Avoid multiple register writes, in case this is a pattern with
6602 multiple CALL rtx. This avoids a failure in rws_access_reg. */
6603 if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
6605 new_flags.is_write = 1;
6606 need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
6607 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
6608 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6610 break;
6612 case COND_EXEC:
6613 /* X is a predicated instruction. */
6615 cond = COND_EXEC_TEST (x);
6616 gcc_assert (!pred);
6617 need_barrier = rtx_needs_barrier (cond, flags, 0);
6619 if (GET_CODE (cond) == EQ)
6620 is_complemented = 1;
6621 cond = XEXP (cond, 0);
6622 gcc_assert (GET_CODE (cond) == REG
6623 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
6624 pred = REGNO (cond);
6625 if (is_complemented)
6626 ++pred;
6628 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6629 return need_barrier;
6631 case CLOBBER:
6632 case USE:
6633 /* Clobber & use are for earlier compiler-phases only. */
6634 break;
6636 case ASM_OPERANDS:
6637 case ASM_INPUT:
6638 /* We always emit stop bits for traditional asms. We emit stop bits
6639 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
6640 if (GET_CODE (x) != ASM_OPERANDS
6641 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6643 /* Avoid writing the register multiple times if we have multiple
6644 asm outputs. This avoids a failure in rws_access_reg. */
6645 if (! rws_insn_test (REG_VOLATILE))
6647 new_flags.is_write = 1;
6648 rws_access_regno (REG_VOLATILE, new_flags, pred);
6650 return 1;
6653 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
6654 We cannot just fall through here since then we would be confused
6655 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
6656 traditional asms unlike their normal usage. */
6658 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6659 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6660 need_barrier = 1;
6661 break;
6663 case PARALLEL:
6664 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6666 rtx pat = XVECEXP (x, 0, i);
6667 switch (GET_CODE (pat))
6669 case SET:
6670 update_set_flags (pat, &new_flags);
6671 need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
6672 break;
6674 case USE:
6675 case CALL:
6676 case ASM_OPERANDS:
6677 case ASM_INPUT:
6678 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6679 break;
6681 case CLOBBER:
6682 if (REG_P (XEXP (pat, 0))
6683 && extract_asm_operands (x) != NULL_RTX
6684 && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
6686 new_flags.is_write = 1;
6687 need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
6688 new_flags, pred);
6689 new_flags = flags;
6691 break;
6693 case RETURN:
6694 break;
6696 default:
6697 gcc_unreachable ();
6700 for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6702 rtx pat = XVECEXP (x, 0, i);
6703 if (GET_CODE (pat) == SET)
6705 if (GET_CODE (SET_SRC (pat)) != CALL)
6707 new_flags.is_write = 1;
6708 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6709 pred);
6712 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
6713 need_barrier |= rtx_needs_barrier (pat, flags, pred);
6715 break;
6717 case SUBREG:
6718 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6719 break;
6720 case REG:
6721 if (REGNO (x) == AR_UNAT_REGNUM)
6723 for (i = 0; i < 64; ++i)
6724 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6726 else
6727 need_barrier = rws_access_reg (x, flags, pred);
6728 break;
6730 case MEM:
6731 /* Find the regs used in memory address computation. */
6732 new_flags.is_write = 0;
6733 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6734 break;
6736 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
6737 case SYMBOL_REF: case LABEL_REF: case CONST:
6738 break;
6740 /* Operators with side-effects. */
6741 case POST_INC: case POST_DEC:
6742 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6744 new_flags.is_write = 0;
6745 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6746 new_flags.is_write = 1;
6747 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6748 break;
6750 case POST_MODIFY:
6751 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6753 new_flags.is_write = 0;
6754 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
6755 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6756 new_flags.is_write = 1;
6757 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6758 break;
6760 /* Handle common unary and binary ops for efficiency. */
6761 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
6762 case MOD: case UDIV: case UMOD: case AND: case IOR:
6763 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
6764 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
6765 case NE: case EQ: case GE: case GT: case LE:
6766 case LT: case GEU: case GTU: case LEU: case LTU:
6767 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6768 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6769 break;
6771 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
6772 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
6773 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
6774 case SQRT: case FFS: case POPCOUNT:
6775 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6776 break;
6778 case VEC_SELECT:
6779 /* VEC_SELECT's second argument is a PARALLEL with integers that
6780 describe the elements selected. On ia64, those integers are
6781 always constants. Avoid walking the PARALLEL so that we don't
6782 get confused with "normal" parallels and then die. */
6783 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6784 break;
6786 case UNSPEC:
6787 switch (XINT (x, 1))
6789 case UNSPEC_LTOFF_DTPMOD:
6790 case UNSPEC_LTOFF_DTPREL:
6791 case UNSPEC_DTPREL:
6792 case UNSPEC_LTOFF_TPREL:
6793 case UNSPEC_TPREL:
6794 case UNSPEC_PRED_REL_MUTEX:
6795 case UNSPEC_PIC_CALL:
6796 case UNSPEC_MF:
6797 case UNSPEC_FETCHADD_ACQ:
6798 case UNSPEC_FETCHADD_REL:
6799 case UNSPEC_BSP_VALUE:
6800 case UNSPEC_FLUSHRS:
6801 case UNSPEC_BUNDLE_SELECTOR:
6802 break;
6804 case UNSPEC_GR_SPILL:
6805 case UNSPEC_GR_RESTORE:
6807 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6808 HOST_WIDE_INT bit = (offset >> 3) & 63;
6810 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6811 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
6812 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6813 new_flags, pred);
6814 break;
6817 case UNSPEC_FR_SPILL:
6818 case UNSPEC_FR_RESTORE:
6819 case UNSPEC_GETF_EXP:
6820 case UNSPEC_SETF_EXP:
6821 case UNSPEC_ADDP4:
6822 case UNSPEC_FR_SQRT_RECIP_APPROX:
6823 case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
6824 case UNSPEC_LDA:
6825 case UNSPEC_LDS:
6826 case UNSPEC_LDS_A:
6827 case UNSPEC_LDSA:
6828 case UNSPEC_CHKACLR:
6829 case UNSPEC_CHKS:
6830 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6831 break;
6833 case UNSPEC_FR_RECIP_APPROX:
6834 case UNSPEC_SHRP:
6835 case UNSPEC_COPYSIGN:
6836 case UNSPEC_FR_RECIP_APPROX_RES:
6837 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6838 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6839 break;
6841 case UNSPEC_CMPXCHG_ACQ:
6842 case UNSPEC_CMPXCHG_REL:
6843 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6844 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6845 break;
6847 default:
6848 gcc_unreachable ();
6850 break;
6852 case UNSPEC_VOLATILE:
6853 switch (XINT (x, 1))
6855 case UNSPECV_ALLOC:
6856 /* Alloc must always be the first instruction of a group.
6857 We force this by always returning true. */
6858 /* ??? We might get better scheduling if we explicitly check for
6859 input/local/output register dependencies, and modify the
6860 scheduler so that alloc is always reordered to the start of
6861 the current group. We could then eliminate all of the
6862 first_instruction code. */
6863 rws_access_regno (AR_PFS_REGNUM, flags, pred);
6865 new_flags.is_write = 1;
6866 rws_access_regno (REG_AR_CFM, new_flags, pred);
6867 return 1;
6869 case UNSPECV_SET_BSP:
6870 case UNSPECV_PROBE_STACK_RANGE:
6871 need_barrier = 1;
6872 break;
6874 case UNSPECV_BLOCKAGE:
6875 case UNSPECV_INSN_GROUP_BARRIER:
6876 case UNSPECV_BREAK:
6877 case UNSPECV_PSAC_ALL:
6878 case UNSPECV_PSAC_NORMAL:
6879 return 0;
6881 case UNSPECV_PROBE_STACK_ADDRESS:
6882 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6883 break;
6885 default:
6886 gcc_unreachable ();
6888 break;
6890 case RETURN:
6891 new_flags.is_write = 0;
6892 need_barrier = rws_access_regno (REG_RP, flags, pred);
6893 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
6895 new_flags.is_write = 1;
6896 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6897 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6898 break;
6900 default:
6901 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6902 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6903 switch (format_ptr[i])
6905 case '0': /* unused field */
6906 case 'i': /* integer */
6907 case 'n': /* note */
6908 case 'L': /* location_t */
6909 case 'w': /* wide integer */
6910 case 's': /* pointer to string */
6911 case 'S': /* optional pointer to string */
6912 break;
6914 case 'e':
6915 if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6916 need_barrier = 1;
6917 break;
6919 case 'E':
6920 for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6921 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6922 need_barrier = 1;
6923 break;
6925 default:
6926 gcc_unreachable ();
6928 break;
6930 return need_barrier;
6933 /* Clear out the state for group_barrier_needed at the start of a
6934 sequence of insns. */
6936 static void
6937 init_insn_group_barriers (void)
6939 memset (rws_sum, 0, sizeof (rws_sum));
6940 first_instruction = 1;
6943 /* Given the current state, determine whether a group barrier (a stop bit) is
6944 necessary before INSN. Return nonzero if so. This modifies the state to
6945 include the effects of INSN as a side-effect. */
6947 static int
6948 group_barrier_needed (rtx_insn *insn)
6950 rtx pat;
6951 int need_barrier = 0;
6952 struct reg_flags flags;
6954 memset (&flags, 0, sizeof (flags));
6955 switch (GET_CODE (insn))
6957 case NOTE:
6958 case DEBUG_INSN:
6959 break;
6961 case BARRIER:
6962 /* A barrier doesn't imply an instruction group boundary. */
6963 break;
6965 case CODE_LABEL:
6966 memset (rws_insn, 0, sizeof (rws_insn));
6967 return 1;
6969 case CALL_INSN:
6970 flags.is_branch = 1;
6971 flags.is_sibcall = SIBLING_CALL_P (insn);
6972 memset (rws_insn, 0, sizeof (rws_insn));
6974 /* Don't bundle a call following another call. */
6975 if ((pat = prev_active_insn (insn)) && CALL_P (pat))
6977 need_barrier = 1;
6978 break;
6981 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6982 break;
6984 case JUMP_INSN:
6985 if (!ia64_spec_check_p (insn))
6986 flags.is_branch = 1;
6988 /* Don't bundle a jump following a call. */
6989 if ((pat = prev_active_insn (insn)) && CALL_P (pat))
6991 need_barrier = 1;
6992 break;
6994 /* FALLTHRU */
6996 case INSN:
6997 if (GET_CODE (PATTERN (insn)) == USE
6998 || GET_CODE (PATTERN (insn)) == CLOBBER)
6999 /* Don't care about USE and CLOBBER "insns"---those are used to
7000 indicate to the optimizer that it shouldn't get rid of
7001 certain operations. */
7002 break;
7004 pat = PATTERN (insn);
7006 /* Ug. Hack hacks hacked elsewhere. */
7007 switch (recog_memoized (insn))
7009 /* We play dependency tricks with the epilogue in order
7010 to get proper schedules. Undo this for dv analysis. */
7011 case CODE_FOR_epilogue_deallocate_stack:
7012 case CODE_FOR_prologue_allocate_stack:
7013 pat = XVECEXP (pat, 0, 0);
7014 break;
7016 /* The pattern we use for br.cloop confuses the code above.
7017 The second element of the vector is representative. */
7018 case CODE_FOR_doloop_end_internal:
7019 pat = XVECEXP (pat, 0, 1);
7020 break;
7022 /* Doesn't generate code. */
7023 case CODE_FOR_pred_rel_mutex:
7024 case CODE_FOR_prologue_use:
7025 return 0;
7027 default:
7028 break;
7031 memset (rws_insn, 0, sizeof (rws_insn));
7032 need_barrier = rtx_needs_barrier (pat, flags, 0);
7034 /* Check to see if the previous instruction was a volatile
7035 asm. */
7036 if (! need_barrier)
7037 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
7039 break;
7041 default:
7042 gcc_unreachable ();
7045 if (first_instruction && important_for_bundling_p (insn))
7047 need_barrier = 0;
7048 first_instruction = 0;
7051 return need_barrier;
7054 /* Like group_barrier_needed, but do not clobber the current state. */
7056 static int
7057 safe_group_barrier_needed (rtx_insn *insn)
7059 int saved_first_instruction;
7060 int t;
7062 saved_first_instruction = first_instruction;
7063 in_safe_group_barrier = 1;
7065 t = group_barrier_needed (insn);
7067 first_instruction = saved_first_instruction;
7068 in_safe_group_barrier = 0;
7070 return t;
7073 /* Scan the current function and insert stop bits as necessary to
7074 eliminate dependencies. This function assumes that a final
7075 instruction scheduling pass has been run which has already
7076 inserted most of the necessary stop bits. This function only
7077 inserts new ones at basic block boundaries, since these are
7078 invisible to the scheduler. */
7080 static void
7081 emit_insn_group_barriers (FILE *dump)
7083 rtx_insn *insn;
7084 rtx_insn *last_label = 0;
7085 int insns_since_last_label = 0;
7087 init_insn_group_barriers ();
7089 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7091 if (LABEL_P (insn))
7093 if (insns_since_last_label)
7094 last_label = insn;
7095 insns_since_last_label = 0;
7097 else if (NOTE_P (insn)
7098 && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
7100 if (insns_since_last_label)
7101 last_label = insn;
7102 insns_since_last_label = 0;
7104 else if (NONJUMP_INSN_P (insn)
7105 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7106 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7108 init_insn_group_barriers ();
7109 last_label = 0;
7111 else if (NONDEBUG_INSN_P (insn))
7113 insns_since_last_label = 1;
7115 if (group_barrier_needed (insn))
7117 if (last_label)
7119 if (dump)
7120 fprintf (dump, "Emitting stop before label %d\n",
7121 INSN_UID (last_label));
7122 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
7123 insn = last_label;
7125 init_insn_group_barriers ();
7126 last_label = 0;
7133 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
7134 This function has to emit all necessary group barriers. */
7136 static void
7137 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
7139 rtx_insn *insn;
7141 init_insn_group_barriers ();
7143 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7145 if (BARRIER_P (insn))
7147 rtx_insn *last = prev_active_insn (insn);
7149 if (! last)
7150 continue;
7151 if (JUMP_TABLE_DATA_P (last))
7152 last = prev_active_insn (last);
7153 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7154 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7156 init_insn_group_barriers ();
7158 else if (NONDEBUG_INSN_P (insn))
7160 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7161 init_insn_group_barriers ();
7162 else if (group_barrier_needed (insn))
7164 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
7165 init_insn_group_barriers ();
7166 group_barrier_needed (insn);
7174 /* Instruction scheduling support. */
7176 #define NR_BUNDLES 10
7178 /* A list of names of all available bundles. */
7180 static const char *bundle_name [NR_BUNDLES] =
7182 ".mii",
7183 ".mmi",
7184 ".mfi",
7185 ".mmf",
7186 #if NR_BUNDLES == 10
7187 ".bbb",
7188 ".mbb",
7189 #endif
7190 ".mib",
7191 ".mmb",
7192 ".mfb",
7193 ".mlx"
7196 /* Nonzero if we should insert stop bits into the schedule. */
7198 int ia64_final_schedule = 0;
7200 /* Codes of the corresponding queried units: */
7202 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
7203 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
7205 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
7206 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
7208 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
7210 /* The following variable value is an insn group barrier. */
7212 static rtx_insn *dfa_stop_insn;
7214 /* The following variable value is the last issued insn. */
7216 static rtx_insn *last_scheduled_insn;
7218 /* The following variable value is pointer to a DFA state used as
7219 temporary variable. */
7221 static state_t temp_dfa_state = NULL;
7223 /* The following variable value is DFA state after issuing the last
7224 insn. */
7226 static state_t prev_cycle_state = NULL;
7228 /* The following array element values are TRUE if the corresponding
7229 insn requires to add stop bits before it. */
7231 static char *stops_p = NULL;
7233 /* The following variable is used to set up the mentioned above array. */
7235 static int stop_before_p = 0;
7237 /* The following variable value is length of the arrays `clocks' and
7238 `add_cycles'. */
7240 static int clocks_length;
7242 /* The following variable value is number of data speculations in progress. */
7243 static int pending_data_specs = 0;
7245 /* Number of memory references on current and three future processor cycles. */
7246 static char mem_ops_in_group[4];
7248 /* Number of current processor cycle (from scheduler's point of view). */
7249 static int current_cycle;
7251 static rtx ia64_single_set (rtx_insn *);
7252 static void ia64_emit_insn_before (rtx, rtx_insn *);
7254 /* Map a bundle number to its pseudo-op. */
7256 const char *
7257 get_bundle_name (int b)
7259 return bundle_name[b];
7263 /* Return the maximum number of instructions a cpu can issue. */
7265 static int
7266 ia64_issue_rate (void)
7268 return 6;
7271 /* Helper function - like single_set, but look inside COND_EXEC. */
7273 static rtx
7274 ia64_single_set (rtx_insn *insn)
7276 rtx x = PATTERN (insn), ret;
7277 if (GET_CODE (x) == COND_EXEC)
7278 x = COND_EXEC_CODE (x);
7279 if (GET_CODE (x) == SET)
7280 return x;
7282 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
7283 Although they are not classical single set, the second set is there just
7284 to protect it from moving past FP-relative stack accesses. */
7285 switch (recog_memoized (insn))
7287 case CODE_FOR_prologue_allocate_stack:
7288 case CODE_FOR_prologue_allocate_stack_pr:
7289 case CODE_FOR_epilogue_deallocate_stack:
7290 case CODE_FOR_epilogue_deallocate_stack_pr:
7291 ret = XVECEXP (x, 0, 0);
7292 break;
7294 default:
7295 ret = single_set_2 (insn, x);
7296 break;
7299 return ret;
7302 /* Adjust the cost of a scheduling dependency.
7303 Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN.
7304 COST is the current cost, DW is dependency weakness. */
7305 static int
7306 ia64_adjust_cost (rtx_insn *insn, int dep_type1, rtx_insn *dep_insn,
7307 int cost, dw_t dw)
7309 enum reg_note dep_type = (enum reg_note) dep_type1;
7310 enum attr_itanium_class dep_class;
7311 enum attr_itanium_class insn_class;
7313 insn_class = ia64_safe_itanium_class (insn);
7314 dep_class = ia64_safe_itanium_class (dep_insn);
7316 /* Treat true memory dependencies separately. Ignore apparent true
7317 dependence between store and call (call has a MEM inside a SYMBOL_REF). */
7318 if (dep_type == REG_DEP_TRUE
7319 && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
7320 && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
7321 return 0;
7323 if (dw == MIN_DEP_WEAK)
7324 /* Store and load are likely to alias, use higher cost to avoid stall. */
7325 return param_sched_mem_true_dep_cost;
7326 else if (dw > MIN_DEP_WEAK)
7328 /* Store and load are less likely to alias. */
7329 if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
7330 /* Assume there will be no cache conflict for floating-point data.
7331 For integer data, L1 conflict penalty is huge (17 cycles), so we
7332 never assume it will not cause a conflict. */
7333 return 0;
7334 else
7335 return cost;
7338 if (dep_type != REG_DEP_OUTPUT)
7339 return cost;
7341 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
7342 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
7343 return 0;
7345 return cost;
7348 /* Like emit_insn_before, but skip cycle_display notes.
7349 ??? When cycle display notes are implemented, update this. */
7351 static void
7352 ia64_emit_insn_before (rtx insn, rtx_insn *before)
7354 emit_insn_before (insn, before);
7357 /* The following function marks insns who produce addresses for load
7358 and store insns. Such insns will be placed into M slots because it
7359 decrease latency time for Itanium1 (see function
7360 `ia64_produce_address_p' and the DFA descriptions). */
7362 static void
7363 ia64_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
7365 rtx_insn *insn, *next, *next_tail;
7367 /* Before reload, which_alternative is not set, which means that
7368 ia64_safe_itanium_class will produce wrong results for (at least)
7369 move instructions. */
7370 if (!reload_completed)
7371 return;
7373 next_tail = NEXT_INSN (tail);
7374 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7375 if (INSN_P (insn))
7376 insn->call = 0;
7377 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7378 if (INSN_P (insn)
7379 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
7381 sd_iterator_def sd_it;
7382 dep_t dep;
7383 bool has_mem_op_consumer_p = false;
7385 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
7387 enum attr_itanium_class c;
7389 if (DEP_TYPE (dep) != REG_DEP_TRUE)
7390 continue;
7392 next = DEP_CON (dep);
7393 c = ia64_safe_itanium_class (next);
7394 if ((c == ITANIUM_CLASS_ST
7395 || c == ITANIUM_CLASS_STF)
7396 && ia64_st_address_bypass_p (insn, next))
7398 has_mem_op_consumer_p = true;
7399 break;
7401 else if ((c == ITANIUM_CLASS_LD
7402 || c == ITANIUM_CLASS_FLD
7403 || c == ITANIUM_CLASS_FLDP)
7404 && ia64_ld_address_bypass_p (insn, next))
7406 has_mem_op_consumer_p = true;
7407 break;
7411 insn->call = has_mem_op_consumer_p;
7415 /* We're beginning a new block. Initialize data structures as necessary. */
7417 static void
7418 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
7419 int sched_verbose ATTRIBUTE_UNUSED,
7420 int max_ready ATTRIBUTE_UNUSED)
7422 if (flag_checking && !sel_sched_p () && reload_completed)
7424 for (rtx_insn *insn = NEXT_INSN (current_sched_info->prev_head);
7425 insn != current_sched_info->next_tail;
7426 insn = NEXT_INSN (insn))
7427 gcc_assert (!SCHED_GROUP_P (insn));
7429 last_scheduled_insn = NULL;
7430 init_insn_group_barriers ();
7432 current_cycle = 0;
7433 memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7436 /* We're beginning a scheduling pass. Check assertion. */
7438 static void
7439 ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
7440 int sched_verbose ATTRIBUTE_UNUSED,
7441 int max_ready ATTRIBUTE_UNUSED)
7443 gcc_assert (pending_data_specs == 0);
7446 /* Scheduling pass is now finished. Free/reset static variable. */
7447 static void
7448 ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
7449 int sched_verbose ATTRIBUTE_UNUSED)
7451 gcc_assert (pending_data_specs == 0);
7454 /* Return TRUE if INSN is a load (either normal or speculative, but not a
7455 speculation check), FALSE otherwise. */
7456 static bool
7457 is_load_p (rtx_insn *insn)
7459 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7461 return
7462 ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
7463 && get_attr_check_load (insn) == CHECK_LOAD_NO);
7466 /* If INSN is a memory reference, memoize it in MEM_OPS_IN_GROUP global array
7467 (taking account for 3-cycle cache reference postponing for stores: Intel
7468 Itanium 2 Reference Manual for Software Development and Optimization,
7469 6.7.3.1). */
7470 static void
7471 record_memory_reference (rtx_insn *insn)
7473 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7475 switch (insn_class) {
7476 case ITANIUM_CLASS_FLD:
7477 case ITANIUM_CLASS_LD:
7478 mem_ops_in_group[current_cycle % 4]++;
7479 break;
7480 case ITANIUM_CLASS_STF:
7481 case ITANIUM_CLASS_ST:
7482 mem_ops_in_group[(current_cycle + 3) % 4]++;
7483 break;
7484 default:;
7488 /* We are about to being issuing insns for this clock cycle.
7489 Override the default sort algorithm to better slot instructions. */
7491 static int
7492 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
7493 int *pn_ready, int clock_var,
7494 int reorder_type)
7496 int n_asms;
7497 int n_ready = *pn_ready;
7498 rtx_insn **e_ready = ready + n_ready;
7499 rtx_insn **insnp;
7501 if (sched_verbose)
7502 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
7504 if (reorder_type == 0)
7506 /* First, move all USEs, CLOBBERs and other crud out of the way. */
7507 n_asms = 0;
7508 for (insnp = ready; insnp < e_ready; insnp++)
7509 if (insnp < e_ready)
7511 rtx_insn *insn = *insnp;
7512 enum attr_type t = ia64_safe_type (insn);
7513 if (t == TYPE_UNKNOWN)
7515 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7516 || asm_noperands (PATTERN (insn)) >= 0)
7518 rtx_insn *lowest = ready[n_asms];
7519 ready[n_asms] = insn;
7520 *insnp = lowest;
7521 n_asms++;
7523 else
7525 rtx_insn *highest = ready[n_ready - 1];
7526 ready[n_ready - 1] = insn;
7527 *insnp = highest;
7528 return 1;
7533 if (n_asms < n_ready)
7535 /* Some normal insns to process. Skip the asms. */
7536 ready += n_asms;
7537 n_ready -= n_asms;
7539 else if (n_ready > 0)
7540 return 1;
7543 if (ia64_final_schedule)
7545 int deleted = 0;
7546 int nr_need_stop = 0;
7548 for (insnp = ready; insnp < e_ready; insnp++)
7549 if (safe_group_barrier_needed (*insnp))
7550 nr_need_stop++;
7552 if (reorder_type == 1 && n_ready == nr_need_stop)
7553 return 0;
7554 if (reorder_type == 0)
7555 return 1;
7556 insnp = e_ready;
7557 /* Move down everything that needs a stop bit, preserving
7558 relative order. */
7559 while (insnp-- > ready + deleted)
7560 while (insnp >= ready + deleted)
7562 rtx_insn *insn = *insnp;
7563 if (! safe_group_barrier_needed (insn))
7564 break;
7565 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7566 *ready = insn;
7567 deleted++;
7569 n_ready -= deleted;
7570 ready += deleted;
7573 current_cycle = clock_var;
7574 if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
7576 int moved = 0;
7578 insnp = e_ready;
7579 /* Move down loads/stores, preserving relative order. */
7580 while (insnp-- > ready + moved)
7581 while (insnp >= ready + moved)
7583 rtx_insn *insn = *insnp;
7584 if (! is_load_p (insn))
7585 break;
7586 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7587 *ready = insn;
7588 moved++;
7590 n_ready -= moved;
7591 ready += moved;
7594 return 1;
7597 /* We are about to being issuing insns for this clock cycle. Override
7598 the default sort algorithm to better slot instructions. */
7600 static int
7601 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
7602 int *pn_ready, int clock_var)
7604 return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
7605 pn_ready, clock_var, 0);
7608 /* Like ia64_sched_reorder, but called after issuing each insn.
7609 Override the default sort algorithm to better slot instructions. */
7611 static int
7612 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
7613 int sched_verbose ATTRIBUTE_UNUSED, rtx_insn **ready,
7614 int *pn_ready, int clock_var)
7616 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
7617 clock_var, 1);
7620 /* We are about to issue INSN. Return the number of insns left on the
7621 ready queue that can be issued this cycle. */
7623 static int
7624 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
7625 int sched_verbose ATTRIBUTE_UNUSED,
7626 rtx_insn *insn,
7627 int can_issue_more ATTRIBUTE_UNUSED)
7629 if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
7630 /* Modulo scheduling does not extend h_i_d when emitting
7631 new instructions. Don't use h_i_d, if we don't have to. */
7633 if (DONE_SPEC (insn) & BEGIN_DATA)
7634 pending_data_specs++;
7635 if (CHECK_SPEC (insn) & BEGIN_DATA)
7636 pending_data_specs--;
7639 if (DEBUG_INSN_P (insn))
7640 return 1;
7642 last_scheduled_insn = insn;
7643 memcpy (prev_cycle_state, curr_state, dfa_state_size);
7644 if (reload_completed)
7646 int needed = group_barrier_needed (insn);
7648 gcc_assert (!needed);
7649 if (CALL_P (insn))
7650 init_insn_group_barriers ();
7651 stops_p [INSN_UID (insn)] = stop_before_p;
7652 stop_before_p = 0;
7654 record_memory_reference (insn);
7656 return 1;
7659 /* We are choosing insn from the ready queue. Return zero if INSN
7660 can be chosen. */
7662 static int
7663 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
7665 gcc_assert (insn && INSN_P (insn));
7667 /* Size of ALAT is 32. As far as we perform conservative
7668 data speculation, we keep ALAT half-empty. */
7669 if (pending_data_specs >= 16 && (TODO_SPEC (insn) & BEGIN_DATA))
7670 return ready_index == 0 ? -1 : 1;
7672 if (ready_index == 0)
7673 return 0;
7675 if ((!reload_completed
7676 || !safe_group_barrier_needed (insn))
7677 && (!mflag_sched_mem_insns_hard_limit
7678 || !is_load_p (insn)
7679 || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns))
7680 return 0;
7682 return 1;
7685 /* The following variable value is pseudo-insn used by the DFA insn
7686 scheduler to change the DFA state when the simulated clock is
7687 increased. */
7689 static rtx_insn *dfa_pre_cycle_insn;
7691 /* Returns 1 when a meaningful insn was scheduled between the last group
7692 barrier and LAST. */
7693 static int
7694 scheduled_good_insn (rtx_insn *last)
7696 if (last && recog_memoized (last) >= 0)
7697 return 1;
7699 for ( ;
7700 last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7701 && !stops_p[INSN_UID (last)];
7702 last = PREV_INSN (last))
7703 /* We could hit a NOTE_INSN_DELETED here which is actually outside
7704 the ebb we're scheduling. */
7705 if (INSN_P (last) && recog_memoized (last) >= 0)
7706 return 1;
7708 return 0;
7711 /* We are about to being issuing INSN. Return nonzero if we cannot
7712 issue it on given cycle CLOCK and return zero if we should not sort
7713 the ready queue on the next clock start. */
7715 static int
7716 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx_insn *insn, int last_clock,
7717 int clock, int *sort_p)
7719 gcc_assert (insn && INSN_P (insn));
7721 if (DEBUG_INSN_P (insn))
7722 return 0;
7724 /* When a group barrier is needed for insn, last_scheduled_insn
7725 should be set. */
7726 gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7727 || last_scheduled_insn);
7729 if ((reload_completed
7730 && (safe_group_barrier_needed (insn)
7731 || (mflag_sched_stop_bits_after_every_cycle
7732 && last_clock != clock
7733 && last_scheduled_insn
7734 && scheduled_good_insn (last_scheduled_insn))))
7735 || (last_scheduled_insn
7736 && (CALL_P (last_scheduled_insn)
7737 || unknown_for_bundling_p (last_scheduled_insn))))
7739 init_insn_group_barriers ();
7741 if (verbose && dump)
7742 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
7743 last_clock == clock ? " + cycle advance" : "");
7745 stop_before_p = 1;
7746 current_cycle = clock;
7747 mem_ops_in_group[current_cycle % 4] = 0;
7749 if (last_clock == clock)
7751 state_transition (curr_state, dfa_stop_insn);
7752 if (TARGET_EARLY_STOP_BITS)
7753 *sort_p = (last_scheduled_insn == NULL_RTX
7754 || ! CALL_P (last_scheduled_insn));
7755 else
7756 *sort_p = 0;
7757 return 1;
7760 if (last_scheduled_insn)
7762 if (unknown_for_bundling_p (last_scheduled_insn))
7763 state_reset (curr_state);
7764 else
7766 memcpy (curr_state, prev_cycle_state, dfa_state_size);
7767 state_transition (curr_state, dfa_stop_insn);
7768 state_transition (curr_state, dfa_pre_cycle_insn);
7769 state_transition (curr_state, NULL);
7773 return 0;
7776 /* Implement targetm.sched.h_i_d_extended hook.
7777 Extend internal data structures. */
7778 static void
7779 ia64_h_i_d_extended (void)
7781 if (stops_p != NULL)
7783 int new_clocks_length = get_max_uid () * 3 / 2;
7784 stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
7785 clocks_length = new_clocks_length;
7790 /* This structure describes the data used by the backend to guide scheduling.
7791 When the current scheduling point is switched, this data should be saved
7792 and restored later, if the scheduler returns to this point. */
7793 struct _ia64_sched_context
7795 state_t prev_cycle_state;
7796 rtx_insn *last_scheduled_insn;
7797 struct reg_write_state rws_sum[NUM_REGS];
7798 struct reg_write_state rws_insn[NUM_REGS];
7799 int first_instruction;
7800 int pending_data_specs;
7801 int current_cycle;
7802 char mem_ops_in_group[4];
7804 typedef struct _ia64_sched_context *ia64_sched_context_t;
7806 /* Allocates a scheduling context. */
7807 static void *
7808 ia64_alloc_sched_context (void)
7810 return xmalloc (sizeof (struct _ia64_sched_context));
7813 /* Initializes the _SC context with clean data, if CLEAN_P, and from
7814 the global context otherwise. */
7815 static void
7816 ia64_init_sched_context (void *_sc, bool clean_p)
7818 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7820 sc->prev_cycle_state = xmalloc (dfa_state_size);
7821 if (clean_p)
7823 state_reset (sc->prev_cycle_state);
7824 sc->last_scheduled_insn = NULL;
7825 memset (sc->rws_sum, 0, sizeof (rws_sum));
7826 memset (sc->rws_insn, 0, sizeof (rws_insn));
7827 sc->first_instruction = 1;
7828 sc->pending_data_specs = 0;
7829 sc->current_cycle = 0;
7830 memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7832 else
7834 memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7835 sc->last_scheduled_insn = last_scheduled_insn;
7836 memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7837 memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7838 sc->first_instruction = first_instruction;
7839 sc->pending_data_specs = pending_data_specs;
7840 sc->current_cycle = current_cycle;
7841 memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7845 /* Sets the global scheduling context to the one pointed to by _SC. */
7846 static void
7847 ia64_set_sched_context (void *_sc)
7849 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7851 gcc_assert (sc != NULL);
7853 memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7854 last_scheduled_insn = sc->last_scheduled_insn;
7855 memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7856 memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7857 first_instruction = sc->first_instruction;
7858 pending_data_specs = sc->pending_data_specs;
7859 current_cycle = sc->current_cycle;
7860 memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7863 /* Clears the data in the _SC scheduling context. */
7864 static void
7865 ia64_clear_sched_context (void *_sc)
7867 ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7869 free (sc->prev_cycle_state);
7870 sc->prev_cycle_state = NULL;
7873 /* Frees the _SC scheduling context. */
7874 static void
7875 ia64_free_sched_context (void *_sc)
7877 gcc_assert (_sc != NULL);
7879 free (_sc);
7882 typedef rtx (* gen_func_t) (rtx, rtx);
7884 /* Return a function that will generate a load of mode MODE_NO
7885 with speculation types TS. */
7886 static gen_func_t
7887 get_spec_load_gen_function (ds_t ts, int mode_no)
7889 static gen_func_t gen_ld_[] = {
7890 gen_movbi,
7891 gen_movqi_internal,
7892 gen_movhi_internal,
7893 gen_movsi_internal,
7894 gen_movdi_internal,
7895 gen_movsf_internal,
7896 gen_movdf_internal,
7897 gen_movxf_internal,
7898 gen_movti_internal,
7899 gen_zero_extendqidi2,
7900 gen_zero_extendhidi2,
7901 gen_zero_extendsidi2,
7904 static gen_func_t gen_ld_a[] = {
7905 gen_movbi_advanced,
7906 gen_movqi_advanced,
7907 gen_movhi_advanced,
7908 gen_movsi_advanced,
7909 gen_movdi_advanced,
7910 gen_movsf_advanced,
7911 gen_movdf_advanced,
7912 gen_movxf_advanced,
7913 gen_movti_advanced,
7914 gen_zero_extendqidi2_advanced,
7915 gen_zero_extendhidi2_advanced,
7916 gen_zero_extendsidi2_advanced,
7918 static gen_func_t gen_ld_s[] = {
7919 gen_movbi_speculative,
7920 gen_movqi_speculative,
7921 gen_movhi_speculative,
7922 gen_movsi_speculative,
7923 gen_movdi_speculative,
7924 gen_movsf_speculative,
7925 gen_movdf_speculative,
7926 gen_movxf_speculative,
7927 gen_movti_speculative,
7928 gen_zero_extendqidi2_speculative,
7929 gen_zero_extendhidi2_speculative,
7930 gen_zero_extendsidi2_speculative,
7932 static gen_func_t gen_ld_sa[] = {
7933 gen_movbi_speculative_advanced,
7934 gen_movqi_speculative_advanced,
7935 gen_movhi_speculative_advanced,
7936 gen_movsi_speculative_advanced,
7937 gen_movdi_speculative_advanced,
7938 gen_movsf_speculative_advanced,
7939 gen_movdf_speculative_advanced,
7940 gen_movxf_speculative_advanced,
7941 gen_movti_speculative_advanced,
7942 gen_zero_extendqidi2_speculative_advanced,
7943 gen_zero_extendhidi2_speculative_advanced,
7944 gen_zero_extendsidi2_speculative_advanced,
7946 static gen_func_t gen_ld_s_a[] = {
7947 gen_movbi_speculative_a,
7948 gen_movqi_speculative_a,
7949 gen_movhi_speculative_a,
7950 gen_movsi_speculative_a,
7951 gen_movdi_speculative_a,
7952 gen_movsf_speculative_a,
7953 gen_movdf_speculative_a,
7954 gen_movxf_speculative_a,
7955 gen_movti_speculative_a,
7956 gen_zero_extendqidi2_speculative_a,
7957 gen_zero_extendhidi2_speculative_a,
7958 gen_zero_extendsidi2_speculative_a,
7961 gen_func_t *gen_ld;
7963 if (ts & BEGIN_DATA)
7965 if (ts & BEGIN_CONTROL)
7966 gen_ld = gen_ld_sa;
7967 else
7968 gen_ld = gen_ld_a;
7970 else if (ts & BEGIN_CONTROL)
7972 if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7973 || ia64_needs_block_p (ts))
7974 gen_ld = gen_ld_s;
7975 else
7976 gen_ld = gen_ld_s_a;
7978 else if (ts == 0)
7979 gen_ld = gen_ld_;
7980 else
7981 gcc_unreachable ();
7983 return gen_ld[mode_no];
7986 /* Constants that help mapping 'machine_mode' to int. */
7987 enum SPEC_MODES
7989 SPEC_MODE_INVALID = -1,
7990 SPEC_MODE_FIRST = 0,
7991 SPEC_MODE_FOR_EXTEND_FIRST = 1,
7992 SPEC_MODE_FOR_EXTEND_LAST = 3,
7993 SPEC_MODE_LAST = 8
7996 enum
7998 /* Offset to reach ZERO_EXTEND patterns. */
7999 SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
8002 /* Return index of the MODE. */
8003 static int
8004 ia64_mode_to_int (machine_mode mode)
8006 switch (mode)
8008 case E_BImode: return 0; /* SPEC_MODE_FIRST */
8009 case E_QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
8010 case E_HImode: return 2;
8011 case E_SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
8012 case E_DImode: return 4;
8013 case E_SFmode: return 5;
8014 case E_DFmode: return 6;
8015 case E_XFmode: return 7;
8016 case E_TImode:
8017 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not
8018 mentioned in itanium[12].md. Predicate fp_register_operand also
8019 needs to be defined. Bottom line: better disable for now. */
8020 return SPEC_MODE_INVALID;
8021 default: return SPEC_MODE_INVALID;
8025 /* Provide information about speculation capabilities. */
8026 static void
8027 ia64_set_sched_flags (spec_info_t spec_info)
8029 unsigned int *flags = &(current_sched_info->flags);
8031 if (*flags & SCHED_RGN
8032 || *flags & SCHED_EBB
8033 || *flags & SEL_SCHED)
8035 int mask = 0;
8037 if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
8038 || (mflag_sched_ar_data_spec && reload_completed))
8040 mask |= BEGIN_DATA;
8042 if (!sel_sched_p ()
8043 && ((mflag_sched_br_in_data_spec && !reload_completed)
8044 || (mflag_sched_ar_in_data_spec && reload_completed)))
8045 mask |= BE_IN_DATA;
8048 if (mflag_sched_control_spec
8049 && (!sel_sched_p ()
8050 || reload_completed))
8052 mask |= BEGIN_CONTROL;
8054 if (!sel_sched_p () && mflag_sched_in_control_spec)
8055 mask |= BE_IN_CONTROL;
8058 spec_info->mask = mask;
8060 if (mask)
8062 *flags |= USE_DEPS_LIST | DO_SPECULATION;
8064 if (mask & BE_IN_SPEC)
8065 *flags |= NEW_BBS;
8067 spec_info->flags = 0;
8069 if ((mask & CONTROL_SPEC)
8070 && sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
8071 spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
8073 if (sched_verbose >= 1)
8074 spec_info->dump = sched_dump;
8075 else
8076 spec_info->dump = 0;
8078 if (mflag_sched_count_spec_in_critical_path)
8079 spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
8082 else
8083 spec_info->mask = 0;
8086 /* If INSN is an appropriate load return its mode.
8087 Return -1 otherwise. */
8088 static int
8089 get_mode_no_for_insn (rtx_insn *insn)
8091 rtx reg, mem, mode_rtx;
8092 int mode_no;
8093 bool extend_p;
8095 extract_insn_cached (insn);
8097 /* We use WHICH_ALTERNATIVE only after reload. This will
8098 guarantee that reload won't touch a speculative insn. */
8100 if (recog_data.n_operands != 2)
8101 return -1;
8103 reg = recog_data.operand[0];
8104 mem = recog_data.operand[1];
8106 /* We should use MEM's mode since REG's mode in presence of
8107 ZERO_EXTEND will always be DImode. */
8108 if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
8109 /* Process non-speculative ld. */
8111 if (!reload_completed)
8113 /* Do not speculate into regs like ar.lc. */
8114 if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
8115 return -1;
8117 if (!MEM_P (mem))
8118 return -1;
8121 rtx mem_reg = XEXP (mem, 0);
8123 if (!REG_P (mem_reg))
8124 return -1;
8127 mode_rtx = mem;
8129 else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
8131 gcc_assert (REG_P (reg) && MEM_P (mem));
8132 mode_rtx = mem;
8134 else
8135 return -1;
8137 else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
8138 || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
8139 || get_attr_check_load (insn) == CHECK_LOAD_YES)
8140 /* Process speculative ld or ld.c. */
8142 gcc_assert (REG_P (reg) && MEM_P (mem));
8143 mode_rtx = mem;
8145 else
8147 enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
8149 if (attr_class == ITANIUM_CLASS_CHK_A
8150 || attr_class == ITANIUM_CLASS_CHK_S_I
8151 || attr_class == ITANIUM_CLASS_CHK_S_F)
8152 /* Process chk. */
8153 mode_rtx = reg;
8154 else
8155 return -1;
8158 mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
8160 if (mode_no == SPEC_MODE_INVALID)
8161 return -1;
8163 extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
8165 if (extend_p)
8167 if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
8168 && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
8169 return -1;
8171 mode_no += SPEC_GEN_EXTEND_OFFSET;
8174 return mode_no;
8177 /* If X is an unspec part of a speculative load, return its code.
8178 Return -1 otherwise. */
8179 static int
8180 get_spec_unspec_code (const_rtx x)
8182 if (GET_CODE (x) != UNSPEC)
8183 return -1;
8186 int code;
8188 code = XINT (x, 1);
8190 switch (code)
8192 case UNSPEC_LDA:
8193 case UNSPEC_LDS:
8194 case UNSPEC_LDS_A:
8195 case UNSPEC_LDSA:
8196 return code;
8198 default:
8199 return -1;
8204 /* Implement skip_rtx_p hook. */
8205 static bool
8206 ia64_skip_rtx_p (const_rtx x)
8208 return get_spec_unspec_code (x) != -1;
8211 /* If INSN is a speculative load, return its UNSPEC code.
8212 Return -1 otherwise. */
8213 static int
8214 get_insn_spec_code (const_rtx insn)
8216 rtx pat, reg, mem;
8218 pat = PATTERN (insn);
8220 if (GET_CODE (pat) == COND_EXEC)
8221 pat = COND_EXEC_CODE (pat);
8223 if (GET_CODE (pat) != SET)
8224 return -1;
8226 reg = SET_DEST (pat);
8227 if (!REG_P (reg))
8228 return -1;
8230 mem = SET_SRC (pat);
8231 if (GET_CODE (mem) == ZERO_EXTEND)
8232 mem = XEXP (mem, 0);
8234 return get_spec_unspec_code (mem);
8237 /* If INSN is a speculative load, return a ds with the speculation types.
8238 Otherwise [if INSN is a normal instruction] return 0. */
8239 static ds_t
8240 ia64_get_insn_spec_ds (rtx_insn *insn)
8242 int code = get_insn_spec_code (insn);
8244 switch (code)
8246 case UNSPEC_LDA:
8247 return BEGIN_DATA;
8249 case UNSPEC_LDS:
8250 case UNSPEC_LDS_A:
8251 return BEGIN_CONTROL;
8253 case UNSPEC_LDSA:
8254 return BEGIN_DATA | BEGIN_CONTROL;
8256 default:
8257 return 0;
8261 /* If INSN is a speculative load return a ds with the speculation types that
8262 will be checked.
8263 Otherwise [if INSN is a normal instruction] return 0. */
8264 static ds_t
8265 ia64_get_insn_checked_ds (rtx_insn *insn)
8267 int code = get_insn_spec_code (insn);
8269 switch (code)
8271 case UNSPEC_LDA:
8272 return BEGIN_DATA | BEGIN_CONTROL;
8274 case UNSPEC_LDS:
8275 return BEGIN_CONTROL;
8277 case UNSPEC_LDS_A:
8278 case UNSPEC_LDSA:
8279 return BEGIN_DATA | BEGIN_CONTROL;
8281 default:
8282 return 0;
8286 /* If GEN_P is true, calculate the index of needed speculation check and return
8287 speculative pattern for INSN with speculative mode TS, machine mode
8288 MODE_NO and with ZERO_EXTEND (if EXTEND_P is true).
8289 If GEN_P is false, just calculate the index of needed speculation check. */
8290 static rtx
8291 ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
8293 rtx pat, new_pat;
8294 gen_func_t gen_load;
8296 gen_load = get_spec_load_gen_function (ts, mode_no);
8298 new_pat = gen_load (copy_rtx (recog_data.operand[0]),
8299 copy_rtx (recog_data.operand[1]));
8301 pat = PATTERN (insn);
8302 if (GET_CODE (pat) == COND_EXEC)
8303 new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8304 new_pat);
8306 return new_pat;
8309 static bool
8310 insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
8311 ds_t ds ATTRIBUTE_UNUSED)
8313 return false;
8316 /* Implement targetm.sched.speculate_insn hook.
8317 Check if the INSN can be TS speculative.
8318 If 'no' - return -1.
8319 If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
8320 If current pattern of the INSN already provides TS speculation,
8321 return 0. */
8322 static int
8323 ia64_speculate_insn (rtx_insn *insn, ds_t ts, rtx *new_pat)
8325 int mode_no;
8326 int res;
8328 gcc_assert (!(ts & ~SPECULATIVE));
8330 if (ia64_spec_check_p (insn))
8331 return -1;
8333 if ((ts & BE_IN_SPEC)
8334 && !insn_can_be_in_speculative_p (insn, ts))
8335 return -1;
8337 mode_no = get_mode_no_for_insn (insn);
8339 if (mode_no != SPEC_MODE_INVALID)
8341 if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
8342 res = 0;
8343 else
8345 res = 1;
8346 *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
8349 else
8350 res = -1;
8352 return res;
8355 /* Return a function that will generate a check for speculation TS with mode
8356 MODE_NO.
8357 If simple check is needed, pass true for SIMPLE_CHECK_P.
8358 If clearing check is needed, pass true for CLEARING_CHECK_P. */
8359 static gen_func_t
8360 get_spec_check_gen_function (ds_t ts, int mode_no,
8361 bool simple_check_p, bool clearing_check_p)
8363 static gen_func_t gen_ld_c_clr[] = {
8364 gen_movbi_clr,
8365 gen_movqi_clr,
8366 gen_movhi_clr,
8367 gen_movsi_clr,
8368 gen_movdi_clr,
8369 gen_movsf_clr,
8370 gen_movdf_clr,
8371 gen_movxf_clr,
8372 gen_movti_clr,
8373 gen_zero_extendqidi2_clr,
8374 gen_zero_extendhidi2_clr,
8375 gen_zero_extendsidi2_clr,
8377 static gen_func_t gen_ld_c_nc[] = {
8378 gen_movbi_nc,
8379 gen_movqi_nc,
8380 gen_movhi_nc,
8381 gen_movsi_nc,
8382 gen_movdi_nc,
8383 gen_movsf_nc,
8384 gen_movdf_nc,
8385 gen_movxf_nc,
8386 gen_movti_nc,
8387 gen_zero_extendqidi2_nc,
8388 gen_zero_extendhidi2_nc,
8389 gen_zero_extendsidi2_nc,
8391 static gen_func_t gen_chk_a_clr[] = {
8392 gen_advanced_load_check_clr_bi,
8393 gen_advanced_load_check_clr_qi,
8394 gen_advanced_load_check_clr_hi,
8395 gen_advanced_load_check_clr_si,
8396 gen_advanced_load_check_clr_di,
8397 gen_advanced_load_check_clr_sf,
8398 gen_advanced_load_check_clr_df,
8399 gen_advanced_load_check_clr_xf,
8400 gen_advanced_load_check_clr_ti,
8401 gen_advanced_load_check_clr_di,
8402 gen_advanced_load_check_clr_di,
8403 gen_advanced_load_check_clr_di,
8405 static gen_func_t gen_chk_a_nc[] = {
8406 gen_advanced_load_check_nc_bi,
8407 gen_advanced_load_check_nc_qi,
8408 gen_advanced_load_check_nc_hi,
8409 gen_advanced_load_check_nc_si,
8410 gen_advanced_load_check_nc_di,
8411 gen_advanced_load_check_nc_sf,
8412 gen_advanced_load_check_nc_df,
8413 gen_advanced_load_check_nc_xf,
8414 gen_advanced_load_check_nc_ti,
8415 gen_advanced_load_check_nc_di,
8416 gen_advanced_load_check_nc_di,
8417 gen_advanced_load_check_nc_di,
8419 static gen_func_t gen_chk_s[] = {
8420 gen_speculation_check_bi,
8421 gen_speculation_check_qi,
8422 gen_speculation_check_hi,
8423 gen_speculation_check_si,
8424 gen_speculation_check_di,
8425 gen_speculation_check_sf,
8426 gen_speculation_check_df,
8427 gen_speculation_check_xf,
8428 gen_speculation_check_ti,
8429 gen_speculation_check_di,
8430 gen_speculation_check_di,
8431 gen_speculation_check_di,
8434 gen_func_t *gen_check;
8436 if (ts & BEGIN_DATA)
8438 /* We don't need recovery because even if this is ld.sa
8439 ALAT entry will be allocated only if NAT bit is set to zero.
8440 So it is enough to use ld.c here. */
8442 if (simple_check_p)
8444 gcc_assert (mflag_sched_spec_ldc);
8446 if (clearing_check_p)
8447 gen_check = gen_ld_c_clr;
8448 else
8449 gen_check = gen_ld_c_nc;
8451 else
8453 if (clearing_check_p)
8454 gen_check = gen_chk_a_clr;
8455 else
8456 gen_check = gen_chk_a_nc;
8459 else if (ts & BEGIN_CONTROL)
8461 if (simple_check_p)
8462 /* We might want to use ld.sa -> ld.c instead of
8463 ld.s -> chk.s. */
8465 gcc_assert (!ia64_needs_block_p (ts));
8467 if (clearing_check_p)
8468 gen_check = gen_ld_c_clr;
8469 else
8470 gen_check = gen_ld_c_nc;
8472 else
8474 gen_check = gen_chk_s;
8477 else
8478 gcc_unreachable ();
8480 gcc_assert (mode_no >= 0);
8481 return gen_check[mode_no];
8484 /* Return nonzero, if INSN needs branchy recovery check. */
8485 static bool
8486 ia64_needs_block_p (ds_t ts)
8488 if (ts & BEGIN_DATA)
8489 return !mflag_sched_spec_ldc;
8491 gcc_assert ((ts & BEGIN_CONTROL) != 0);
8493 return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
8496 /* Generate (or regenerate) a recovery check for INSN. */
8497 static rtx
8498 ia64_gen_spec_check (rtx_insn *insn, rtx_insn *label, ds_t ds)
8500 rtx op1, pat, check_pat;
8501 gen_func_t gen_check;
8502 int mode_no;
8504 mode_no = get_mode_no_for_insn (insn);
8505 gcc_assert (mode_no >= 0);
8507 if (label)
8508 op1 = label;
8509 else
8511 gcc_assert (!ia64_needs_block_p (ds));
8512 op1 = copy_rtx (recog_data.operand[1]);
8515 gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
8516 true);
8518 check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
8520 pat = PATTERN (insn);
8521 if (GET_CODE (pat) == COND_EXEC)
8522 check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8523 check_pat);
8525 return check_pat;
8528 /* Return nonzero, if X is branchy recovery check. */
8529 static int
8530 ia64_spec_check_p (rtx x)
8532 x = PATTERN (x);
8533 if (GET_CODE (x) == COND_EXEC)
8534 x = COND_EXEC_CODE (x);
8535 if (GET_CODE (x) == SET)
8536 return ia64_spec_check_src_p (SET_SRC (x));
8537 return 0;
8540 /* Return nonzero, if SRC belongs to recovery check. */
8541 static int
8542 ia64_spec_check_src_p (rtx src)
8544 if (GET_CODE (src) == IF_THEN_ELSE)
8546 rtx t;
8548 t = XEXP (src, 0);
8549 if (GET_CODE (t) == NE)
8551 t = XEXP (t, 0);
8553 if (GET_CODE (t) == UNSPEC)
8555 int code;
8557 code = XINT (t, 1);
8559 if (code == UNSPEC_LDCCLR
8560 || code == UNSPEC_LDCNC
8561 || code == UNSPEC_CHKACLR
8562 || code == UNSPEC_CHKANC
8563 || code == UNSPEC_CHKS)
8565 gcc_assert (code != 0);
8566 return code;
8571 return 0;
8575 /* The following page contains abstract data `bundle states' which are
8576 used for bundling insns (inserting nops and template generation). */
8578 /* The following describes state of insn bundling. */
8580 struct bundle_state
8582 /* Unique bundle state number to identify them in the debugging
8583 output */
8584 int unique_num;
8585 rtx_insn *insn; /* corresponding insn, NULL for the 1st and the last state */
8586 /* number nops before and after the insn */
8587 short before_nops_num, after_nops_num;
8588 int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
8589 insn */
8590 int cost; /* cost of the state in cycles */
8591 int accumulated_insns_num; /* number of all previous insns including
8592 nops. L is considered as 2 insns */
8593 int branch_deviation; /* deviation of previous branches from 3rd slots */
8594 int middle_bundle_stops; /* number of stop bits in the middle of bundles */
8595 struct bundle_state *next; /* next state with the same insn_num */
8596 struct bundle_state *originator; /* originator (previous insn state) */
8597 /* All bundle states are in the following chain. */
8598 struct bundle_state *allocated_states_chain;
8599 /* The DFA State after issuing the insn and the nops. */
8600 state_t dfa_state;
8603 /* The following is map insn number to the corresponding bundle state. */
8605 static struct bundle_state **index_to_bundle_states;
8607 /* The unique number of next bundle state. */
8609 static int bundle_states_num;
8611 /* All allocated bundle states are in the following chain. */
8613 static struct bundle_state *allocated_bundle_states_chain;
8615 /* All allocated but not used bundle states are in the following
8616 chain. */
8618 static struct bundle_state *free_bundle_state_chain;
8621 /* The following function returns a free bundle state. */
8623 static struct bundle_state *
8624 get_free_bundle_state (void)
8626 struct bundle_state *result;
8628 if (free_bundle_state_chain != NULL)
8630 result = free_bundle_state_chain;
8631 free_bundle_state_chain = result->next;
8633 else
8635 result = XNEW (struct bundle_state);
8636 result->dfa_state = xmalloc (dfa_state_size);
8637 result->allocated_states_chain = allocated_bundle_states_chain;
8638 allocated_bundle_states_chain = result;
8640 result->unique_num = bundle_states_num++;
8641 return result;
8645 /* The following function frees given bundle state. */
8647 static void
8648 free_bundle_state (struct bundle_state *state)
8650 state->next = free_bundle_state_chain;
8651 free_bundle_state_chain = state;
8654 /* Start work with abstract data `bundle states'. */
8656 static void
8657 initiate_bundle_states (void)
8659 bundle_states_num = 0;
8660 free_bundle_state_chain = NULL;
8661 allocated_bundle_states_chain = NULL;
8664 /* Finish work with abstract data `bundle states'. */
8666 static void
8667 finish_bundle_states (void)
8669 struct bundle_state *curr_state, *next_state;
8671 for (curr_state = allocated_bundle_states_chain;
8672 curr_state != NULL;
8673 curr_state = next_state)
8675 next_state = curr_state->allocated_states_chain;
8676 free (curr_state->dfa_state);
8677 free (curr_state);
8681 /* Hashtable helpers. */
8683 struct bundle_state_hasher : nofree_ptr_hash <bundle_state>
8685 static inline hashval_t hash (const bundle_state *);
8686 static inline bool equal (const bundle_state *, const bundle_state *);
8689 /* The function returns hash of BUNDLE_STATE. */
8691 inline hashval_t
8692 bundle_state_hasher::hash (const bundle_state *state)
8694 unsigned result, i;
8696 for (result = i = 0; i < dfa_state_size; i++)
8697 result += (((unsigned char *) state->dfa_state) [i]
8698 << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8699 return result + state->insn_num;
8702 /* The function returns nonzero if the bundle state keys are equal. */
8704 inline bool
8705 bundle_state_hasher::equal (const bundle_state *state1,
8706 const bundle_state *state2)
8708 return (state1->insn_num == state2->insn_num
8709 && memcmp (state1->dfa_state, state2->dfa_state,
8710 dfa_state_size) == 0);
8713 /* Hash table of the bundle states. The key is dfa_state and insn_num
8714 of the bundle states. */
8716 static hash_table<bundle_state_hasher> *bundle_state_table;
8718 /* The function inserts the BUNDLE_STATE into the hash table. The
8719 function returns nonzero if the bundle has been inserted into the
8720 table. The table contains the best bundle state with given key. */
8722 static int
8723 insert_bundle_state (struct bundle_state *bundle_state)
8725 struct bundle_state **entry_ptr;
8727 entry_ptr = bundle_state_table->find_slot (bundle_state, INSERT);
8728 if (*entry_ptr == NULL)
8730 bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8731 index_to_bundle_states [bundle_state->insn_num] = bundle_state;
8732 *entry_ptr = bundle_state;
8733 return TRUE;
8735 else if (bundle_state->cost < (*entry_ptr)->cost
8736 || (bundle_state->cost == (*entry_ptr)->cost
8737 && ((*entry_ptr)->accumulated_insns_num
8738 > bundle_state->accumulated_insns_num
8739 || ((*entry_ptr)->accumulated_insns_num
8740 == bundle_state->accumulated_insns_num
8741 && ((*entry_ptr)->branch_deviation
8742 > bundle_state->branch_deviation
8743 || ((*entry_ptr)->branch_deviation
8744 == bundle_state->branch_deviation
8745 && (*entry_ptr)->middle_bundle_stops
8746 > bundle_state->middle_bundle_stops))))))
8749 struct bundle_state temp;
8751 temp = **entry_ptr;
8752 **entry_ptr = *bundle_state;
8753 (*entry_ptr)->next = temp.next;
8754 *bundle_state = temp;
8756 return FALSE;
8759 /* Start work with the hash table. */
8761 static void
8762 initiate_bundle_state_table (void)
8764 bundle_state_table = new hash_table<bundle_state_hasher> (50);
8767 /* Finish work with the hash table. */
8769 static void
8770 finish_bundle_state_table (void)
8772 delete bundle_state_table;
8773 bundle_state_table = NULL;
8778 /* The following variable is a insn `nop' used to check bundle states
8779 with different number of inserted nops. */
8781 static rtx_insn *ia64_nop;
8783 /* The following function tries to issue NOPS_NUM nops for the current
8784 state without advancing processor cycle. If it failed, the
8785 function returns FALSE and frees the current state. */
8787 static int
8788 try_issue_nops (struct bundle_state *curr_state, int nops_num)
8790 int i;
8792 for (i = 0; i < nops_num; i++)
8793 if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8795 free_bundle_state (curr_state);
8796 return FALSE;
8798 return TRUE;
8801 /* The following function tries to issue INSN for the current
8802 state without advancing processor cycle. If it failed, the
8803 function returns FALSE and frees the current state. */
8805 static int
8806 try_issue_insn (struct bundle_state *curr_state, rtx insn)
8808 if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8810 free_bundle_state (curr_state);
8811 return FALSE;
8813 return TRUE;
8816 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
8817 starting with ORIGINATOR without advancing processor cycle. If
8818 TRY_BUNDLE_END_P is TRUE, the function also/only (if
8819 ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill all bundle.
8820 If it was successful, the function creates new bundle state and
8821 insert into the hash table and into `index_to_bundle_states'. */
8823 static void
8824 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8825 rtx_insn *insn, int try_bundle_end_p,
8826 int only_bundle_end_p)
8828 struct bundle_state *curr_state;
8830 curr_state = get_free_bundle_state ();
8831 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8832 curr_state->insn = insn;
8833 curr_state->insn_num = originator->insn_num + 1;
8834 curr_state->cost = originator->cost;
8835 curr_state->originator = originator;
8836 curr_state->before_nops_num = before_nops_num;
8837 curr_state->after_nops_num = 0;
8838 curr_state->accumulated_insns_num
8839 = originator->accumulated_insns_num + before_nops_num;
8840 curr_state->branch_deviation = originator->branch_deviation;
8841 curr_state->middle_bundle_stops = originator->middle_bundle_stops;
8842 gcc_assert (insn);
8843 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
8845 gcc_assert (GET_MODE (insn) != TImode);
8846 if (!try_issue_nops (curr_state, before_nops_num))
8847 return;
8848 if (!try_issue_insn (curr_state, insn))
8849 return;
8850 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
8851 if (curr_state->accumulated_insns_num % 3 != 0)
8852 curr_state->middle_bundle_stops++;
8853 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8854 && curr_state->accumulated_insns_num % 3 != 0)
8856 free_bundle_state (curr_state);
8857 return;
8860 else if (GET_MODE (insn) != TImode)
8862 if (!try_issue_nops (curr_state, before_nops_num))
8863 return;
8864 if (!try_issue_insn (curr_state, insn))
8865 return;
8866 curr_state->accumulated_insns_num++;
8867 gcc_assert (!unknown_for_bundling_p (insn));
8869 if (ia64_safe_type (insn) == TYPE_L)
8870 curr_state->accumulated_insns_num++;
8872 else
8874 /* If this is an insn that must be first in a group, then don't allow
8875 nops to be emitted before it. Currently, alloc is the only such
8876 supported instruction. */
8877 /* ??? The bundling automatons should handle this for us, but they do
8878 not yet have support for the first_insn attribute. */
8879 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8881 free_bundle_state (curr_state);
8882 return;
8885 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8886 state_transition (curr_state->dfa_state, NULL);
8887 curr_state->cost++;
8888 if (!try_issue_nops (curr_state, before_nops_num))
8889 return;
8890 if (!try_issue_insn (curr_state, insn))
8891 return;
8892 curr_state->accumulated_insns_num++;
8893 if (unknown_for_bundling_p (insn))
8895 /* Finish bundle containing asm insn. */
8896 curr_state->after_nops_num
8897 = 3 - curr_state->accumulated_insns_num % 3;
8898 curr_state->accumulated_insns_num
8899 += 3 - curr_state->accumulated_insns_num % 3;
8901 else if (ia64_safe_type (insn) == TYPE_L)
8902 curr_state->accumulated_insns_num++;
8904 if (ia64_safe_type (insn) == TYPE_B)
8905 curr_state->branch_deviation
8906 += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8907 if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8909 if (!only_bundle_end_p && insert_bundle_state (curr_state))
8911 state_t dfa_state;
8912 struct bundle_state *curr_state1;
8913 struct bundle_state *allocated_states_chain;
8915 curr_state1 = get_free_bundle_state ();
8916 dfa_state = curr_state1->dfa_state;
8917 allocated_states_chain = curr_state1->allocated_states_chain;
8918 *curr_state1 = *curr_state;
8919 curr_state1->dfa_state = dfa_state;
8920 curr_state1->allocated_states_chain = allocated_states_chain;
8921 memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8922 dfa_state_size);
8923 curr_state = curr_state1;
8925 if (!try_issue_nops (curr_state,
8926 3 - curr_state->accumulated_insns_num % 3))
8927 return;
8928 curr_state->after_nops_num
8929 = 3 - curr_state->accumulated_insns_num % 3;
8930 curr_state->accumulated_insns_num
8931 += 3 - curr_state->accumulated_insns_num % 3;
8933 if (!insert_bundle_state (curr_state))
8934 free_bundle_state (curr_state);
8935 return;
8938 /* The following function returns position in the two window bundle
8939 for given STATE. */
8941 static int
8942 get_max_pos (state_t state)
8944 if (cpu_unit_reservation_p (state, pos_6))
8945 return 6;
8946 else if (cpu_unit_reservation_p (state, pos_5))
8947 return 5;
8948 else if (cpu_unit_reservation_p (state, pos_4))
8949 return 4;
8950 else if (cpu_unit_reservation_p (state, pos_3))
8951 return 3;
8952 else if (cpu_unit_reservation_p (state, pos_2))
8953 return 2;
8954 else if (cpu_unit_reservation_p (state, pos_1))
8955 return 1;
8956 else
8957 return 0;
8960 /* The function returns code of a possible template for given position
8961 and state. The function should be called only with 2 values of
8962 position equal to 3 or 6. We avoid generating F NOPs by putting
8963 templates containing F insns at the end of the template search
8964 because undocumented anomaly in McKinley derived cores which can
8965 cause stalls if an F-unit insn (including a NOP) is issued within a
8966 six-cycle window after reading certain application registers (such
8967 as ar.bsp). Furthermore, power-considerations also argue against
8968 the use of F-unit instructions unless they're really needed. */
8970 static int
8971 get_template (state_t state, int pos)
8973 switch (pos)
8975 case 3:
8976 if (cpu_unit_reservation_p (state, _0mmi_))
8977 return 1;
8978 else if (cpu_unit_reservation_p (state, _0mii_))
8979 return 0;
8980 else if (cpu_unit_reservation_p (state, _0mmb_))
8981 return 7;
8982 else if (cpu_unit_reservation_p (state, _0mib_))
8983 return 6;
8984 else if (cpu_unit_reservation_p (state, _0mbb_))
8985 return 5;
8986 else if (cpu_unit_reservation_p (state, _0bbb_))
8987 return 4;
8988 else if (cpu_unit_reservation_p (state, _0mmf_))
8989 return 3;
8990 else if (cpu_unit_reservation_p (state, _0mfi_))
8991 return 2;
8992 else if (cpu_unit_reservation_p (state, _0mfb_))
8993 return 8;
8994 else if (cpu_unit_reservation_p (state, _0mlx_))
8995 return 9;
8996 else
8997 gcc_unreachable ();
8998 case 6:
8999 if (cpu_unit_reservation_p (state, _1mmi_))
9000 return 1;
9001 else if (cpu_unit_reservation_p (state, _1mii_))
9002 return 0;
9003 else if (cpu_unit_reservation_p (state, _1mmb_))
9004 return 7;
9005 else if (cpu_unit_reservation_p (state, _1mib_))
9006 return 6;
9007 else if (cpu_unit_reservation_p (state, _1mbb_))
9008 return 5;
9009 else if (cpu_unit_reservation_p (state, _1bbb_))
9010 return 4;
9011 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
9012 return 3;
9013 else if (cpu_unit_reservation_p (state, _1mfi_))
9014 return 2;
9015 else if (cpu_unit_reservation_p (state, _1mfb_))
9016 return 8;
9017 else if (cpu_unit_reservation_p (state, _1mlx_))
9018 return 9;
9019 else
9020 gcc_unreachable ();
9021 default:
9022 gcc_unreachable ();
9026 /* True when INSN is important for bundling. */
9028 static bool
9029 important_for_bundling_p (rtx_insn *insn)
9031 return (INSN_P (insn)
9032 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
9033 && GET_CODE (PATTERN (insn)) != USE
9034 && GET_CODE (PATTERN (insn)) != CLOBBER);
9037 /* The following function returns an insn important for insn bundling
9038 followed by INSN and before TAIL. */
9040 static rtx_insn *
9041 get_next_important_insn (rtx_insn *insn, rtx_insn *tail)
9043 for (; insn && insn != tail; insn = NEXT_INSN (insn))
9044 if (important_for_bundling_p (insn))
9045 return insn;
9046 return NULL;
9049 /* True when INSN is unknown, but important, for bundling. */
9051 static bool
9052 unknown_for_bundling_p (rtx_insn *insn)
9054 return (INSN_P (insn)
9055 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_UNKNOWN
9056 && GET_CODE (PATTERN (insn)) != USE
9057 && GET_CODE (PATTERN (insn)) != CLOBBER);
9060 /* Add a bundle selector TEMPLATE0 before INSN. */
9062 static void
9063 ia64_add_bundle_selector_before (int template0, rtx_insn *insn)
9065 rtx b = gen_bundle_selector (GEN_INT (template0));
9067 ia64_emit_insn_before (b, insn);
9068 #if NR_BUNDLES == 10
9069 if ((template0 == 4 || template0 == 5)
9070 && ia64_except_unwind_info (&global_options) == UI_TARGET)
9072 int i;
9073 rtx note = NULL_RTX;
9075 /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
9076 first or second slot. If it is and has REG_EH_NOTE set, copy it
9077 to following nops, as br.call sets rp to the address of following
9078 bundle and therefore an EH region end must be on a bundle
9079 boundary. */
9080 insn = PREV_INSN (insn);
9081 for (i = 0; i < 3; i++)
9084 insn = next_active_insn (insn);
9085 while (NONJUMP_INSN_P (insn)
9086 && get_attr_empty (insn) == EMPTY_YES);
9087 if (CALL_P (insn))
9088 note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
9089 else if (note)
9091 int code;
9093 gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
9094 || code == CODE_FOR_nop_b);
9095 if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
9096 note = NULL_RTX;
9097 else
9098 add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
9102 #endif
9105 /* The following function does insn bundling. Bundling means
9106 inserting templates and nop insns to fit insn groups into permitted
9107 templates. Instruction scheduling uses NDFA (non-deterministic
9108 finite automata) encoding informations about the templates and the
9109 inserted nops. Nondeterminism of the automata permits follows
9110 all possible insn sequences very fast.
9112 Unfortunately it is not possible to get information about inserting
9113 nop insns and used templates from the automata states. The
9114 automata only says that we can issue an insn possibly inserting
9115 some nops before it and using some template. Therefore insn
9116 bundling in this function is implemented by using DFA
9117 (deterministic finite automata). We follow all possible insn
9118 sequences by inserting 0-2 nops (that is what the NDFA describe for
9119 insn scheduling) before/after each insn being bundled. We know the
9120 start of simulated processor cycle from insn scheduling (insn
9121 starting a new cycle has TImode).
9123 Simple implementation of insn bundling would create enormous
9124 number of possible insn sequences satisfying information about new
9125 cycle ticks taken from the insn scheduling. To make the algorithm
9126 practical we use dynamic programming. Each decision (about
9127 inserting nops and implicitly about previous decisions) is described
9128 by structure bundle_state (see above). If we generate the same
9129 bundle state (key is automaton state after issuing the insns and
9130 nops for it), we reuse already generated one. As consequence we
9131 reject some decisions which cannot improve the solution and
9132 reduce memory for the algorithm.
9134 When we reach the end of EBB (extended basic block), we choose the
9135 best sequence and then, moving back in EBB, insert templates for
9136 the best alternative. The templates are taken from querying
9137 automaton state for each insn in chosen bundle states.
9139 So the algorithm makes two (forward and backward) passes through
9140 EBB. */
9142 static void
9143 bundling (FILE *dump, int verbose, rtx_insn *prev_head_insn, rtx_insn *tail)
9145 struct bundle_state *curr_state, *next_state, *best_state;
9146 rtx_insn *insn, *next_insn;
9147 int insn_num;
9148 int i, bundle_end_p, only_bundle_end_p, asm_p;
9149 int pos = 0, max_pos, template0, template1;
9150 rtx_insn *b;
9151 enum attr_type type;
9153 insn_num = 0;
9154 /* Count insns in the EBB. */
9155 for (insn = NEXT_INSN (prev_head_insn);
9156 insn && insn != tail;
9157 insn = NEXT_INSN (insn))
9158 if (INSN_P (insn))
9159 insn_num++;
9160 if (insn_num == 0)
9161 return;
9162 bundling_p = 1;
9163 dfa_clean_insn_cache ();
9164 initiate_bundle_state_table ();
9165 index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
9166 /* First (forward) pass -- generation of bundle states. */
9167 curr_state = get_free_bundle_state ();
9168 curr_state->insn = NULL;
9169 curr_state->before_nops_num = 0;
9170 curr_state->after_nops_num = 0;
9171 curr_state->insn_num = 0;
9172 curr_state->cost = 0;
9173 curr_state->accumulated_insns_num = 0;
9174 curr_state->branch_deviation = 0;
9175 curr_state->middle_bundle_stops = 0;
9176 curr_state->next = NULL;
9177 curr_state->originator = NULL;
9178 state_reset (curr_state->dfa_state);
9179 index_to_bundle_states [0] = curr_state;
9180 insn_num = 0;
9181 /* Shift cycle mark if it is put on insn which could be ignored. */
9182 for (insn = NEXT_INSN (prev_head_insn);
9183 insn != tail;
9184 insn = NEXT_INSN (insn))
9185 if (INSN_P (insn)
9186 && !important_for_bundling_p (insn)
9187 && GET_MODE (insn) == TImode)
9189 PUT_MODE (insn, VOIDmode);
9190 for (next_insn = NEXT_INSN (insn);
9191 next_insn != tail;
9192 next_insn = NEXT_INSN (next_insn))
9193 if (important_for_bundling_p (next_insn)
9194 && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
9196 PUT_MODE (next_insn, TImode);
9197 break;
9200 /* Forward pass: generation of bundle states. */
9201 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
9202 insn != NULL_RTX;
9203 insn = next_insn)
9205 gcc_assert (important_for_bundling_p (insn));
9206 type = ia64_safe_type (insn);
9207 next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
9208 insn_num++;
9209 index_to_bundle_states [insn_num] = NULL;
9210 for (curr_state = index_to_bundle_states [insn_num - 1];
9211 curr_state != NULL;
9212 curr_state = next_state)
9214 pos = curr_state->accumulated_insns_num % 3;
9215 next_state = curr_state->next;
9216 /* We must fill up the current bundle in order to start a
9217 subsequent asm insn in a new bundle. Asm insn is always
9218 placed in a separate bundle. */
9219 only_bundle_end_p
9220 = (next_insn != NULL_RTX
9221 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
9222 && unknown_for_bundling_p (next_insn));
9223 /* We may fill up the current bundle if it is the cycle end
9224 without a group barrier. */
9225 bundle_end_p
9226 = (only_bundle_end_p || next_insn == NULL_RTX
9227 || (GET_MODE (next_insn) == TImode
9228 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
9229 if (type == TYPE_F || type == TYPE_B || type == TYPE_L
9230 || type == TYPE_S)
9231 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
9232 only_bundle_end_p);
9233 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
9234 only_bundle_end_p);
9235 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
9236 only_bundle_end_p);
9238 gcc_assert (index_to_bundle_states [insn_num]);
9239 for (curr_state = index_to_bundle_states [insn_num];
9240 curr_state != NULL;
9241 curr_state = curr_state->next)
9242 if (verbose >= 2 && dump)
9244 /* This structure is taken from generated code of the
9245 pipeline hazard recognizer (see file insn-attrtab.cc).
9246 Please don't forget to change the structure if a new
9247 automaton is added to .md file. */
9248 struct DFA_chip
9250 unsigned short one_automaton_state;
9251 unsigned short oneb_automaton_state;
9252 unsigned short two_automaton_state;
9253 unsigned short twob_automaton_state;
9256 fprintf
9257 (dump,
9258 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
9259 curr_state->unique_num,
9260 (curr_state->originator == NULL
9261 ? -1 : curr_state->originator->unique_num),
9262 curr_state->cost,
9263 curr_state->before_nops_num, curr_state->after_nops_num,
9264 curr_state->accumulated_insns_num, curr_state->branch_deviation,
9265 curr_state->middle_bundle_stops,
9266 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
9267 INSN_UID (insn));
9271 /* We should find a solution because the 2nd insn scheduling has
9272 found one. */
9273 gcc_assert (index_to_bundle_states [insn_num]);
9274 /* Find a state corresponding to the best insn sequence. */
9275 best_state = NULL;
9276 for (curr_state = index_to_bundle_states [insn_num];
9277 curr_state != NULL;
9278 curr_state = curr_state->next)
9279 /* We are just looking at the states with fully filled up last
9280 bundle. The first we prefer insn sequences with minimal cost
9281 then with minimal inserted nops and finally with branch insns
9282 placed in the 3rd slots. */
9283 if (curr_state->accumulated_insns_num % 3 == 0
9284 && (best_state == NULL || best_state->cost > curr_state->cost
9285 || (best_state->cost == curr_state->cost
9286 && (curr_state->accumulated_insns_num
9287 < best_state->accumulated_insns_num
9288 || (curr_state->accumulated_insns_num
9289 == best_state->accumulated_insns_num
9290 && (curr_state->branch_deviation
9291 < best_state->branch_deviation
9292 || (curr_state->branch_deviation
9293 == best_state->branch_deviation
9294 && curr_state->middle_bundle_stops
9295 < best_state->middle_bundle_stops)))))))
9296 best_state = curr_state;
9297 /* Second (backward) pass: adding nops and templates. */
9298 gcc_assert (best_state);
9299 insn_num = best_state->before_nops_num;
9300 template0 = template1 = -1;
9301 for (curr_state = best_state;
9302 curr_state->originator != NULL;
9303 curr_state = curr_state->originator)
9305 insn = curr_state->insn;
9306 asm_p = unknown_for_bundling_p (insn);
9307 insn_num++;
9308 if (verbose >= 2 && dump)
9310 struct DFA_chip
9312 unsigned short one_automaton_state;
9313 unsigned short oneb_automaton_state;
9314 unsigned short two_automaton_state;
9315 unsigned short twob_automaton_state;
9318 fprintf
9319 (dump,
9320 "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
9321 curr_state->unique_num,
9322 (curr_state->originator == NULL
9323 ? -1 : curr_state->originator->unique_num),
9324 curr_state->cost,
9325 curr_state->before_nops_num, curr_state->after_nops_num,
9326 curr_state->accumulated_insns_num, curr_state->branch_deviation,
9327 curr_state->middle_bundle_stops,
9328 ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
9329 INSN_UID (insn));
9331 /* Find the position in the current bundle window. The window can
9332 contain at most two bundles. Two bundle window means that
9333 the processor will make two bundle rotation. */
9334 max_pos = get_max_pos (curr_state->dfa_state);
9335 if (max_pos == 6
9336 /* The following (negative template number) means that the
9337 processor did one bundle rotation. */
9338 || (max_pos == 3 && template0 < 0))
9340 /* We are at the end of the window -- find template(s) for
9341 its bundle(s). */
9342 pos = max_pos;
9343 if (max_pos == 3)
9344 template0 = get_template (curr_state->dfa_state, 3);
9345 else
9347 template1 = get_template (curr_state->dfa_state, 3);
9348 template0 = get_template (curr_state->dfa_state, 6);
9351 if (max_pos > 3 && template1 < 0)
9352 /* It may happen when we have the stop inside a bundle. */
9354 gcc_assert (pos <= 3);
9355 template1 = get_template (curr_state->dfa_state, 3);
9356 pos += 3;
9358 if (!asm_p)
9359 /* Emit nops after the current insn. */
9360 for (i = 0; i < curr_state->after_nops_num; i++)
9362 rtx nop_pat = gen_nop ();
9363 rtx_insn *nop = emit_insn_after (nop_pat, insn);
9364 pos--;
9365 gcc_assert (pos >= 0);
9366 if (pos % 3 == 0)
9368 /* We are at the start of a bundle: emit the template
9369 (it should be defined). */
9370 gcc_assert (template0 >= 0);
9371 ia64_add_bundle_selector_before (template0, nop);
9372 /* If we have two bundle window, we make one bundle
9373 rotation. Otherwise template0 will be undefined
9374 (negative value). */
9375 template0 = template1;
9376 template1 = -1;
9379 /* Move the position backward in the window. Group barrier has
9380 no slot. Asm insn takes all bundle. */
9381 if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9382 && !unknown_for_bundling_p (insn))
9383 pos--;
9384 /* Long insn takes 2 slots. */
9385 if (ia64_safe_type (insn) == TYPE_L)
9386 pos--;
9387 gcc_assert (pos >= 0);
9388 if (pos % 3 == 0
9389 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9390 && !unknown_for_bundling_p (insn))
9392 /* The current insn is at the bundle start: emit the
9393 template. */
9394 gcc_assert (template0 >= 0);
9395 ia64_add_bundle_selector_before (template0, insn);
9396 b = PREV_INSN (insn);
9397 insn = b;
9398 /* See comment above in analogous place for emitting nops
9399 after the insn. */
9400 template0 = template1;
9401 template1 = -1;
9403 /* Emit nops after the current insn. */
9404 for (i = 0; i < curr_state->before_nops_num; i++)
9406 rtx nop_pat = gen_nop ();
9407 ia64_emit_insn_before (nop_pat, insn);
9408 rtx_insn *nop = PREV_INSN (insn);
9409 insn = nop;
9410 pos--;
9411 gcc_assert (pos >= 0);
9412 if (pos % 3 == 0)
9414 /* See comment above in analogous place for emitting nops
9415 after the insn. */
9416 gcc_assert (template0 >= 0);
9417 ia64_add_bundle_selector_before (template0, insn);
9418 b = PREV_INSN (insn);
9419 insn = b;
9420 template0 = template1;
9421 template1 = -1;
9426 if (flag_checking)
9428 /* Assert right calculation of middle_bundle_stops. */
9429 int num = best_state->middle_bundle_stops;
9430 bool start_bundle = true, end_bundle = false;
9432 for (insn = NEXT_INSN (prev_head_insn);
9433 insn && insn != tail;
9434 insn = NEXT_INSN (insn))
9436 if (!INSN_P (insn))
9437 continue;
9438 if (recog_memoized (insn) == CODE_FOR_bundle_selector)
9439 start_bundle = true;
9440 else
9442 rtx_insn *next_insn;
9444 for (next_insn = NEXT_INSN (insn);
9445 next_insn && next_insn != tail;
9446 next_insn = NEXT_INSN (next_insn))
9447 if (INSN_P (next_insn)
9448 && (ia64_safe_itanium_class (next_insn)
9449 != ITANIUM_CLASS_IGNORE
9450 || recog_memoized (next_insn)
9451 == CODE_FOR_bundle_selector)
9452 && GET_CODE (PATTERN (next_insn)) != USE
9453 && GET_CODE (PATTERN (next_insn)) != CLOBBER)
9454 break;
9456 end_bundle = next_insn == NULL_RTX
9457 || next_insn == tail
9458 || (INSN_P (next_insn)
9459 && recog_memoized (next_insn) == CODE_FOR_bundle_selector);
9460 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
9461 && !start_bundle && !end_bundle
9462 && next_insn
9463 && !unknown_for_bundling_p (next_insn))
9464 num--;
9466 start_bundle = false;
9470 gcc_assert (num == 0);
9473 free (index_to_bundle_states);
9474 finish_bundle_state_table ();
9475 bundling_p = 0;
9476 dfa_clean_insn_cache ();
9479 /* The following function is called at the end of scheduling BB or
9480 EBB. After reload, it inserts stop bits and does insn bundling. */
9482 static void
9483 ia64_sched_finish (FILE *dump, int sched_verbose)
9485 if (sched_verbose)
9486 fprintf (dump, "// Finishing schedule.\n");
9487 if (!reload_completed)
9488 return;
9489 if (reload_completed)
9491 final_emit_insn_group_barriers (dump);
9492 bundling (dump, sched_verbose, current_sched_info->prev_head,
9493 current_sched_info->next_tail);
9494 if (sched_verbose && dump)
9495 fprintf (dump, "// finishing %d-%d\n",
9496 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
9497 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
9499 return;
9503 /* The following function inserts stop bits in scheduled BB or EBB. */
9505 static void
9506 final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
9508 rtx_insn *insn;
9509 int need_barrier_p = 0;
9510 int seen_good_insn = 0;
9512 init_insn_group_barriers ();
9514 for (insn = NEXT_INSN (current_sched_info->prev_head);
9515 insn != current_sched_info->next_tail;
9516 insn = NEXT_INSN (insn))
9518 if (BARRIER_P (insn))
9520 rtx_insn *last = prev_active_insn (insn);
9522 if (! last)
9523 continue;
9524 if (JUMP_TABLE_DATA_P (last))
9525 last = prev_active_insn (last);
9526 if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
9527 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
9529 init_insn_group_barriers ();
9530 seen_good_insn = 0;
9531 need_barrier_p = 0;
9533 else if (NONDEBUG_INSN_P (insn))
9535 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
9537 init_insn_group_barriers ();
9538 seen_good_insn = 0;
9539 need_barrier_p = 0;
9541 else if (need_barrier_p || group_barrier_needed (insn)
9542 || (mflag_sched_stop_bits_after_every_cycle
9543 && GET_MODE (insn) == TImode
9544 && seen_good_insn))
9546 if (TARGET_EARLY_STOP_BITS)
9548 rtx_insn *last;
9550 for (last = insn;
9551 last != current_sched_info->prev_head;
9552 last = PREV_INSN (last))
9553 if (INSN_P (last) && GET_MODE (last) == TImode
9554 && stops_p [INSN_UID (last)])
9555 break;
9556 if (last == current_sched_info->prev_head)
9557 last = insn;
9558 last = prev_active_insn (last);
9559 if (last
9560 && recog_memoized (last) != CODE_FOR_insn_group_barrier)
9561 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
9562 last);
9563 init_insn_group_barriers ();
9564 for (last = NEXT_INSN (last);
9565 last != insn;
9566 last = NEXT_INSN (last))
9567 if (INSN_P (last))
9569 group_barrier_needed (last);
9570 if (recog_memoized (last) >= 0
9571 && important_for_bundling_p (last))
9572 seen_good_insn = 1;
9575 else
9577 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
9578 insn);
9579 init_insn_group_barriers ();
9580 seen_good_insn = 0;
9582 group_barrier_needed (insn);
9583 if (recog_memoized (insn) >= 0
9584 && important_for_bundling_p (insn))
9585 seen_good_insn = 1;
9587 else if (recog_memoized (insn) >= 0
9588 && important_for_bundling_p (insn))
9589 seen_good_insn = 1;
9590 need_barrier_p = (CALL_P (insn) || unknown_for_bundling_p (insn));
9597 /* If the following function returns TRUE, we will use the DFA
9598 insn scheduler. */
9600 static int
9601 ia64_first_cycle_multipass_dfa_lookahead (void)
9603 return (reload_completed ? 6 : 4);
9606 /* The following function initiates variable `dfa_pre_cycle_insn'. */
9608 static void
9609 ia64_init_dfa_pre_cycle_insn (void)
9611 if (temp_dfa_state == NULL)
9613 dfa_state_size = state_size ();
9614 temp_dfa_state = xmalloc (dfa_state_size);
9615 prev_cycle_state = xmalloc (dfa_state_size);
9617 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
9618 SET_PREV_INSN (dfa_pre_cycle_insn) = SET_NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
9619 recog_memoized (dfa_pre_cycle_insn);
9620 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9621 SET_PREV_INSN (dfa_stop_insn) = SET_NEXT_INSN (dfa_stop_insn) = NULL_RTX;
9622 recog_memoized (dfa_stop_insn);
9625 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9626 used by the DFA insn scheduler. */
9628 static rtx
9629 ia64_dfa_pre_cycle_insn (void)
9631 return dfa_pre_cycle_insn;
9634 /* The following function returns TRUE if PRODUCER (of type ilog or
9635 ld) produces address for CONSUMER (of type st or stf). */
9638 ia64_st_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
9640 rtx dest, reg, mem;
9642 gcc_assert (producer && consumer);
9643 dest = ia64_single_set (producer);
9644 gcc_assert (dest);
9645 reg = SET_DEST (dest);
9646 gcc_assert (reg);
9647 if (GET_CODE (reg) == SUBREG)
9648 reg = SUBREG_REG (reg);
9649 gcc_assert (GET_CODE (reg) == REG);
9651 dest = ia64_single_set (consumer);
9652 gcc_assert (dest);
9653 mem = SET_DEST (dest);
9654 gcc_assert (mem && GET_CODE (mem) == MEM);
9655 return reg_mentioned_p (reg, mem);
9658 /* The following function returns TRUE if PRODUCER (of type ilog or
9659 ld) produces address for CONSUMER (of type ld or fld). */
9662 ia64_ld_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
9664 rtx dest, src, reg, mem;
9666 gcc_assert (producer && consumer);
9667 dest = ia64_single_set (producer);
9668 gcc_assert (dest);
9669 reg = SET_DEST (dest);
9670 gcc_assert (reg);
9671 if (GET_CODE (reg) == SUBREG)
9672 reg = SUBREG_REG (reg);
9673 gcc_assert (GET_CODE (reg) == REG);
9675 src = ia64_single_set (consumer);
9676 gcc_assert (src);
9677 mem = SET_SRC (src);
9678 gcc_assert (mem);
9680 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9681 mem = XVECEXP (mem, 0, 0);
9682 else if (GET_CODE (mem) == IF_THEN_ELSE)
9683 /* ??? Is this bypass necessary for ld.c? */
9685 gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9686 mem = XEXP (mem, 1);
9689 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9690 mem = XEXP (mem, 0);
9692 if (GET_CODE (mem) == UNSPEC)
9694 int c = XINT (mem, 1);
9696 gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9697 || c == UNSPEC_LDSA);
9698 mem = XVECEXP (mem, 0, 0);
9701 /* Note that LO_SUM is used for GOT loads. */
9702 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
9704 return reg_mentioned_p (reg, mem);
9707 /* The following function returns TRUE if INSN produces address for a
9708 load/store insn. We will place such insns into M slot because it
9709 decreases its latency time. */
9712 ia64_produce_address_p (rtx insn)
9714 return insn->call;
9718 /* Emit pseudo-ops for the assembler to describe predicate relations.
9719 At present this assumes that we only consider predicate pairs to
9720 be mutex, and that the assembler can deduce proper values from
9721 straight-line code. */
9723 static void
9724 emit_predicate_relation_info (void)
9726 basic_block bb;
9728 FOR_EACH_BB_REVERSE_FN (bb, cfun)
9730 int r;
9731 rtx_insn *head = BB_HEAD (bb);
9733 /* We only need such notes at code labels. */
9734 if (! LABEL_P (head))
9735 continue;
9736 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
9737 head = NEXT_INSN (head);
9739 /* Skip p0, which may be thought to be live due to (reg:DI p0)
9740 grabbing the entire block of predicate registers. */
9741 for (r = PR_REG (2); r < PR_REG (64); r += 2)
9742 if (REGNO_REG_SET_P (df_get_live_in (bb), r))
9744 rtx p = gen_rtx_REG (BImode, r);
9745 rtx_insn *n = emit_insn_after (gen_pred_rel_mutex (p), head);
9746 if (head == BB_END (bb))
9747 BB_END (bb) = n;
9748 head = n;
9752 /* Look for conditional calls that do not return, and protect predicate
9753 relations around them. Otherwise the assembler will assume the call
9754 returns, and complain about uses of call-clobbered predicates after
9755 the call. */
9756 FOR_EACH_BB_REVERSE_FN (bb, cfun)
9758 rtx_insn *insn = BB_HEAD (bb);
9760 while (1)
9762 if (CALL_P (insn)
9763 && GET_CODE (PATTERN (insn)) == COND_EXEC
9764 && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9766 rtx_insn *b =
9767 emit_insn_before (gen_safe_across_calls_all (), insn);
9768 rtx_insn *a = emit_insn_after (gen_safe_across_calls_normal (), insn);
9769 if (BB_HEAD (bb) == insn)
9770 BB_HEAD (bb) = b;
9771 if (BB_END (bb) == insn)
9772 BB_END (bb) = a;
9775 if (insn == BB_END (bb))
9776 break;
9777 insn = NEXT_INSN (insn);
9782 /* Perform machine dependent operations on the rtl chain INSNS. */
9784 static void
9785 ia64_reorg (void)
9787 /* We are freeing block_for_insn in the toplev to keep compatibility
9788 with old MDEP_REORGS that are not CFG based. Recompute it now. */
9789 compute_bb_for_insn ();
9791 /* If optimizing, we'll have split before scheduling. */
9792 if (optimize == 0)
9793 split_all_insns ();
9795 if (optimize && flag_schedule_insns_after_reload
9796 && dbg_cnt (ia64_sched2))
9798 basic_block bb;
9799 timevar_push (TV_SCHED2);
9800 ia64_final_schedule = 1;
9802 /* We can't let modulo-sched prevent us from scheduling any bbs,
9803 since we need the final schedule to produce bundle information. */
9804 FOR_EACH_BB_FN (bb, cfun)
9805 bb->flags &= ~BB_DISABLE_SCHEDULE;
9807 initiate_bundle_states ();
9808 ia64_nop = make_insn_raw (gen_nop ());
9809 SET_PREV_INSN (ia64_nop) = SET_NEXT_INSN (ia64_nop) = NULL_RTX;
9810 recog_memoized (ia64_nop);
9811 clocks_length = get_max_uid () + 1;
9812 stops_p = XCNEWVEC (char, clocks_length);
9814 if (ia64_tune == PROCESSOR_ITANIUM2)
9816 pos_1 = get_cpu_unit_code ("2_1");
9817 pos_2 = get_cpu_unit_code ("2_2");
9818 pos_3 = get_cpu_unit_code ("2_3");
9819 pos_4 = get_cpu_unit_code ("2_4");
9820 pos_5 = get_cpu_unit_code ("2_5");
9821 pos_6 = get_cpu_unit_code ("2_6");
9822 _0mii_ = get_cpu_unit_code ("2b_0mii.");
9823 _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9824 _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9825 _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9826 _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9827 _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9828 _0mib_ = get_cpu_unit_code ("2b_0mib.");
9829 _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9830 _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9831 _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9832 _1mii_ = get_cpu_unit_code ("2b_1mii.");
9833 _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9834 _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9835 _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9836 _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9837 _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9838 _1mib_ = get_cpu_unit_code ("2b_1mib.");
9839 _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9840 _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9841 _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9843 else
9845 pos_1 = get_cpu_unit_code ("1_1");
9846 pos_2 = get_cpu_unit_code ("1_2");
9847 pos_3 = get_cpu_unit_code ("1_3");
9848 pos_4 = get_cpu_unit_code ("1_4");
9849 pos_5 = get_cpu_unit_code ("1_5");
9850 pos_6 = get_cpu_unit_code ("1_6");
9851 _0mii_ = get_cpu_unit_code ("1b_0mii.");
9852 _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9853 _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9854 _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9855 _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9856 _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9857 _0mib_ = get_cpu_unit_code ("1b_0mib.");
9858 _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9859 _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9860 _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9861 _1mii_ = get_cpu_unit_code ("1b_1mii.");
9862 _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9863 _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9864 _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9865 _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9866 _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9867 _1mib_ = get_cpu_unit_code ("1b_1mib.");
9868 _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9869 _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9870 _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9873 if (flag_selective_scheduling2
9874 && !maybe_skip_selective_scheduling ())
9875 run_selective_scheduling ();
9876 else
9877 schedule_ebbs ();
9879 /* Redo alignment computation, as it might gone wrong. */
9880 compute_alignments ();
9882 /* We cannot reuse this one because it has been corrupted by the
9883 evil glat. */
9884 finish_bundle_states ();
9885 free (stops_p);
9886 stops_p = NULL;
9887 emit_insn_group_barriers (dump_file);
9889 ia64_final_schedule = 0;
9890 timevar_pop (TV_SCHED2);
9892 else
9893 emit_all_insn_group_barriers (dump_file);
9895 df_analyze ();
9897 /* A call must not be the last instruction in a function, so that the
9898 return address is still within the function, so that unwinding works
9899 properly. Note that IA-64 differs from dwarf2 on this point. */
9900 if (ia64_except_unwind_info (&global_options) == UI_TARGET)
9902 rtx_insn *insn;
9903 int saw_stop = 0;
9905 insn = get_last_insn ();
9906 if (! INSN_P (insn))
9907 insn = prev_active_insn (insn);
9908 if (insn)
9910 /* Skip over insns that expand to nothing. */
9911 while (NONJUMP_INSN_P (insn)
9912 && get_attr_empty (insn) == EMPTY_YES)
9914 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9915 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9916 saw_stop = 1;
9917 insn = prev_active_insn (insn);
9919 if (CALL_P (insn))
9921 if (! saw_stop)
9922 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9923 emit_insn (gen_break_f ());
9924 emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9929 emit_predicate_relation_info ();
9931 if (flag_var_tracking)
9933 timevar_push (TV_VAR_TRACKING);
9934 variable_tracking_main ();
9935 timevar_pop (TV_VAR_TRACKING);
9937 df_finish_pass (false);
9940 /* Return true if REGNO is used by the epilogue. */
9943 ia64_epilogue_uses (int regno)
9945 switch (regno)
9947 case R_GR (1):
9948 /* With a call to a function in another module, we will write a new
9949 value to "gp". After returning from such a call, we need to make
9950 sure the function restores the original gp-value, even if the
9951 function itself does not use the gp anymore. */
9952 return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
9954 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9955 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9956 /* For functions defined with the syscall_linkage attribute, all
9957 input registers are marked as live at all function exits. This
9958 prevents the register allocator from using the input registers,
9959 which in turn makes it possible to restart a system call after
9960 an interrupt without having to save/restore the input registers.
9961 This also prevents kernel data from leaking to application code. */
9962 return lookup_attribute ("syscall_linkage",
9963 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9965 case R_BR (0):
9966 /* Conditional return patterns can't represent the use of `b0' as
9967 the return address, so we force the value live this way. */
9968 return 1;
9970 case AR_PFS_REGNUM:
9971 /* Likewise for ar.pfs, which is used by br.ret. */
9972 return 1;
9974 default:
9975 return 0;
9979 /* Return true if REGNO is used by the frame unwinder. */
9982 ia64_eh_uses (int regno)
9984 unsigned int r;
9986 if (! reload_completed)
9987 return 0;
9989 if (regno == 0)
9990 return 0;
9992 for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9993 if (regno == current_frame_info.r[r]
9994 || regno == emitted_frame_related_regs[r])
9995 return 1;
9997 return 0;
10000 /* Return true if this goes in small data/bss. */
10002 /* ??? We could also support own long data here. Generating movl/add/ld8
10003 instead of addl,ld8/ld8. This makes the code bigger, but should make the
10004 code faster because there is one less load. This also includes incomplete
10005 types which can't go in sdata/sbss. */
10007 static bool
10008 ia64_in_small_data_p (const_tree exp)
10010 if (TARGET_NO_SDATA)
10011 return false;
10013 /* We want to merge strings, so we never consider them small data. */
10014 if (TREE_CODE (exp) == STRING_CST)
10015 return false;
10017 /* Functions are never small data. */
10018 if (TREE_CODE (exp) == FUNCTION_DECL)
10019 return false;
10021 if (VAR_P (exp) && DECL_SECTION_NAME (exp))
10023 const char *section = DECL_SECTION_NAME (exp);
10025 if (strcmp (section, ".sdata") == 0
10026 || startswith (section, ".sdata.")
10027 || startswith (section, ".gnu.linkonce.s.")
10028 || strcmp (section, ".sbss") == 0
10029 || startswith (section, ".sbss.")
10030 || startswith (section, ".gnu.linkonce.sb."))
10031 return true;
10033 else
10035 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
10037 /* If this is an incomplete type with size 0, then we can't put it
10038 in sdata because it might be too big when completed. */
10039 if (size > 0 && size <= ia64_section_threshold)
10040 return true;
10043 return false;
10046 /* Output assembly directives for prologue regions. */
10048 /* The current basic block number. */
10050 static bool last_block;
10052 /* True if we need a copy_state command at the start of the next block. */
10054 static bool need_copy_state;
10056 #ifndef MAX_ARTIFICIAL_LABEL_BYTES
10057 # define MAX_ARTIFICIAL_LABEL_BYTES 30
10058 #endif
10060 /* The function emits unwind directives for the start of an epilogue. */
10062 static void
10063 process_epilogue (FILE *out_file, rtx insn ATTRIBUTE_UNUSED,
10064 bool unwind, bool frame ATTRIBUTE_UNUSED)
10066 /* If this isn't the last block of the function, then we need to label the
10067 current state, and copy it back in at the start of the next block. */
10069 if (!last_block)
10071 if (unwind)
10072 fprintf (out_file, "\t.label_state %d\n",
10073 ++cfun->machine->state_num);
10074 need_copy_state = true;
10077 if (unwind)
10078 fprintf (out_file, "\t.restore sp\n");
10081 /* This function processes a SET pattern for REG_CFA_ADJUST_CFA. */
10083 static void
10084 process_cfa_adjust_cfa (FILE *out_file, rtx pat, rtx insn,
10085 bool unwind, bool frame)
10087 rtx dest = SET_DEST (pat);
10088 rtx src = SET_SRC (pat);
10090 if (dest == stack_pointer_rtx)
10092 if (GET_CODE (src) == PLUS)
10094 rtx op0 = XEXP (src, 0);
10095 rtx op1 = XEXP (src, 1);
10097 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
10099 if (INTVAL (op1) < 0)
10101 gcc_assert (!frame_pointer_needed);
10102 if (unwind)
10103 fprintf (out_file,
10104 "\t.fframe " HOST_WIDE_INT_PRINT_DEC"\n",
10105 -INTVAL (op1));
10107 else
10108 process_epilogue (out_file, insn, unwind, frame);
10110 else
10112 gcc_assert (src == hard_frame_pointer_rtx);
10113 process_epilogue (out_file, insn, unwind, frame);
10116 else if (dest == hard_frame_pointer_rtx)
10118 gcc_assert (src == stack_pointer_rtx);
10119 gcc_assert (frame_pointer_needed);
10121 if (unwind)
10122 fprintf (out_file, "\t.vframe r%d\n",
10123 ia64_debugger_regno (REGNO (dest)));
10125 else
10126 gcc_unreachable ();
10129 /* This function processes a SET pattern for REG_CFA_REGISTER. */
10131 static void
10132 process_cfa_register (FILE *out_file, rtx pat, bool unwind)
10134 rtx dest = SET_DEST (pat);
10135 rtx src = SET_SRC (pat);
10136 int dest_regno = REGNO (dest);
10137 int src_regno;
10139 if (src == pc_rtx)
10141 /* Saving return address pointer. */
10142 if (unwind)
10143 fprintf (out_file, "\t.save rp, r%d\n",
10144 ia64_debugger_regno (dest_regno));
10145 return;
10148 src_regno = REGNO (src);
10150 switch (src_regno)
10152 case PR_REG (0):
10153 gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
10154 if (unwind)
10155 fprintf (out_file, "\t.save pr, r%d\n",
10156 ia64_debugger_regno (dest_regno));
10157 break;
10159 case AR_UNAT_REGNUM:
10160 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
10161 if (unwind)
10162 fprintf (out_file, "\t.save ar.unat, r%d\n",
10163 ia64_debugger_regno (dest_regno));
10164 break;
10166 case AR_LC_REGNUM:
10167 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
10168 if (unwind)
10169 fprintf (out_file, "\t.save ar.lc, r%d\n",
10170 ia64_debugger_regno (dest_regno));
10171 break;
10173 default:
10174 /* Everything else should indicate being stored to memory. */
10175 gcc_unreachable ();
10179 /* This function processes a SET pattern for REG_CFA_OFFSET. */
10181 static void
10182 process_cfa_offset (FILE *out_file, rtx pat, bool unwind)
10184 rtx dest = SET_DEST (pat);
10185 rtx src = SET_SRC (pat);
10186 int src_regno = REGNO (src);
10187 const char *saveop;
10188 HOST_WIDE_INT off;
10189 rtx base;
10191 gcc_assert (MEM_P (dest));
10192 if (GET_CODE (XEXP (dest, 0)) == REG)
10194 base = XEXP (dest, 0);
10195 off = 0;
10197 else
10199 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
10200 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
10201 base = XEXP (XEXP (dest, 0), 0);
10202 off = INTVAL (XEXP (XEXP (dest, 0), 1));
10205 if (base == hard_frame_pointer_rtx)
10207 saveop = ".savepsp";
10208 off = - off;
10210 else
10212 gcc_assert (base == stack_pointer_rtx);
10213 saveop = ".savesp";
10216 src_regno = REGNO (src);
10217 switch (src_regno)
10219 case BR_REG (0):
10220 gcc_assert (!current_frame_info.r[reg_save_b0]);
10221 if (unwind)
10222 fprintf (out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
10223 saveop, off);
10224 break;
10226 case PR_REG (0):
10227 gcc_assert (!current_frame_info.r[reg_save_pr]);
10228 if (unwind)
10229 fprintf (out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
10230 saveop, off);
10231 break;
10233 case AR_LC_REGNUM:
10234 gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
10235 if (unwind)
10236 fprintf (out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
10237 saveop, off);
10238 break;
10240 case AR_PFS_REGNUM:
10241 gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
10242 if (unwind)
10243 fprintf (out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
10244 saveop, off);
10245 break;
10247 case AR_UNAT_REGNUM:
10248 gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
10249 if (unwind)
10250 fprintf (out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
10251 saveop, off);
10252 break;
10254 case GR_REG (4):
10255 case GR_REG (5):
10256 case GR_REG (6):
10257 case GR_REG (7):
10258 if (unwind)
10259 fprintf (out_file, "\t.save.g 0x%x\n",
10260 1 << (src_regno - GR_REG (4)));
10261 break;
10263 case BR_REG (1):
10264 case BR_REG (2):
10265 case BR_REG (3):
10266 case BR_REG (4):
10267 case BR_REG (5):
10268 if (unwind)
10269 fprintf (out_file, "\t.save.b 0x%x\n",
10270 1 << (src_regno - BR_REG (1)));
10271 break;
10273 case FR_REG (2):
10274 case FR_REG (3):
10275 case FR_REG (4):
10276 case FR_REG (5):
10277 if (unwind)
10278 fprintf (out_file, "\t.save.f 0x%x\n",
10279 1 << (src_regno - FR_REG (2)));
10280 break;
10282 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
10283 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
10284 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
10285 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
10286 if (unwind)
10287 fprintf (out_file, "\t.save.gf 0x0, 0x%x\n",
10288 1 << (src_regno - FR_REG (12)));
10289 break;
10291 default:
10292 /* ??? For some reason we mark other general registers, even those
10293 we can't represent in the unwind info. Ignore them. */
10294 break;
10298 /* This function looks at a single insn and emits any directives
10299 required to unwind this insn. */
10301 static void
10302 ia64_asm_unwind_emit (FILE *out_file, rtx_insn *insn)
10304 bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET;
10305 bool frame = dwarf2out_do_frame ();
10306 rtx note, pat;
10307 bool handled_one;
10309 if (!unwind && !frame)
10310 return;
10312 if (NOTE_INSN_BASIC_BLOCK_P (insn))
10314 last_block = NOTE_BASIC_BLOCK (insn)->next_bb
10315 == EXIT_BLOCK_PTR_FOR_FN (cfun);
10317 /* Restore unwind state from immediately before the epilogue. */
10318 if (need_copy_state)
10320 if (unwind)
10322 fprintf (out_file, "\t.body\n");
10323 fprintf (out_file, "\t.copy_state %d\n",
10324 cfun->machine->state_num);
10326 need_copy_state = false;
10330 if (NOTE_P (insn) || ! RTX_FRAME_RELATED_P (insn))
10331 return;
10333 /* Look for the ALLOC insn. */
10334 if (INSN_CODE (insn) == CODE_FOR_alloc)
10336 rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
10337 int dest_regno = REGNO (dest);
10339 /* If this is the final destination for ar.pfs, then this must
10340 be the alloc in the prologue. */
10341 if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
10343 if (unwind)
10344 fprintf (out_file, "\t.save ar.pfs, r%d\n",
10345 ia64_debugger_regno (dest_regno));
10347 else
10349 /* This must be an alloc before a sibcall. We must drop the
10350 old frame info. The easiest way to drop the old frame
10351 info is to ensure we had a ".restore sp" directive
10352 followed by a new prologue. If the procedure doesn't
10353 have a memory-stack frame, we'll issue a dummy ".restore
10354 sp" now. */
10355 if (current_frame_info.total_size == 0 && !frame_pointer_needed)
10356 /* if haven't done process_epilogue() yet, do it now */
10357 process_epilogue (out_file, insn, unwind, frame);
10358 if (unwind)
10359 fprintf (out_file, "\t.prologue\n");
10361 return;
10364 handled_one = false;
10365 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
10366 switch (REG_NOTE_KIND (note))
10368 case REG_CFA_ADJUST_CFA:
10369 pat = XEXP (note, 0);
10370 if (pat == NULL)
10371 pat = PATTERN (insn);
10372 process_cfa_adjust_cfa (out_file, pat, insn, unwind, frame);
10373 handled_one = true;
10374 break;
10376 case REG_CFA_OFFSET:
10377 pat = XEXP (note, 0);
10378 if (pat == NULL)
10379 pat = PATTERN (insn);
10380 process_cfa_offset (out_file, pat, unwind);
10381 handled_one = true;
10382 break;
10384 case REG_CFA_REGISTER:
10385 pat = XEXP (note, 0);
10386 if (pat == NULL)
10387 pat = PATTERN (insn);
10388 process_cfa_register (out_file, pat, unwind);
10389 handled_one = true;
10390 break;
10392 case REG_FRAME_RELATED_EXPR:
10393 case REG_CFA_DEF_CFA:
10394 case REG_CFA_EXPRESSION:
10395 case REG_CFA_RESTORE:
10396 case REG_CFA_SET_VDRAP:
10397 /* Not used in the ia64 port. */
10398 gcc_unreachable ();
10400 default:
10401 /* Not a frame-related note. */
10402 break;
10405 /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
10406 explicit action to take. No guessing required. */
10407 gcc_assert (handled_one);
10410 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
10412 static void
10413 ia64_asm_emit_except_personality (rtx personality)
10415 fputs ("\t.personality\t", asm_out_file);
10416 output_addr_const (asm_out_file, personality);
10417 fputc ('\n', asm_out_file);
10420 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
10422 static void
10423 ia64_asm_init_sections (void)
10425 exception_section = get_unnamed_section (0, output_section_asm_op,
10426 "\t.handlerdata");
10429 /* Implement TARGET_DEBUG_UNWIND_INFO. */
10431 static enum unwind_info_type
10432 ia64_debug_unwind_info (void)
10434 return UI_TARGET;
10437 enum ia64_builtins
10439 IA64_BUILTIN_BSP,
10440 IA64_BUILTIN_COPYSIGNQ,
10441 IA64_BUILTIN_FABSQ,
10442 IA64_BUILTIN_FLUSHRS,
10443 IA64_BUILTIN_INFQ,
10444 IA64_BUILTIN_HUGE_VALQ,
10445 IA64_BUILTIN_NANQ,
10446 IA64_BUILTIN_NANSQ,
10447 IA64_BUILTIN_max
10450 static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max];
10452 void
10453 ia64_init_builtins (void)
10455 tree fpreg_type;
10456 tree float80_type;
10457 tree decl;
10459 /* The __fpreg type. */
10460 fpreg_type = make_node (REAL_TYPE);
10461 TYPE_PRECISION (fpreg_type) = 82;
10462 layout_type (fpreg_type);
10463 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
10465 /* The __float80 type. */
10466 if (float64x_type_node != NULL_TREE
10467 && TYPE_MODE (float64x_type_node) == XFmode)
10468 float80_type = float64x_type_node;
10469 else
10471 float80_type = make_node (REAL_TYPE);
10472 TYPE_PRECISION (float80_type) = 80;
10473 layout_type (float80_type);
10475 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
10477 /* The __float128 type. */
10478 if (!TARGET_HPUX)
10480 tree ftype;
10481 tree const_string_type
10482 = build_pointer_type (build_qualified_type
10483 (char_type_node, TYPE_QUAL_CONST));
10485 if (float128t_type_node == NULL_TREE)
10487 float128t_type_node = make_node (REAL_TYPE);
10488 TYPE_PRECISION (float128t_type_node)
10489 = TYPE_PRECISION (float128_type_node);
10490 layout_type (float128t_type_node);
10491 SET_TYPE_MODE (float128t_type_node, TYPE_MODE (float128_type_node));
10493 (*lang_hooks.types.register_builtin_type) (float128t_type_node,
10494 "__float128");
10496 /* TFmode support builtins. */
10497 ftype = build_function_type_list (float128t_type_node, NULL_TREE);
10498 decl = add_builtin_function ("__builtin_infq", ftype,
10499 IA64_BUILTIN_INFQ, BUILT_IN_MD,
10500 NULL, NULL_TREE);
10501 ia64_builtins[IA64_BUILTIN_INFQ] = decl;
10503 decl = add_builtin_function ("__builtin_huge_valq", ftype,
10504 IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
10505 NULL, NULL_TREE);
10506 ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;
10508 ftype = build_function_type_list (float128t_type_node,
10509 const_string_type,
10510 NULL_TREE);
10511 decl = add_builtin_function ("__builtin_nanq", ftype,
10512 IA64_BUILTIN_NANQ, BUILT_IN_MD,
10513 "nanq", NULL_TREE);
10514 TREE_READONLY (decl) = 1;
10515 ia64_builtins[IA64_BUILTIN_NANQ] = decl;
10517 decl = add_builtin_function ("__builtin_nansq", ftype,
10518 IA64_BUILTIN_NANSQ, BUILT_IN_MD,
10519 "nansq", NULL_TREE);
10520 TREE_READONLY (decl) = 1;
10521 ia64_builtins[IA64_BUILTIN_NANSQ] = decl;
10523 ftype = build_function_type_list (float128t_type_node,
10524 float128t_type_node,
10525 NULL_TREE);
10526 decl = add_builtin_function ("__builtin_fabsq", ftype,
10527 IA64_BUILTIN_FABSQ, BUILT_IN_MD,
10528 "__fabstf2", NULL_TREE);
10529 TREE_READONLY (decl) = 1;
10530 ia64_builtins[IA64_BUILTIN_FABSQ] = decl;
10532 ftype = build_function_type_list (float128t_type_node,
10533 float128t_type_node,
10534 float128t_type_node,
10535 NULL_TREE);
10536 decl = add_builtin_function ("__builtin_copysignq", ftype,
10537 IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
10538 "__copysigntf3", NULL_TREE);
10539 TREE_READONLY (decl) = 1;
10540 ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
10542 else
10543 /* Under HPUX, this is a synonym for "long double". */
10544 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
10545 "__float128");
10547 /* Fwrite on VMS is non-standard. */
10548 #if TARGET_ABI_OPEN_VMS
10549 vms_patch_builtins ();
10550 #endif
10552 #define def_builtin(name, type, code) \
10553 add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
10554 NULL, NULL_TREE)
10556 decl = def_builtin ("__builtin_ia64_bsp",
10557 build_function_type_list (ptr_type_node, NULL_TREE),
10558 IA64_BUILTIN_BSP);
10559 ia64_builtins[IA64_BUILTIN_BSP] = decl;
10561 decl = def_builtin ("__builtin_ia64_flushrs",
10562 build_function_type_list (void_type_node, NULL_TREE),
10563 IA64_BUILTIN_FLUSHRS);
10564 ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;
10566 #undef def_builtin
10568 if (TARGET_HPUX)
10570 if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
10571 set_user_assembler_name (decl, "_Isfinite");
10572 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
10573 set_user_assembler_name (decl, "_Isfinitef");
10574 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE)
10575 set_user_assembler_name (decl, "_Isfinitef128");
10579 static tree
10580 ia64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10581 tree *args, bool ignore ATTRIBUTE_UNUSED)
10583 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
10585 enum ia64_builtins fn_code
10586 = (enum ia64_builtins) DECL_MD_FUNCTION_CODE (fndecl);
10587 switch (fn_code)
10589 case IA64_BUILTIN_NANQ:
10590 case IA64_BUILTIN_NANSQ:
10592 tree type = TREE_TYPE (TREE_TYPE (fndecl));
10593 const char *str = c_getstr (*args);
10594 int quiet = fn_code == IA64_BUILTIN_NANQ;
10595 REAL_VALUE_TYPE real;
10597 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
10598 return build_real (type, real);
10599 return NULL_TREE;
10602 default:
10603 break;
10607 #ifdef SUBTARGET_FOLD_BUILTIN
10608 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
10609 #endif
10611 return NULL_TREE;
10615 ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10616 machine_mode mode ATTRIBUTE_UNUSED,
10617 int ignore ATTRIBUTE_UNUSED)
10619 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10620 unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
10622 switch (fcode)
10624 case IA64_BUILTIN_BSP:
10625 if (! target || ! register_operand (target, DImode))
10626 target = gen_reg_rtx (DImode);
10627 emit_insn (gen_bsp_value (target));
10628 #ifdef POINTERS_EXTEND_UNSIGNED
10629 target = convert_memory_address (ptr_mode, target);
10630 #endif
10631 return target;
10633 case IA64_BUILTIN_FLUSHRS:
10634 emit_insn (gen_flushrs ());
10635 return const0_rtx;
10637 case IA64_BUILTIN_INFQ:
10638 case IA64_BUILTIN_HUGE_VALQ:
10640 machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
10641 REAL_VALUE_TYPE inf;
10642 rtx tmp;
10644 real_inf (&inf);
10645 tmp = const_double_from_real_value (inf, target_mode);
10647 tmp = validize_mem (force_const_mem (target_mode, tmp));
10649 if (target == 0)
10650 target = gen_reg_rtx (target_mode);
10652 emit_move_insn (target, tmp);
10653 return target;
10656 case IA64_BUILTIN_NANQ:
10657 case IA64_BUILTIN_NANSQ:
10658 case IA64_BUILTIN_FABSQ:
10659 case IA64_BUILTIN_COPYSIGNQ:
10660 return expand_call (exp, target, ignore);
10662 default:
10663 gcc_unreachable ();
10666 return NULL_RTX;
10669 /* Return the ia64 builtin for CODE. */
10671 static tree
10672 ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10674 if (code >= IA64_BUILTIN_max)
10675 return error_mark_node;
10677 return ia64_builtins[code];
10680 /* Implement TARGET_FUNCTION_ARG_PADDING.
10682 For the HP-UX IA64 aggregate parameters are passed stored in the
10683 most significant bits of the stack slot. */
10685 static pad_direction
10686 ia64_function_arg_padding (machine_mode mode, const_tree type)
10688 /* Exception to normal case for structures/unions/etc. */
10689 if (TARGET_HPUX
10690 && type
10691 && AGGREGATE_TYPE_P (type)
10692 && int_size_in_bytes (type) < UNITS_PER_WORD)
10693 return PAD_UPWARD;
10695 /* Fall back to the default. */
10696 return default_function_arg_padding (mode, type);
10699 /* Emit text to declare externally defined variables and functions, because
10700 the Intel assembler does not support undefined externals. */
10702 void
10703 ia64_asm_output_external (FILE *file, tree decl, const char *name)
10705 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10706 set in order to avoid putting out names that are never really
10707 used. */
10708 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
10710 /* maybe_assemble_visibility will return 1 if the assembler
10711 visibility directive is output. */
10712 int need_visibility = ((*targetm.binds_local_p) (decl)
10713 && maybe_assemble_visibility (decl));
10715 /* GNU as does not need anything here, but the HP linker does
10716 need something for external functions. */
10717 if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10718 && TREE_CODE (decl) == FUNCTION_DECL)
10719 (*targetm.asm_out.globalize_decl_name) (file, decl);
10720 else if (need_visibility && !TARGET_GNU_AS)
10721 (*targetm.asm_out.globalize_label) (file, name);
10725 /* Set SImode div/mod functions, init_integral_libfuncs only initializes
10726 modes of word_mode and larger. Rename the TFmode libfuncs using the
10727 HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
10728 backward compatibility. */
10730 static void
10731 ia64_init_libfuncs (void)
10733 set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10734 set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10735 set_optab_libfunc (smod_optab, SImode, "__modsi3");
10736 set_optab_libfunc (umod_optab, SImode, "__umodsi3");
10738 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10739 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10740 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10741 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10742 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10744 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10745 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10746 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10747 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10748 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10749 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10751 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10752 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
10753 set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
10754 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10755 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10757 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10758 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
10759 set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
10760 /* HP-UX 11.23 libc does not have a function for unsigned
10761 SImode-to-TFmode conversion. */
10762 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
10765 /* Rename all the TFmode libfuncs using the HPUX conventions. */
10767 static void
10768 ia64_hpux_init_libfuncs (void)
10770 ia64_init_libfuncs ();
10772 /* The HP SI millicode division and mod functions expect DI arguments.
10773 By turning them off completely we avoid using both libgcc and the
10774 non-standard millicode routines and use the HP DI millicode routines
10775 instead. */
10777 set_optab_libfunc (sdiv_optab, SImode, 0);
10778 set_optab_libfunc (udiv_optab, SImode, 0);
10779 set_optab_libfunc (smod_optab, SImode, 0);
10780 set_optab_libfunc (umod_optab, SImode, 0);
10782 set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10783 set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10784 set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10785 set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10787 /* HP-UX libc has TF min/max/abs routines in it. */
10788 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10789 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10790 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
10792 /* ia64_expand_compare uses this. */
10793 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10795 /* These should never be used. */
10796 set_optab_libfunc (eq_optab, TFmode, 0);
10797 set_optab_libfunc (ne_optab, TFmode, 0);
10798 set_optab_libfunc (gt_optab, TFmode, 0);
10799 set_optab_libfunc (ge_optab, TFmode, 0);
10800 set_optab_libfunc (lt_optab, TFmode, 0);
10801 set_optab_libfunc (le_optab, TFmode, 0);
10804 /* Rename the division and modulus functions in VMS. */
10806 static void
10807 ia64_vms_init_libfuncs (void)
10809 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10810 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10811 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10812 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10813 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10814 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10815 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10816 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
10817 #ifdef MEM_LIBFUNCS_INIT
10818 MEM_LIBFUNCS_INIT;
10819 #endif
10822 /* Rename the TFmode libfuncs available from soft-fp in glibc using
10823 the HPUX conventions. */
10825 static void
10826 ia64_sysv4_init_libfuncs (void)
10828 ia64_init_libfuncs ();
10830 /* These functions are not part of the HPUX TFmode interface. We
10831 use them instead of _U_Qfcmp, which doesn't work the way we
10832 expect. */
10833 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10834 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10835 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10836 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10837 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10838 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10840 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10841 glibc doesn't have them. */
10844 /* Use soft-fp. */
10846 static void
10847 ia64_soft_fp_init_libfuncs (void)
10851 static bool
10852 ia64_vms_valid_pointer_mode (scalar_int_mode mode)
10854 return (mode == SImode || mode == DImode);
10857 /* For HPUX, it is illegal to have relocations in shared segments. */
10859 static int
10860 ia64_hpux_reloc_rw_mask (void)
10862 return 3;
10865 /* For others, relax this so that relocations to local data goes in
10866 read-only segments, but we still cannot allow global relocations
10867 in read-only segments. */
10869 static int
10870 ia64_reloc_rw_mask (void)
10872 return flag_pic ? 3 : 2;
10875 /* Return the section to use for X. The only special thing we do here
10876 is to honor small data. */
10878 static section *
10879 ia64_select_rtx_section (machine_mode mode, rtx x,
10880 unsigned HOST_WIDE_INT align)
10882 if (GET_MODE_SIZE (mode) > 0
10883 && GET_MODE_SIZE (mode) <= ia64_section_threshold
10884 && !TARGET_NO_SDATA)
10885 return sdata_section;
10886 else
10887 return default_elf_select_rtx_section (mode, x, align);
10890 static unsigned int
10891 ia64_section_type_flags (tree decl, const char *name, int reloc)
10893 unsigned int flags = 0;
10895 if (strcmp (name, ".sdata") == 0
10896 || startswith (name, ".sdata.")
10897 || startswith (name, ".gnu.linkonce.s.")
10898 || startswith (name, ".sdata2.")
10899 || startswith (name, ".gnu.linkonce.s2.")
10900 || strcmp (name, ".sbss") == 0
10901 || startswith (name, ".sbss.")
10902 || startswith (name, ".gnu.linkonce.sb."))
10903 flags = SECTION_SMALL;
10905 flags |= default_section_type_flags (decl, name, reloc);
10906 return flags;
10909 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10910 structure type and that the address of that type should be passed
10911 in out0, rather than in r8. */
10913 static bool
10914 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10916 tree ret_type = TREE_TYPE (fntype);
10918 /* The Itanium C++ ABI requires that out0, rather than r8, be used
10919 as the structure return address parameter, if the return value
10920 type has a non-trivial copy constructor or destructor. It is not
10921 clear if this same convention should be used for other
10922 programming languages. Until G++ 3.4, we incorrectly used r8 for
10923 these return values. */
10924 return (abi_version_at_least (2)
10925 && ret_type
10926 && TYPE_MODE (ret_type) == BLKmode
10927 && TREE_ADDRESSABLE (ret_type)
10928 && lang_GNU_CXX ());
10931 /* Output the assembler code for a thunk function. THUNK_DECL is the
10932 declaration for the thunk function itself, FUNCTION is the decl for
10933 the target function. DELTA is an immediate constant offset to be
10934 added to THIS. If VCALL_OFFSET is nonzero, the word at
10935 *(*this + vcall_offset) should be added to THIS. */
10937 static void
10938 ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10939 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10940 tree function)
10942 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
10943 rtx this_rtx, funexp;
10944 rtx_insn *insn;
10945 unsigned int this_parmno;
10946 unsigned int this_regno;
10947 rtx delta_rtx;
10949 reload_completed = 1;
10950 epilogue_completed = 1;
10952 /* Set things up as ia64_expand_prologue might. */
10953 last_scratch_gr_reg = 15;
10955 memset (&current_frame_info, 0, sizeof (current_frame_info));
10956 current_frame_info.spill_cfa_off = -16;
10957 current_frame_info.n_input_regs = 1;
10958 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10960 /* Mark the end of the (empty) prologue. */
10961 emit_note (NOTE_INSN_PROLOGUE_END);
10963 /* Figure out whether "this" will be the first parameter (the
10964 typical case) or the second parameter (as happens when the
10965 virtual function returns certain class objects). */
10966 this_parmno
10967 = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10968 ? 1 : 0);
10969 this_regno = IN_REG (this_parmno);
10970 if (!TARGET_REG_NAMES)
10971 reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10973 this_rtx = gen_rtx_REG (Pmode, this_regno);
10975 /* Apply the constant offset, if required. */
10976 delta_rtx = GEN_INT (delta);
10977 if (TARGET_ILP32)
10979 rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
10980 REG_POINTER (tmp) = 1;
10981 if (delta && satisfies_constraint_I (delta_rtx))
10983 emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
10984 delta = 0;
10986 else
10987 emit_insn (gen_ptr_extend (this_rtx, tmp));
10989 if (delta)
10991 if (!satisfies_constraint_I (delta_rtx))
10993 rtx tmp = gen_rtx_REG (Pmode, 2);
10994 emit_move_insn (tmp, delta_rtx);
10995 delta_rtx = tmp;
10997 emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
11000 /* Apply the offset from the vtable, if required. */
11001 if (vcall_offset)
11003 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
11004 rtx tmp = gen_rtx_REG (Pmode, 2);
11006 if (TARGET_ILP32)
11008 rtx t = gen_rtx_REG (ptr_mode, 2);
11009 REG_POINTER (t) = 1;
11010 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
11011 if (satisfies_constraint_I (vcall_offset_rtx))
11013 emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
11014 vcall_offset = 0;
11016 else
11017 emit_insn (gen_ptr_extend (tmp, t));
11019 else
11020 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
11022 if (vcall_offset)
11024 if (!satisfies_constraint_J (vcall_offset_rtx))
11026 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
11027 emit_move_insn (tmp2, vcall_offset_rtx);
11028 vcall_offset_rtx = tmp2;
11030 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
11033 if (TARGET_ILP32)
11034 emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
11035 else
11036 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
11038 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
11041 /* Generate a tail call to the target function. */
11042 if (! TREE_USED (function))
11044 assemble_external (function);
11045 TREE_USED (function) = 1;
11047 funexp = XEXP (DECL_RTL (function), 0);
11048 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11049 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
11050 insn = get_last_insn ();
11051 SIBLING_CALL_P (insn) = 1;
11053 /* Code generation for calls relies on splitting. */
11054 reload_completed = 1;
11055 epilogue_completed = 1;
11056 try_split (PATTERN (insn), insn, 0);
11058 emit_barrier ();
11060 /* Run just enough of rest_of_compilation to get the insns emitted.
11061 There's not really enough bulk here to make other passes such as
11062 instruction scheduling worth while. */
11064 emit_all_insn_group_barriers (NULL);
11065 insn = get_insns ();
11066 shorten_branches (insn);
11067 assemble_start_function (thunk, fnname);
11068 final_start_function (insn, file, 1);
11069 final (insn, file, 1);
11070 final_end_function ();
11071 assemble_end_function (thunk, fnname);
11073 reload_completed = 0;
11074 epilogue_completed = 0;
11077 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
11079 static rtx
11080 ia64_struct_value_rtx (tree fntype,
11081 int incoming ATTRIBUTE_UNUSED)
11083 if (TARGET_ABI_OPEN_VMS ||
11084 (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
11085 return NULL_RTX;
11086 return gen_rtx_REG (Pmode, GR_REG (8));
11089 static bool
11090 ia64_scalar_mode_supported_p (scalar_mode mode)
11092 switch (mode)
11094 case E_QImode:
11095 case E_HImode:
11096 case E_SImode:
11097 case E_DImode:
11098 case E_TImode:
11099 return true;
11101 case E_SFmode:
11102 case E_DFmode:
11103 case E_XFmode:
11104 case E_RFmode:
11105 return true;
11107 case E_TFmode:
11108 return true;
11110 default:
11111 return false;
11115 static bool
11116 ia64_vector_mode_supported_p (machine_mode mode)
11118 switch (mode)
11120 case E_V8QImode:
11121 case E_V4HImode:
11122 case E_V2SImode:
11123 return true;
11125 case E_V2SFmode:
11126 return true;
11128 default:
11129 return false;
11133 /* Implement the FUNCTION_PROFILER macro. */
11135 void
11136 ia64_output_function_profiler (FILE *file, int labelno)
11138 bool indirect_call;
11140 /* If the function needs a static chain and the static chain
11141 register is r15, we use an indirect call so as to bypass
11142 the PLT stub in case the executable is dynamically linked,
11143 because the stub clobbers r15 as per 5.3.6 of the psABI.
11144 We don't need to do that in non canonical PIC mode. */
11146 if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
11148 gcc_assert (STATIC_CHAIN_REGNUM == 15);
11149 indirect_call = true;
11151 else
11152 indirect_call = false;
11154 if (TARGET_GNU_AS)
11155 fputs ("\t.prologue 4, r40\n", file);
11156 else
11157 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
11158 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
11160 if (NO_PROFILE_COUNTERS)
11161 fputs ("\tmov out3 = r0\n", file);
11162 else
11164 char buf[20];
11165 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
11167 if (TARGET_AUTO_PIC)
11168 fputs ("\tmovl out3 = @gprel(", file);
11169 else
11170 fputs ("\taddl out3 = @ltoff(", file);
11171 assemble_name (file, buf);
11172 if (TARGET_AUTO_PIC)
11173 fputs (")\n", file);
11174 else
11175 fputs ("), r1\n", file);
11178 if (indirect_call)
11179 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
11180 fputs ("\t;;\n", file);
11182 fputs ("\t.save rp, r42\n", file);
11183 fputs ("\tmov out2 = b0\n", file);
11184 if (indirect_call)
11185 fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
11186 fputs ("\t.body\n", file);
11187 fputs ("\tmov out1 = r1\n", file);
11188 if (indirect_call)
11190 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
11191 fputs ("\tmov b6 = r16\n", file);
11192 fputs ("\tld8 r1 = [r14]\n", file);
11193 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
11195 else
11196 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
11199 static GTY(()) rtx mcount_func_rtx;
11200 static rtx
11201 gen_mcount_func_rtx (void)
11203 if (!mcount_func_rtx)
11204 mcount_func_rtx = init_one_libfunc ("_mcount");
11205 return mcount_func_rtx;
11208 void
11209 ia64_profile_hook (int labelno)
11211 rtx label, ip;
11213 if (NO_PROFILE_COUNTERS)
11214 label = const0_rtx;
11215 else
11217 char buf[30];
11218 const char *label_name;
11219 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
11220 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
11221 label = gen_rtx_SYMBOL_REF (Pmode, label_name);
11222 SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
11224 ip = gen_reg_rtx (Pmode);
11225 emit_insn (gen_ip_value (ip));
11226 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
11227 VOIDmode,
11228 gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
11229 ip, Pmode,
11230 label, Pmode);
11233 /* Return the mangling of TYPE if it is an extended fundamental type. */
11235 static const char *
11236 ia64_mangle_type (const_tree type)
11238 type = TYPE_MAIN_VARIANT (type);
11240 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
11241 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
11242 return NULL;
11244 if (type == float128_type_node || type == float64x_type_node)
11245 return NULL;
11247 /* On HP-UX, "long double" is mangled as "e" so __float128 is
11248 mangled as "e". */
11249 if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
11250 return "g";
11251 /* On HP-UX, "e" is not available as a mangling of __float80 so use
11252 an extended mangling. Elsewhere, "e" is available since long
11253 double is 80 bits. */
11254 if (TYPE_MODE (type) == XFmode)
11255 return TARGET_HPUX ? "u9__float80" : "e";
11256 if (TYPE_MODE (type) == RFmode)
11257 return "u7__fpreg";
11258 return NULL;
11261 /* Return the diagnostic message string if conversion from FROMTYPE to
11262 TOTYPE is not allowed, NULL otherwise. */
11263 static const char *
11264 ia64_invalid_conversion (const_tree fromtype, const_tree totype)
11266 /* Reject nontrivial conversion to or from __fpreg. */
11267 if (TYPE_MODE (fromtype) == RFmode
11268 && TYPE_MODE (totype) != RFmode
11269 && TYPE_MODE (totype) != VOIDmode)
11270 return N_("invalid conversion from %<__fpreg%>");
11271 if (TYPE_MODE (totype) == RFmode
11272 && TYPE_MODE (fromtype) != RFmode)
11273 return N_("invalid conversion to %<__fpreg%>");
11274 return NULL;
11277 /* Return the diagnostic message string if the unary operation OP is
11278 not permitted on TYPE, NULL otherwise. */
11279 static const char *
11280 ia64_invalid_unary_op (int op, const_tree type)
11282 /* Reject operations on __fpreg other than unary + or &. */
11283 if (TYPE_MODE (type) == RFmode
11284 && op != CONVERT_EXPR
11285 && op != ADDR_EXPR)
11286 return N_("invalid operation on %<__fpreg%>");
11287 return NULL;
11290 /* Return the diagnostic message string if the binary operation OP is
11291 not permitted on TYPE1 and TYPE2, NULL otherwise. */
11292 static const char *
11293 ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
11295 /* Reject operations on __fpreg. */
11296 if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
11297 return N_("invalid operation on %<__fpreg%>");
11298 return NULL;
11301 /* HP-UX version_id attribute.
11302 For object foo, if the version_id is set to 1234 put out an alias
11303 of '.alias foo "foo{1234}" We can't use "foo{1234}" in anything
11304 other than an alias statement because it is an illegal symbol name. */
11306 static tree
11307 ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
11308 tree name ATTRIBUTE_UNUSED,
11309 tree args,
11310 int flags ATTRIBUTE_UNUSED,
11311 bool *no_add_attrs)
11313 tree arg = TREE_VALUE (args);
11315 if (TREE_CODE (arg) != STRING_CST)
11317 error("version attribute is not a string");
11318 *no_add_attrs = true;
11319 return NULL_TREE;
11321 return NULL_TREE;
11324 /* Target hook for c_mode_for_suffix. */
11326 static machine_mode
11327 ia64_c_mode_for_suffix (char suffix)
11329 if (suffix == 'q')
11330 return TFmode;
11331 if (suffix == 'w')
11332 return XFmode;
11334 return VOIDmode;
11337 /* Implement TARGET_C_MODE_FOR_FLOATING_TYPE. Return DFmode, XFmode
11338 or TFmode for TI_LONG_DOUBLE_TYPE which is for long double type,
11339 go with the default one for the others. */
11341 static machine_mode
11342 ia64_c_mode_for_floating_type (enum tree_index ti)
11344 /* long double is XFmode normally, and TFmode for HPUX. It should be
11345 TFmode for VMS as well but we only support up to DFmode now. */
11346 if (ti == TI_LONG_DOUBLE_TYPE)
11347 return TARGET_HPUX ? TFmode : (TARGET_ABI_OPEN_VMS ? DFmode : XFmode);
11348 return default_mode_for_floating_type (ti);
11351 static GTY(()) rtx ia64_dconst_0_5_rtx;
11354 ia64_dconst_0_5 (void)
11356 if (! ia64_dconst_0_5_rtx)
11358 REAL_VALUE_TYPE rv;
11359 real_from_string (&rv, "0.5");
11360 ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
11362 return ia64_dconst_0_5_rtx;
11365 static GTY(()) rtx ia64_dconst_0_375_rtx;
11368 ia64_dconst_0_375 (void)
11370 if (! ia64_dconst_0_375_rtx)
11372 REAL_VALUE_TYPE rv;
11373 real_from_string (&rv, "0.375");
11374 ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
11376 return ia64_dconst_0_375_rtx;
11379 static fixed_size_mode
11380 ia64_get_reg_raw_mode (int regno)
11382 if (FR_REGNO_P (regno))
11383 return XFmode;
11384 return default_get_reg_raw_mode(regno);
11387 /* Implement TARGET_MEMBER_TYPE_FORCES_BLK. ??? Might not be needed
11388 anymore. */
11390 bool
11391 ia64_member_type_forces_blk (const_tree, machine_mode mode)
11393 return TARGET_HPUX && mode == TFmode;
11396 /* Always default to .text section until HP-UX linker is fixed. */
11398 ATTRIBUTE_UNUSED static section *
11399 ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
11400 enum node_frequency freq ATTRIBUTE_UNUSED,
11401 bool startup ATTRIBUTE_UNUSED,
11402 bool exit ATTRIBUTE_UNUSED)
11404 return NULL;
11407 /* Construct (set target (vec_select op0 (parallel perm))) and
11408 return true if that's a valid instruction in the active ISA. */
11410 static bool
11411 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
11413 rtx rperm[MAX_VECT_LEN], x;
11414 unsigned i;
11416 for (i = 0; i < nelt; ++i)
11417 rperm[i] = GEN_INT (perm[i]);
11419 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
11420 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
11421 x = gen_rtx_SET (target, x);
11423 rtx_insn *insn = emit_insn (x);
11424 if (recog_memoized (insn) < 0)
11426 remove_insn (insn);
11427 return false;
11429 return true;
11432 /* Similar, but generate a vec_concat from op0 and op1 as well. */
11434 static bool
11435 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
11436 const unsigned char *perm, unsigned nelt)
11438 machine_mode v2mode;
11439 rtx x;
11441 if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode))
11442 return false;
11443 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
11444 return expand_vselect (target, x, perm, nelt);
11447 /* Try to expand a no-op permutation. */
11449 static bool
11450 expand_vec_perm_identity (struct expand_vec_perm_d *d)
11452 unsigned i, nelt = d->nelt;
11454 for (i = 0; i < nelt; ++i)
11455 if (d->perm[i] != i)
11456 return false;
11458 if (!d->testing_p)
11459 emit_move_insn (d->target, d->op0);
11461 return true;
11464 /* Try to expand D via a shrp instruction. */
11466 static bool
11467 expand_vec_perm_shrp (struct expand_vec_perm_d *d)
11469 unsigned i, nelt = d->nelt, shift, mask;
11470 rtx tmp, hi, lo;
11472 /* ??? Don't force V2SFmode into the integer registers. */
11473 if (d->vmode == V2SFmode)
11474 return false;
11476 mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);
11478 shift = d->perm[0];
11479 if (BYTES_BIG_ENDIAN && shift > nelt)
11480 return false;
11482 for (i = 1; i < nelt; ++i)
11483 if (d->perm[i] != ((shift + i) & mask))
11484 return false;
11486 if (d->testing_p)
11487 return true;
11489 hi = shift < nelt ? d->op1 : d->op0;
11490 lo = shift < nelt ? d->op0 : d->op1;
11492 shift %= nelt;
11494 shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;
11496 /* We've eliminated the shift 0 case via expand_vec_perm_identity. */
11497 gcc_assert (IN_RANGE (shift, 1, 63));
11499 /* Recall that big-endian elements are numbered starting at the top of
11500 the register. Ideally we'd have a shift-left-pair. But since we
11501 don't, convert to a shift the other direction. */
11502 if (BYTES_BIG_ENDIAN)
11503 shift = 64 - shift;
11505 tmp = gen_reg_rtx (DImode);
11506 hi = gen_lowpart (DImode, hi);
11507 lo = gen_lowpart (DImode, lo);
11508 emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift)));
11510 emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
11511 return true;
11514 /* Try to instantiate D in a single instruction. */
11516 static bool
11517 expand_vec_perm_1 (struct expand_vec_perm_d *d)
11519 unsigned i, nelt = d->nelt;
11520 unsigned char perm2[MAX_VECT_LEN];
11522 /* Try single-operand selections. */
11523 if (d->one_operand_p)
11525 if (expand_vec_perm_identity (d))
11526 return true;
11527 if (expand_vselect (d->target, d->op0, d->perm, nelt))
11528 return true;
11531 /* Try two operand selections. */
11532 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
11533 return true;
11535 /* Recognize interleave style patterns with reversed operands. */
11536 if (!d->one_operand_p)
11538 for (i = 0; i < nelt; ++i)
11540 unsigned e = d->perm[i];
11541 if (e >= nelt)
11542 e -= nelt;
11543 else
11544 e += nelt;
11545 perm2[i] = e;
11548 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
11549 return true;
11552 if (expand_vec_perm_shrp (d))
11553 return true;
11555 /* ??? Look for deposit-like permutations where most of the result
11556 comes from one vector unchanged and the rest comes from a
11557 sequential hunk of the other vector. */
11559 return false;
11562 /* Pattern match broadcast permutations. */
11564 static bool
11565 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
11567 unsigned i, elt, nelt = d->nelt;
11568 unsigned char perm2[2];
11569 rtx temp;
11570 bool ok;
11572 if (!d->one_operand_p)
11573 return false;
11575 elt = d->perm[0];
11576 for (i = 1; i < nelt; ++i)
11577 if (d->perm[i] != elt)
11578 return false;
11580 switch (d->vmode)
11582 case E_V2SImode:
11583 case E_V2SFmode:
11584 /* Implementable by interleave. */
11585 perm2[0] = elt;
11586 perm2[1] = elt + 2;
11587 ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
11588 gcc_assert (ok);
11589 break;
11591 case E_V8QImode:
11592 /* Implementable by extract + broadcast. */
11593 if (BYTES_BIG_ENDIAN)
11594 elt = 7 - elt;
11595 elt *= BITS_PER_UNIT;
11596 temp = gen_reg_rtx (DImode);
11597 emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
11598 GEN_INT (8), GEN_INT (elt)));
11599 emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
11600 break;
11602 case E_V4HImode:
11603 /* Should have been matched directly by vec_select. */
11604 default:
11605 gcc_unreachable ();
11608 return true;
11611 /* A subroutine of ia64_expand_vec_perm_const_1. Try to simplify a
11612 two vector permutation into a single vector permutation by using
11613 an interleave operation to merge the vectors. */
11615 static bool
11616 expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
11618 struct expand_vec_perm_d dremap, dfinal;
11619 unsigned char remap[2 * MAX_VECT_LEN];
11620 unsigned contents, i, nelt, nelt2;
11621 unsigned h0, h1, h2, h3;
11622 rtx_insn *seq;
11623 bool ok;
11625 if (d->one_operand_p)
11626 return false;
11628 nelt = d->nelt;
11629 nelt2 = nelt / 2;
11631 /* Examine from whence the elements come. */
11632 contents = 0;
11633 for (i = 0; i < nelt; ++i)
11634 contents |= 1u << d->perm[i];
11636 memset (remap, 0xff, sizeof (remap));
11637 dremap = *d;
11639 h0 = (1u << nelt2) - 1;
11640 h1 = h0 << nelt2;
11641 h2 = h0 << nelt;
11642 h3 = h0 << (nelt + nelt2);
11644 if ((contents & (h0 | h2)) == contents) /* punpck even halves */
11646 for (i = 0; i < nelt; ++i)
11648 unsigned which = i / 2 + (i & 1 ? nelt : 0);
11649 remap[which] = i;
11650 dremap.perm[i] = which;
11653 else if ((contents & (h1 | h3)) == contents) /* punpck odd halves */
11655 for (i = 0; i < nelt; ++i)
11657 unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
11658 remap[which] = i;
11659 dremap.perm[i] = which;
11662 else if ((contents & 0x5555) == contents) /* mix even elements */
11664 for (i = 0; i < nelt; ++i)
11666 unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
11667 remap[which] = i;
11668 dremap.perm[i] = which;
11671 else if ((contents & 0xaaaa) == contents) /* mix odd elements */
11673 for (i = 0; i < nelt; ++i)
11675 unsigned which = (i | 1) + (i & 1 ? nelt : 0);
11676 remap[which] = i;
11677 dremap.perm[i] = which;
11680 else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt) /* shrp */
11682 unsigned shift = ctz_hwi (contents);
11683 for (i = 0; i < nelt; ++i)
11685 unsigned which = (i + shift) & (2 * nelt - 1);
11686 remap[which] = i;
11687 dremap.perm[i] = which;
11690 else
11691 return false;
11693 /* Use the remapping array set up above to move the elements from their
11694 swizzled locations into their final destinations. */
11695 dfinal = *d;
11696 for (i = 0; i < nelt; ++i)
11698 unsigned e = remap[d->perm[i]];
11699 gcc_assert (e < nelt);
11700 dfinal.perm[i] = e;
11702 if (d->testing_p)
11703 dfinal.op0 = gen_raw_REG (dfinal.vmode, LAST_VIRTUAL_REGISTER + 1);
11704 else
11705 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
11706 dfinal.op1 = dfinal.op0;
11707 dfinal.one_operand_p = true;
11708 dremap.target = dfinal.op0;
11710 /* Test if the final remap can be done with a single insn. For V4HImode
11711 this *will* succeed. For V8QImode or V2SImode it may not. */
11712 start_sequence ();
11713 ok = expand_vec_perm_1 (&dfinal);
11714 seq = get_insns ();
11715 end_sequence ();
11716 if (!ok)
11717 return false;
11718 if (d->testing_p)
11719 return true;
11721 ok = expand_vec_perm_1 (&dremap);
11722 gcc_assert (ok);
11724 emit_insn (seq);
11725 return true;
11728 /* A subroutine of ia64_expand_vec_perm_const_1. Emit a full V4HImode
11729 constant permutation via two mux2 and a merge. */
11731 static bool
11732 expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
11734 unsigned char perm2[4];
11735 rtx rmask[4];
11736 unsigned i;
11737 rtx t0, t1, mask, x;
11738 bool ok;
11740 if (d->vmode != V4HImode || d->one_operand_p)
11741 return false;
11742 if (d->testing_p)
11743 return true;
11745 for (i = 0; i < 4; ++i)
11747 perm2[i] = d->perm[i] & 3;
11748 rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
11750 mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
11751 mask = force_reg (V4HImode, mask);
11753 t0 = gen_reg_rtx (V4HImode);
11754 t1 = gen_reg_rtx (V4HImode);
11756 ok = expand_vselect (t0, d->op0, perm2, 4);
11757 gcc_assert (ok);
11758 ok = expand_vselect (t1, d->op1, perm2, 4);
11759 gcc_assert (ok);
11761 x = gen_rtx_AND (V4HImode, mask, t0);
11762 emit_insn (gen_rtx_SET (t0, x));
11764 x = gen_rtx_NOT (V4HImode, mask);
11765 x = gen_rtx_AND (V4HImode, x, t1);
11766 emit_insn (gen_rtx_SET (t1, x));
11768 x = gen_rtx_IOR (V4HImode, t0, t1);
11769 emit_insn (gen_rtx_SET (d->target, x));
11771 return true;
11774 /* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
11775 With all of the interface bits taken care of, perform the expansion
11776 in D and return true on success. */
11778 static bool
11779 ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
11781 if (expand_vec_perm_1 (d))
11782 return true;
11783 if (expand_vec_perm_broadcast (d))
11784 return true;
11785 if (expand_vec_perm_interleave_2 (d))
11786 return true;
11787 if (expand_vec_perm_v4hi_5 (d))
11788 return true;
11789 return false;
11792 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
11794 static bool
11795 ia64_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
11796 rtx target, rtx op0, rtx op1,
11797 const vec_perm_indices &sel)
11799 if (vmode != op_mode)
11800 return false;
11802 struct expand_vec_perm_d d;
11803 unsigned char perm[MAX_VECT_LEN];
11804 unsigned int i, nelt, which;
11806 d.target = target;
11807 if (op0)
11809 rtx nop0 = force_reg (vmode, op0);
11810 if (op0 == op1)
11811 op1 = nop0;
11812 op0 = nop0;
11814 if (op1)
11815 op1 = force_reg (vmode, op1);
11816 d.op0 = op0;
11817 d.op1 = op1;
11819 d.vmode = vmode;
11820 gcc_assert (VECTOR_MODE_P (d.vmode));
11821 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
11822 d.testing_p = !target;
11824 gcc_assert (sel.length () == nelt);
11825 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
11827 for (i = which = 0; i < nelt; ++i)
11829 unsigned int ei = sel[i] & (2 * nelt - 1);
11831 which |= (ei < nelt ? 1 : 2);
11832 d.perm[i] = ei;
11833 perm[i] = ei;
11836 switch (which)
11838 default:
11839 gcc_unreachable();
11841 case 3:
11842 if (d.testing_p || !rtx_equal_p (d.op0, d.op1))
11844 d.one_operand_p = false;
11845 break;
11848 /* The elements of PERM do not suggest that only the first operand
11849 is used, but both operands are identical. Allow easier matching
11850 of the permutation by folding the permutation into the single
11851 input vector. */
11852 for (i = 0; i < nelt; ++i)
11853 if (d.perm[i] >= nelt)
11854 d.perm[i] -= nelt;
11855 /* FALLTHRU */
11857 case 1:
11858 d.op1 = d.op0;
11859 d.one_operand_p = true;
11860 break;
11862 case 2:
11863 for (i = 0; i < nelt; ++i)
11864 d.perm[i] -= nelt;
11865 d.op0 = d.op1;
11866 d.one_operand_p = true;
11867 break;
11870 if (d.testing_p)
11872 /* We have to go through the motions and see if we can
11873 figure out how to generate the requested permutation. */
11874 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
11875 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
11876 if (!d.one_operand_p)
11877 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
11879 start_sequence ();
11880 bool ret = ia64_expand_vec_perm_const_1 (&d);
11881 end_sequence ();
11883 return ret;
11886 if (ia64_expand_vec_perm_const_1 (&d))
11887 return true;
11889 /* If the mask says both arguments are needed, but they are the same,
11890 the above tried to expand with one_operand_p true. If that didn't
11891 work, retry with one_operand_p false, as that's what we used in _ok. */
11892 if (which == 3 && d.one_operand_p)
11894 memcpy (d.perm, perm, sizeof (perm));
11895 d.one_operand_p = false;
11896 return ia64_expand_vec_perm_const_1 (&d);
11899 return false;
11902 void
11903 ia64_expand_vec_setv2sf (rtx operands[3])
11905 struct expand_vec_perm_d d;
11906 unsigned int which;
11907 bool ok;
11909 d.target = operands[0];
11910 d.op0 = operands[0];
11911 d.op1 = gen_reg_rtx (V2SFmode);
11912 d.vmode = V2SFmode;
11913 d.nelt = 2;
11914 d.one_operand_p = false;
11915 d.testing_p = false;
11917 which = INTVAL (operands[2]);
11918 gcc_assert (which <= 1);
11919 d.perm[0] = 1 - which;
11920 d.perm[1] = which + 2;
11922 emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));
11924 ok = ia64_expand_vec_perm_const_1 (&d);
11925 gcc_assert (ok);
11928 void
11929 ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
11931 struct expand_vec_perm_d d;
11932 machine_mode vmode = GET_MODE (target);
11933 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
11934 bool ok;
11936 d.target = target;
11937 d.op0 = op0;
11938 d.op1 = op1;
11939 d.vmode = vmode;
11940 d.nelt = nelt;
11941 d.one_operand_p = false;
11942 d.testing_p = false;
11944 for (i = 0; i < nelt; ++i)
11945 d.perm[i] = i * 2 + odd;
11947 ok = ia64_expand_vec_perm_const_1 (&d);
11948 gcc_assert (ok);
11951 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
11953 In BR regs, we can't change the DImode at all.
11954 In FP regs, we can't change FP values to integer values and vice versa,
11955 but we can change e.g. DImode to SImode, and V2SFmode into DImode. */
11957 static bool
11958 ia64_can_change_mode_class (machine_mode from, machine_mode to,
11959 reg_class_t rclass)
11961 if (reg_classes_intersect_p (rclass, BR_REGS))
11962 return from == to;
11963 if (SCALAR_FLOAT_MODE_P (from) != SCALAR_FLOAT_MODE_P (to))
11964 return !reg_classes_intersect_p (rclass, FR_REGS);
11965 return true;
11968 #include "gt-ia64.h"