/* Subroutines used for code generation on the DEC Alpha.
   Copyright (C) 1992-2025 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "stringpool.h"
#include "attribs.h"
#include "memmodel.h"
#include "gimple.h"
#include "df.h"
#include "predict.h"
#include "tm_p.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "except.h"
#include "common/common-target.h"
#include "debug.h"
#include "langhooks.h"
#include "cfgrtl.h"
#include "tree-pass.h"
#include "context.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "tree-stdarg.h"
#include "tm-constrs.h"
#include "libfuncs.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "flags.h"
#include "opts.h"

/* This file should be included last.  */
#include "target-def.h"
/* Specify which cpu to schedule for.  */
enum processor_type alpha_tune;

/* Which cpu we're generating code for.  */
enum processor_type alpha_cpu;

static const char * const alpha_cpu_name[] =
{
  "ev4", "ev5", "ev6"
};

/* Specify how accurate floating-point traps need to be.  */
enum alpha_trap_precision alpha_tp;

/* Specify the floating-point rounding mode.  */
enum alpha_fp_rounding_mode alpha_fprm;

/* Specify which things cause traps.  */
enum alpha_fp_trap_mode alpha_fptm;

/* Nonzero if inside of a function, because the Alpha asm can't
   handle .files inside of functions.  */
static int inside_function = FALSE;

/* The number of cycles of latency we should assume on memory reads.  */
static int alpha_memory_latency = 3;

/* Whether the function needs the GP.  */
static int alpha_function_needs_gp;

/* The assembler name of the current function.  */
static const char *alpha_fnname;

/* The next explicit relocation sequence number.  */
extern GTY(()) int alpha_next_sequence_number;
int alpha_next_sequence_number = 1;

/* The literal and gpdisp sequence numbers for this insn, as printed
   by %# and %* respectively.  */
extern GTY(()) int alpha_this_literal_sequence_number;
extern GTY(()) int alpha_this_gpdisp_sequence_number;
int alpha_this_literal_sequence_number;
int alpha_this_gpdisp_sequence_number;
/* Costs of various operations on the different architectures.  */

struct alpha_rtx_cost_data
{
  unsigned char fp_add;
  unsigned char fp_mult;
  unsigned char fp_div_sf;
  unsigned char fp_div_df;
  unsigned char int_mult_si;
  unsigned char int_mult_di;
  unsigned char int_shift;
  unsigned char int_cmov;
  unsigned short int_div;
};

static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] =
{
  { /* EV4 */
    COSTS_N_INSNS (6),     /* fp_add */
    COSTS_N_INSNS (6),     /* fp_mult */
    COSTS_N_INSNS (34),    /* fp_div_sf */
    COSTS_N_INSNS (63),    /* fp_div_df */
    COSTS_N_INSNS (23),    /* int_mult_si */
    COSTS_N_INSNS (23),    /* int_mult_di */
    COSTS_N_INSNS (2),     /* int_shift */
    COSTS_N_INSNS (2),     /* int_cmov */
    COSTS_N_INSNS (97),    /* int_div */
  },
  { /* EV5 */
    COSTS_N_INSNS (4),     /* fp_add */
    COSTS_N_INSNS (4),     /* fp_mult */
    COSTS_N_INSNS (15),    /* fp_div_sf */
    COSTS_N_INSNS (22),    /* fp_div_df */
    COSTS_N_INSNS (8),     /* int_mult_si */
    COSTS_N_INSNS (12),    /* int_mult_di */
    COSTS_N_INSNS (1) + 1, /* int_shift */
    COSTS_N_INSNS (1),     /* int_cmov */
    COSTS_N_INSNS (83),    /* int_div */
  },
  { /* EV6 */
    COSTS_N_INSNS (4),     /* fp_add */
    COSTS_N_INSNS (4),     /* fp_mult */
    COSTS_N_INSNS (12),    /* fp_div_sf */
    COSTS_N_INSNS (15),    /* fp_div_df */
    COSTS_N_INSNS (7),     /* int_mult_si */
    COSTS_N_INSNS (7),     /* int_mult_di */
    COSTS_N_INSNS (1),     /* int_shift */
    COSTS_N_INSNS (2),     /* int_cmov */
    COSTS_N_INSNS (86),    /* int_div */
  },
};

/* Similar but tuned for code size instead of execution latency.  The
   extra +N is fractional cost tuning based on latency.  It's used to
   encourage use of cheaper insns like shift, but only if there's just
   one of them.  */

static struct alpha_rtx_cost_data const alpha_rtx_cost_size =
{
  COSTS_N_INSNS (1),     /* fp_add */
  COSTS_N_INSNS (1),     /* fp_mult */
  COSTS_N_INSNS (1),     /* fp_div_sf */
  COSTS_N_INSNS (1) + 1, /* fp_div_df */
  COSTS_N_INSNS (1) + 1, /* int_mult_si */
  COSTS_N_INSNS (1) + 2, /* int_mult_di */
  COSTS_N_INSNS (1),     /* int_shift */
  COSTS_N_INSNS (1),     /* int_cmov */
  COSTS_N_INSNS (6),     /* int_div */
};
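/* For example, with these size costs a DImode multiply is charged
   COSTS_N_INSNS (1) + 2, so replacing it with a single shift or add
   (COSTS_N_INSNS (1)) still looks profitable, but a synthesis needing
   two or more insns does not.  This reading of the +N tuning is
   illustrative only.  */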
/* Get the number of args of a function in one of two ways.  */
#if TARGET_ABI_OPEN_VMS
#define NUM_ARGS crtl->args.info.num_args
#else
#define NUM_ARGS crtl->args.info
#endif

#define REG_PV 27
#define REG_RA 26
/* Declarations of static functions.  */
static struct machine_function *alpha_init_machine_status (void);
static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx);
static void alpha_handle_trap_shadows (void);
static void alpha_align_insns (void);
static void alpha_override_options_after_change (void);

#if TARGET_ABI_OPEN_VMS
static void alpha_write_linkage (FILE *, const char *);
static bool vms_valid_pointer_mode (scalar_int_mode);
#else
#define vms_patch_builtins()  gcc_unreachable()
#endif
static unsigned int
rest_of_handle_trap_shadows (void)
{
  alpha_handle_trap_shadows ();
  return 0;
}

namespace {

const pass_data pass_data_handle_trap_shadows =
{
  RTL_PASS,
  "trap_shadows",   /* name */
  OPTGROUP_NONE,    /* optinfo_flags */
  TV_NONE,          /* tv_id */
  0,                /* properties_required */
  0,                /* properties_provided */
  0,                /* properties_destroyed */
  0,                /* todo_flags_start */
  TODO_df_finish,   /* todo_flags_finish */
};

class pass_handle_trap_shadows : public rtl_opt_pass
{
public:
  pass_handle_trap_shadows(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_handle_trap_shadows, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return alpha_tp != ALPHA_TP_PROG || flag_exceptions;
    }

  virtual unsigned int execute (function *)
    {
      return rest_of_handle_trap_shadows ();
    }

}; // class pass_handle_trap_shadows

} // anon namespace

rtl_opt_pass *
make_pass_handle_trap_shadows (gcc::context *ctxt)
{
  return new pass_handle_trap_shadows (ctxt);
}

static unsigned int
rest_of_align_insns (void)
{
  alpha_align_insns ();
  return 0;
}

namespace {

const pass_data pass_data_align_insns =
{
  RTL_PASS,
  "align_insns",    /* name */
  OPTGROUP_NONE,    /* optinfo_flags */
  TV_NONE,          /* tv_id */
  0,                /* properties_required */
  0,                /* properties_provided */
  0,                /* properties_destroyed */
  0,                /* todo_flags_start */
  TODO_df_finish,   /* todo_flags_finish */
};

class pass_align_insns : public rtl_opt_pass
{
public:
  pass_align_insns(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_align_insns, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      /* Due to the number of extra trapb insns, don't bother fixing up
	 alignment when trap precision is instruction.  Moreover, we can
	 only do our job when sched2 is run.  */
      return ((alpha_tune == PROCESSOR_EV4
	       || alpha_tune == PROCESSOR_EV5)
	      && optimize && !optimize_size
	      && alpha_tp != ALPHA_TP_INSN
	      && flag_schedule_insns_after_reload);
    }

  virtual unsigned int execute (function *)
    {
      return rest_of_align_insns ();
    }

}; // class pass_align_insns

} // anon namespace

rtl_opt_pass *
make_pass_align_insns (gcc::context *ctxt)
{
  return new pass_align_insns (ctxt);
}
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
/* Implement TARGET_MANGLE_TYPE.  */

static const char *
alpha_mangle_type (const_tree type)
{
  if (TYPE_MAIN_VARIANT (type) == long_double_type_node
      && TARGET_LONG_DOUBLE_128)
    return "g";

  /* For all other types, use normal C++ mangling.  */
  return NULL;
}
#endif
/* Parse target option strings.  */

static void
alpha_option_override (void)
{
  static const struct cpu_table {
    const char *const name;
    const enum processor_type processor;
    const int flags;
    const unsigned short line_size; /* in bytes */
    const unsigned short l1_size;   /* in kb.  */
    const unsigned short l2_size;   /* in kb.  */
  } cpu_table[] = {
    /* EV4/LCA45 had 8k L1 caches; EV45 had 16k L1 caches.
       EV4/EV45 had 128k to 16M 32-byte direct Bcache.  LCA45
       had 64k to 8M 8-byte direct Bcache.  */
    { "ev4",    PROCESSOR_EV4, 0, 32, 8, 8*1024 },
    { "21064",  PROCESSOR_EV4, 0, 32, 8, 8*1024 },
    { "ev45",   PROCESSOR_EV4, 0, 32, 16, 16*1024 },

    /* EV5 or EV56 had 8k 32 byte L1, 96k 32 or 64 byte L2,
       and 1M to 16M 64 byte L3 (not modeled).
       PCA56 had 16k 64-byte cache; PCA57 had 32k Icache.
       PCA56 had 8k 64-byte cache; PCA57 had 16k Dcache.  */
    { "ev5",    PROCESSOR_EV5, 0, 32, 8, 96 },
    { "21164",  PROCESSOR_EV5, 0, 32, 8, 96 },
    { "ev56",   PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
    { "21164a", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
    { "pca56",  PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
    { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
    { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },

    /* EV6 had 64k 64 byte L1, 1M to 16M Bcache.  */
    { "ev6",    PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
    { "21264",  PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
    { "ev67",   PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
      64, 64, 16*1024 },
    { "21264a", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
      64, 64, 16*1024 }
  };

  int const ct_size = ARRAY_SIZE (cpu_table);
  int line_size = 0, l1_size = 0, l2_size = 0;
  int i;

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  /* Default to full IEEE compliance mode for Go language.  */
  if (strcmp (lang_hooks.name, "GNU Go") == 0
      && !(target_flags_explicit & MASK_IEEE))
    target_flags |= MASK_IEEE;

  alpha_fprm = ALPHA_FPRM_NORM;
  alpha_tp = ALPHA_TP_PROG;
  alpha_fptm = ALPHA_FPTM_N;

  if (TARGET_IEEE)
    {
      alpha_tp = ALPHA_TP_INSN;
      alpha_fptm = ALPHA_FPTM_SU;
    }
  if (TARGET_IEEE_WITH_INEXACT)
    {
      alpha_tp = ALPHA_TP_INSN;
      alpha_fptm = ALPHA_FPTM_SUI;
    }

  if (alpha_tp_string)
    {
      if (! strcmp (alpha_tp_string, "p"))
	alpha_tp = ALPHA_TP_PROG;
      else if (! strcmp (alpha_tp_string, "f"))
	alpha_tp = ALPHA_TP_FUNC;
      else if (! strcmp (alpha_tp_string, "i"))
	alpha_tp = ALPHA_TP_INSN;
      else
	error ("bad value %qs for %<-mtrap-precision%> switch",
	       alpha_tp_string);
    }

  if (alpha_fprm_string)
    {
      if (! strcmp (alpha_fprm_string, "n"))
	alpha_fprm = ALPHA_FPRM_NORM;
      else if (! strcmp (alpha_fprm_string, "m"))
	alpha_fprm = ALPHA_FPRM_MINF;
      else if (! strcmp (alpha_fprm_string, "c"))
	alpha_fprm = ALPHA_FPRM_CHOP;
      else if (! strcmp (alpha_fprm_string,"d"))
	alpha_fprm = ALPHA_FPRM_DYN;
      else
	error ("bad value %qs for %<-mfp-rounding-mode%> switch",
	       alpha_fprm_string);
    }

  if (alpha_fptm_string)
    {
      if (strcmp (alpha_fptm_string, "n") == 0)
	alpha_fptm = ALPHA_FPTM_N;
      else if (strcmp (alpha_fptm_string, "u") == 0)
	alpha_fptm = ALPHA_FPTM_U;
      else if (strcmp (alpha_fptm_string, "su") == 0)
	alpha_fptm = ALPHA_FPTM_SU;
      else if (strcmp (alpha_fptm_string, "sui") == 0)
	alpha_fptm = ALPHA_FPTM_SUI;
      else
	error ("bad value %qs for %<-mfp-trap-mode%> switch",
	       alpha_fptm_string);
    }

  if (alpha_cpu_string)
    {
      for (i = 0; i < ct_size; i++)
	if (! strcmp (alpha_cpu_string, cpu_table [i].name))
	  {
	    alpha_tune = alpha_cpu = cpu_table[i].processor;
	    line_size = cpu_table[i].line_size;
	    l1_size = cpu_table[i].l1_size;
	    l2_size = cpu_table[i].l2_size;
	    target_flags &= ~((MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX)
			      & ~target_flags_explicit);
	    target_flags |= cpu_table[i].flags & ~target_flags_explicit;
	    break;
	  }
      if (i == ct_size)
	error ("bad value %qs for %<-mcpu%> switch", alpha_cpu_string);
    }

  if (alpha_tune_string)
    {
      for (i = 0; i < ct_size; i++)
	if (! strcmp (alpha_tune_string, cpu_table [i].name))
	  {
	    alpha_tune = cpu_table[i].processor;
	    line_size = cpu_table[i].line_size;
	    l1_size = cpu_table[i].l1_size;
	    l2_size = cpu_table[i].l2_size;
	    break;
	  }
      if (i == ct_size)
	error ("bad value %qs for %<-mtune%> switch", alpha_tune_string);
    }

  if (line_size)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			 param_l1_cache_line_size, line_size);
  if (l1_size)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			 param_l1_cache_size, l1_size);
  if (l2_size)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			 param_l2_cache_size, l2_size);

  /* Do some sanity checks on the above options.  */

  if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
      && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6)
    {
      warning (0, "fp software completion requires %<-mtrap-precision=i%>");
      alpha_tp = ALPHA_TP_INSN;
    }

  if (alpha_cpu == PROCESSOR_EV6)
    {
      /* Except for EV6 pass 1 (not released), we always have precise
	 arithmetic traps.  Which means we can do software completion
	 without minding trap shadows.  */
      alpha_tp = ALPHA_TP_PROG;
    }

  if (TARGET_FLOAT_VAX)
    {
      if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
	{
	  warning (0, "rounding mode not supported for VAX floats");
	  alpha_fprm = ALPHA_FPRM_NORM;
	}
      if (alpha_fptm == ALPHA_FPTM_SUI)
	{
	  warning (0, "trap mode not supported for VAX floats");
	  alpha_fptm = ALPHA_FPTM_SU;
	}
      if (target_flags_explicit & MASK_LONG_DOUBLE_128)
	warning (0, "128-bit %<long double%> not supported for VAX floats");
      target_flags &= ~MASK_LONG_DOUBLE_128;
    }

  {
    char *end;
    int lat;

    if (!alpha_mlat_string)
      alpha_mlat_string = "L1";

    if (ISDIGIT ((unsigned char)alpha_mlat_string[0])
	&& (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
      ;
    else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
	     && ISDIGIT ((unsigned char)alpha_mlat_string[1])
	     && alpha_mlat_string[2] == '\0')
      {
	static int const cache_latency[][4] =
	{
	  { 3, 30, -1 },	/* ev4 -- Bcache is a guess */
	  { 2, 12, 38 },	/* ev5 -- Bcache from PC164 LMbench numbers */
	  { 3, 12, 30 },	/* ev6 -- Bcache from DS20 LMbench.  */
	};

	lat = alpha_mlat_string[1] - '0';
	if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1)
	  {
	    warning (0, "L%d cache latency unknown for %s",
		     lat, alpha_cpu_name[alpha_tune]);
	    lat = 3;
	  }
	else
	  lat = cache_latency[alpha_tune][lat-1];
      }
    else if (! strcmp (alpha_mlat_string, "main"))
      {
	/* Most current memories have about 370ns latency.  This is
	   a reasonable guess for a fast cpu.  */
	lat = 150;
      }
    else
      {
	warning (0, "bad value %qs for %<-mmemory-latency%>",
		 alpha_mlat_string);
	lat = 3;
      }

    alpha_memory_latency = lat;
  }

  /* Default the definition of "small data" to 8 bytes.  */
  if (!OPTION_SET_P (g_switch_value))
    g_switch_value = 8;

  /* Infer TARGET_SMALL_DATA from -fpic/-fPIC.  */
  if (flag_pic == 1)
    target_flags |= MASK_SMALL_DATA;
  else if (flag_pic == 2)
    target_flags &= ~MASK_SMALL_DATA;

  alpha_override_options_after_change ();

  /* Register variables and functions with the garbage collector.  */

  /* Set up function hooks.  */
  init_machine_status = alpha_init_machine_status;

  /* Tell the compiler when we're using VAX floating point.  */
  if (TARGET_FLOAT_VAX)
    {
      REAL_MODE_FORMAT (SFmode) = &vax_f_format;
      REAL_MODE_FORMAT (DFmode) = &vax_g_format;
      REAL_MODE_FORMAT (TFmode) = NULL;
    }

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif
}
/* Implement targetm.override_options_after_change.  */

static void
alpha_override_options_after_change (void)
{
  /* Align labels and loops for optimal branching.  */
  /* ??? Kludge these by not doing anything if we don't optimize.  */
  if (optimize > 0)
    {
      if (flag_align_loops && !str_align_loops)
	str_align_loops = "16";
      if (flag_align_jumps && !str_align_jumps)
	str_align_jumps = "16";
    }
  if (flag_align_functions && !str_align_functions)
    str_align_functions = "16";
}
/* Returns 1 if VALUE is a mask that contains full bytes of zero or ones.  */

int
zap_mask (HOST_WIDE_INT value)
{
  int i;

  for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
       i++, value >>= 8)
    if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
      return 0;

  return 1;
}
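/* For instance, zap_mask returns 1 for 0xffffffff00000000 and
   0x00000000000000ff (every byte is either 0x00 or 0xff), but 0 for
   0x0000000000000180, whose low byte is 0x80.  Illustrative values
   only.  */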
/* Return true if OP is valid for a particular TLS relocation.
   We are already guaranteed that OP is a CONST.  */

int
tls_symbolic_operand_1 (rtx op, int size, int unspec)
{
  op = XEXP (op, 0);

  if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec)
    return 0;
  op = XVECEXP (op, 0, 0);

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  switch (SYMBOL_REF_TLS_MODEL (op))
    {
    case TLS_MODEL_LOCAL_DYNAMIC:
      return unspec == UNSPEC_DTPREL && size == alpha_tls_size;
    case TLS_MODEL_INITIAL_EXEC:
      return unspec == UNSPEC_TPREL && size == 64;
    case TLS_MODEL_LOCAL_EXEC:
      return unspec == UNSPEC_TPREL && size == alpha_tls_size;
    default:
      gcc_unreachable ();
    }
}
/* Used by aligned_memory_operand and unaligned_memory_operand to
   resolve what reload is going to do with OP if it's a register.  */

rtx
resolve_reload_operand (rtx op)
{
  if (reload_in_progress)
    {
      rtx tmp = op;
      if (SUBREG_P (tmp))
	tmp = SUBREG_REG (tmp);
      if (REG_P (tmp)
	  && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
	{
	  op = reg_equiv_memory_loc (REGNO (tmp));
	  if (op == 0)
	    return 0;
	}
    }
  return op;
}
/* The set of scalar modes supported differs from the default
   check-what-c-supports version in that sometimes TFmode is available
   even when long double indicates only DFmode.  */

static bool
alpha_scalar_mode_supported_p (scalar_mode mode)
{
  switch (mode)
    {
    case E_QImode:
    case E_HImode:
    case E_SImode:
    case E_DImode:
    case E_TImode: /* via optabs.cc */
      return true;

    case E_SFmode:
    case E_DFmode:
      return true;

    case E_TFmode:
      return TARGET_HAS_XFLOATING_LIBS;

    default:
      return false;
    }
}

/* Alpha implements a couple of integer vector mode operations when
   TARGET_MAX is enabled.  We do not check TARGET_MAX here, however,
   which allows the vectorizer to operate on e.g. move instructions,
   or when expand_vector_operations can do something useful.  */

static bool
alpha_vector_mode_supported_p (machine_mode mode)
{
  return mode == V8QImode || mode == V4HImode || mode == V2SImode;
}
/* Return the TLS model to use for SYMBOL.  */

static enum tls_model
tls_symbolic_operand_type (rtx symbol)
{
  enum tls_model model;

  if (GET_CODE (symbol) != SYMBOL_REF)
    return TLS_MODEL_NONE;
  model = SYMBOL_REF_TLS_MODEL (symbol);

  /* Local-exec with a 64-bit size is the same code as initial-exec.  */
  if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64)
    model = TLS_MODEL_INITIAL_EXEC;

  return model;
}
/* Return true if the function DECL will share the same GP as any
   function in the current unit of translation.  */

static bool
decl_has_samegp (const_tree decl)
{
  /* Functions that are not local can be overridden, and thus may
     not share the same gp.  */
  if (!(*targetm.binds_local_p) (decl))
    return false;

  /* If -msmall-data is in effect, assume that there is only one GP
     for the module, and so any local symbol has this property.  We
     need explicit relocations to be able to enforce this for symbols
     not defined in this unit of translation, however.  */
  if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA)
    return true;

  /* Functions that are not external are defined in this UoT.  */
  /* ??? Irritatingly, static functions not yet emitted are still
     marked "external".  Apply this to non-static functions only.  */
  return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl);
}
/* Return true if EXP should be placed in the small data section.  */

static bool
alpha_in_small_data_p (const_tree exp)
{
  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (exp) == STRING_CST)
    return false;

  /* Functions are never in the small data area.  Duh.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  /* COMMON symbols are never small data.  */
  if (VAR_P (exp) && DECL_COMMON (exp))
    return false;

  if (VAR_P (exp) && DECL_SECTION_NAME (exp))
    {
      const char *section = DECL_SECTION_NAME (exp);
      if (strcmp (section, ".sdata") == 0
	  || strcmp (section, ".sbss") == 0)
	return true;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in sdata because it might be too big when completed.  */
      if (size > 0 && size <= g_switch_value)
	return true;
    }

  return false;
}
#if TARGET_ABI_OPEN_VMS
static bool
vms_valid_pointer_mode (scalar_int_mode mode)
{
  return (mode == SImode || mode == DImode);
}

static bool
alpha_linkage_symbol_p (const char *symname)
{
  int symlen = strlen (symname);

  if (symlen > 4)
    return strcmp (&symname [symlen - 4], "..lk") == 0;

  return false;
}

#define LINKAGE_SYMBOL_REF_P(X) \
  ((GET_CODE (X) == SYMBOL_REF \
    && alpha_linkage_symbol_p (XSTR (X, 0))) \
   || (GET_CODE (X) == CONST \
       && GET_CODE (XEXP (X, 0)) == PLUS \
       && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \
       && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0))))
#endif
/* legitimate_address_p recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the
   machine mode for the MEM expression that wants to use this address.

   For Alpha, we have either a constant address or the sum of a
   register and a constant address, or just a register.  For DImode,
   any of those forms can be surrounded with an AND that clears the
   low-order three bits; this is an "unaligned" access.  */

static bool
alpha_legitimate_address_p (machine_mode mode, rtx x, bool strict,
			    code_helper = ERROR_MARK)
{
  /* If this is an ldq_u type address, discard the outer AND.  */
  if (mode == DImode
      && GET_CODE (x) == AND
      && CONST_INT_P (XEXP (x, 1))
      && INTVAL (XEXP (x, 1)) == -8)
    x = XEXP (x, 0);

  /* Discard non-paradoxical subregs.  */
  if (SUBREG_P (x)
      && (GET_MODE_SIZE (GET_MODE (x))
	  < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
    x = SUBREG_REG (x);

  /* Unadorned general registers are valid.  */
  if (REG_P (x)
      && (strict
	  ? STRICT_REG_OK_FOR_BASE_P (x)
	  : NONSTRICT_REG_OK_FOR_BASE_P (x)))
    return true;

  /* Constant addresses (i.e. +/- 32k) are valid.  */
  if (CONSTANT_ADDRESS_P (x))
    return true;

#if TARGET_ABI_OPEN_VMS
  if (LINKAGE_SYMBOL_REF_P (x))
    return true;
#endif

  /* Register plus a small constant offset is valid.  */
  if (GET_CODE (x) == PLUS)
    {
      rtx ofs = XEXP (x, 1);
      x = XEXP (x, 0);

      /* Discard non-paradoxical subregs.  */
      if (SUBREG_P (x)
	  && (GET_MODE_SIZE (GET_MODE (x))
	      < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
	x = SUBREG_REG (x);

      if (REG_P (x))
	{
	  if (! strict
	      && NONSTRICT_REG_OK_FP_BASE_P (x)
	      && CONST_INT_P (ofs))
	    return true;
	  if ((strict
	       ? STRICT_REG_OK_FOR_BASE_P (x)
	       : NONSTRICT_REG_OK_FOR_BASE_P (x))
	      && CONSTANT_ADDRESS_P (ofs))
	    return true;
	}
    }

  /* If we're managing explicit relocations, LO_SUM is valid, as are small
     data symbols.  Avoid explicit relocations of modes larger than word
     mode since i.e. $LC0+8($1) can fold around +/- 32k offset.  */
  else if (TARGET_EXPLICIT_RELOCS
	   && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
    {
      if (small_symbolic_operand (x, Pmode))
	return true;

      if (GET_CODE (x) == LO_SUM)
	{
	  rtx ofs = XEXP (x, 1);
	  x = XEXP (x, 0);

	  /* Discard non-paradoxical subregs.  */
	  if (SUBREG_P (x)
	      && (GET_MODE_SIZE (GET_MODE (x))
		  < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
	    x = SUBREG_REG (x);

	  /* Must have a valid base register.  */
	  if (! (REG_P (x)
		 && (strict
		     ? STRICT_REG_OK_FOR_BASE_P (x)
		     : NONSTRICT_REG_OK_FOR_BASE_P (x))))
	    return false;

	  /* The symbol must be local.  */
	  if (local_symbolic_operand (ofs, Pmode)
	      || dtp32_symbolic_operand (ofs, Pmode)
	      || tp32_symbolic_operand (ofs, Pmode))
	    return true;
	}
    }

  return false;
}
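/* Illustrative cases: (reg $16) and (plus (reg $16) (const_int 64)) are
   accepted, while (plus (reg $16) (const_int 65536)) is not, since the
   offset must fit the signed 16-bit displacement of lda/ldq.  For DImode,
   (and (plus (reg $16) (const_int 5)) (const_int -8)) is also accepted as
   an ldq_u style unaligned address.  Examples only, not exhaustive.  */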
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */

static rtx
alpha_legitimize_address_1 (rtx x, rtx scratch, machine_mode mode)
{
  HOST_WIDE_INT addend;

  /* If the address is (plus reg const_int) and the CONST_INT is not a
     valid offset, compute the high part of the constant and add it to
     the register.  Then our address is (plus temp low-part-const).  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1))
      && ! CONSTANT_ADDRESS_P (XEXP (x, 1)))
    {
      addend = INTVAL (XEXP (x, 1));
      x = XEXP (x, 0);
      goto split_addend;
    }

  /* If the address is (const (plus FOO const_int)), find the low-order
     part of the CONST_INT.  Then load FOO plus any high-order part of the
     CONST_INT into a register.  Our address is (plus reg low-part-const).
     This is done to reduce the number of GOT entries.  */
  if (can_create_pseudo_p ()
      && GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
    {
      addend = INTVAL (XEXP (XEXP (x, 0), 1));
      x = force_reg (Pmode, XEXP (XEXP (x, 0), 0));
      goto split_addend;
    }

  /* If we have a (plus reg const), emit the load as in (2), then add
     the two registers, and finally generate (plus reg low-part-const) as
     our address.  */
  if (can_create_pseudo_p ()
      && GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && GET_CODE (XEXP (x, 1)) == CONST
      && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS
      && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1)))
    {
      addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1));
      x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0),
			       XEXP (XEXP (XEXP (x, 1), 0), 0),
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      goto split_addend;
    }

  /* If this is a local symbol, split the address into HIGH/LO_SUM parts.
     Avoid modes larger than word mode since i.e. $LC0+8($1) can fold
     around +/- 32k offset.  */
  if (TARGET_EXPLICIT_RELOCS
      && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
      && symbolic_operand (x, Pmode))
    {
      rtx r0, r16, eqv, tga, tp, dest, seq;
      rtx_insn *insn;

      switch (tls_symbolic_operand_type (x))
	{
	case TLS_MODEL_NONE:
	  break;

	case TLS_MODEL_GLOBAL_DYNAMIC:
	  {
	    start_sequence ();

	    r0 = gen_rtx_REG (Pmode, 0);
	    r16 = gen_rtx_REG (Pmode, 16);
	    tga = get_tls_get_addr ();
	    dest = gen_reg_rtx (Pmode);
	    seq = GEN_INT (alpha_next_sequence_number++);

	    emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq));
	    rtx val = gen_call_value_osf_tlsgd (r0, tga, seq);
	    insn = emit_call_insn (val);
	    RTL_CONST_CALL_P (insn) = 1;
	    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);

	    insn = get_insns ();
	    end_sequence ();

	    emit_libcall_block (insn, dest, r0, x);
	    return dest;
	  }

	case TLS_MODEL_LOCAL_DYNAMIC:
	  {
	    start_sequence ();

	    r0 = gen_rtx_REG (Pmode, 0);
	    r16 = gen_rtx_REG (Pmode, 16);
	    tga = get_tls_get_addr ();
	    scratch = gen_reg_rtx (Pmode);
	    seq = GEN_INT (alpha_next_sequence_number++);

	    emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq));
	    rtx val = gen_call_value_osf_tlsldm (r0, tga, seq);
	    insn = emit_call_insn (val);
	    RTL_CONST_CALL_P (insn) = 1;
	    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);

	    insn = get_insns ();
	    end_sequence ();

	    eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				  UNSPEC_TLSLDM_CALL);
	    emit_libcall_block (insn, scratch, r0, eqv);

	    eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL);
	    eqv = gen_rtx_CONST (Pmode, eqv);

	    if (alpha_tls_size == 64)
	      {
		dest = gen_reg_rtx (Pmode);
		emit_insn (gen_rtx_SET (dest, eqv));
		emit_insn (gen_adddi3 (dest, dest, scratch));
		return dest;
	      }
	    if (alpha_tls_size == 32)
	      {
		rtx temp = gen_rtx_HIGH (Pmode, eqv);
		temp = gen_rtx_PLUS (Pmode, scratch, temp);
		scratch = gen_reg_rtx (Pmode);
		emit_insn (gen_rtx_SET (scratch, temp));
	      }
	    return gen_rtx_LO_SUM (Pmode, scratch, eqv);
	  }

	case TLS_MODEL_INITIAL_EXEC:
	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
	  eqv = gen_rtx_CONST (Pmode, eqv);
	  tp = gen_reg_rtx (Pmode);
	  scratch = gen_reg_rtx (Pmode);
	  dest = gen_reg_rtx (Pmode);

	  emit_insn (gen_get_thread_pointerdi (tp));
	  emit_insn (gen_rtx_SET (scratch, eqv));
	  emit_insn (gen_adddi3 (dest, tp, scratch));
	  return dest;

	case TLS_MODEL_LOCAL_EXEC:
	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
	  eqv = gen_rtx_CONST (Pmode, eqv);
	  tp = gen_reg_rtx (Pmode);

	  emit_insn (gen_get_thread_pointerdi (tp));
	  if (alpha_tls_size == 32)
	    {
	      rtx temp = gen_rtx_HIGH (Pmode, eqv);
	      temp = gen_rtx_PLUS (Pmode, tp, temp);
	      tp = gen_reg_rtx (Pmode);
	      emit_insn (gen_rtx_SET (tp, temp));
	    }
	  return gen_rtx_LO_SUM (Pmode, tp, eqv);

	default:
	  gcc_unreachable ();
	}

      if (local_symbolic_operand (x, Pmode))
	{
	  if (small_symbolic_operand (x, Pmode))
	    return x;
	  else
	    {
	      if (can_create_pseudo_p ())
		scratch = gen_reg_rtx (Pmode);
	      emit_insn (gen_rtx_SET (scratch, gen_rtx_HIGH (Pmode, x)));
	      return gen_rtx_LO_SUM (Pmode, scratch, x);
	    }
	}
    }

  return NULL;

 split_addend:
  {
    HOST_WIDE_INT low, high;

    low = ((addend & 0xffff) ^ 0x8000) - 0x8000;
    addend -= low;
    high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000;
    addend -= high;

    if (addend)
      x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend),
			       (!can_create_pseudo_p () ? scratch : NULL_RTX),
			       1, OPTAB_LIB_WIDEN);
    if (high)
      x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high),
			       (!can_create_pseudo_p () ? scratch : NULL_RTX),
			       1, OPTAB_LIB_WIDEN);

    return plus_constant (Pmode, x, low);
  }
}
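/* A worked example of the split_addend path: for (plus (reg) 0x12348000)
   the low part is sign-extended to -0x8000 and the remaining high part is
   0x12350000, so the address becomes (plus (plus (reg) 0x12350000) -0x8000),
   where the high part is reachable with a single ldah.  Illustrative values
   only.  */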
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  Return X or the new, valid address.  */

static rtx
alpha_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			  machine_mode mode)
{
  rtx new_x = alpha_legitimize_address_1 (x, NULL_RTX, mode);
  return new_x ? new_x : x;
}

/* Return true if ADDR has an effect that depends on the machine mode it
   is used for.  On the Alpha this is true only for the unaligned modes.
   We can simplify the test since we know that the address must be valid.  */

static bool
alpha_mode_dependent_address_p (const_rtx addr,
				addr_space_t as ATTRIBUTE_UNUSED)
{
  return GET_CODE (addr) == AND;
}
/* Primarily this is required for TLS symbols, but given that our move
   patterns *ought* to be able to handle any symbol at any time, we
   should never be spilling symbolic operands to the constant pool, ever.  */

static bool
alpha_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  enum rtx_code code = GET_CODE (x);
  return code == SYMBOL_REF || code == LABEL_REF || code == CONST;
}

/* We do not allow indirect calls to be optimized into sibling calls, nor
   can we allow a call to a function with a different GP to be optimized
   into a sibcall.  */

static bool
alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  /* Can't do indirect tail calls, since we don't know if the target
     uses the same GP.  */
  if (!decl)
    return false;

  /* Otherwise, we can make a tail call if the target function shares
     the same GP.  */
  return decl_has_samegp (decl);
}
bool
some_small_symbolic_operand_int (rtx x)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, x, ALL)
    {
      rtx x = *iter;
      /* Don't re-split.  */
      if (GET_CODE (x) == LO_SUM)
	iter.skip_subrtxes ();
      else if (small_symbolic_operand (x, Pmode))
	return true;
    }
  return false;
}

rtx
split_small_symbolic_operand (rtx x)
{
  x = copy_insn (x);
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, &x, ALL)
    {
      rtx *ptr = *iter;
      rtx x = *ptr;
      /* Don't re-split.  */
      if (GET_CODE (x) == LO_SUM)
	iter.skip_subrtxes ();
      else if (small_symbolic_operand (x, Pmode))
	{
	  *ptr = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x);
	  iter.skip_subrtxes ();
	}
    }
  return x;
}
/* Indicate that INSN cannot be duplicated.  This is true for any insn
   that we've marked with gpdisp relocs, since those have to stay in
   1-1 correspondence with one another.

   Technically we could copy them if we could set up a mapping from one
   sequence number to another, across the set of insns to be duplicated.
   This seems overly complicated and error-prone since interblock motion
   from sched-ebb could move one of the pair of insns to a different block.

   Also cannot allow jsr insns to be duplicated.  If they throw exceptions,
   then they'll be in a different block from their ldgp.  Which could lead
   the bb reorder code to think that it would be ok to copy just the block
   containing the call and branch to the block containing the ldgp.  */

static bool
alpha_cannot_copy_insn_p (rtx_insn *insn)
{
  if (!reload_completed || !TARGET_EXPLICIT_RELOCS)
    return false;
  if (recog_memoized (insn) >= 0)
    return get_attr_cannot_copy (insn);
  else
    return false;
}
/* Try a machine-dependent way of reloading an illegitimate address
   operand.  If we find one, push the reload and return the new rtx.  */

rtx
alpha_legitimize_reload_address (rtx x,
				 machine_mode mode ATTRIBUTE_UNUSED,
				 int opnum, int type,
				 int ind_levels ATTRIBUTE_UNUSED)
{
  /* We must recognize output that we have already generated ourselves.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 0))
      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
      && CONST_INT_P (XEXP (x, 1)))
    {
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return x;
    }

  /* We wish to handle large displacements off a base register by
     splitting the addend across an ldah and the mem insn.  This
     cuts number of extra insns needed from 3 to 1.  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
      && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0)))
      && CONST_INT_P (XEXP (x, 1)))
    {
      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
      HOST_WIDE_INT high
	= (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;

      /* Check for 32-bit overflow.  */
      if (high + low != val)
	return NULL_RTX;

      /* Reload the high part into a base reg; leave the low part
	 in the mem directly.  */
      x = gen_rtx_PLUS (GET_MODE (x),
			gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
				      GEN_INT (high)),
			GEN_INT (low));

      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return x;
    }

  return NULL_RTX;
}
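/* For example, a displacement of 0x9000 does not fit in 16 signed bits;
   it is split into high = 0x10000 (reloaded into a base register via ldah)
   and low = -0x7000 (left in the mem), since 0x10000 - 0x7000 == 0x9000.
   Illustrative values only.  */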
/* Return the cost of moving between registers of various classes.  Moving
   between FLOAT_REGS and anything else except float regs is expensive.
   In fact, we make it quite expensive because we really don't want to
   do these moves unless it is clearly worth it.  Optimizations may
   reduce the impact of not being able to allocate a pseudo to a
   hard register.  */

static int
alpha_register_move_cost (machine_mode /*mode*/,
			  reg_class_t from, reg_class_t to)
{
  if ((from == FLOAT_REGS) == (to == FLOAT_REGS))
    return 2;

  if (TARGET_FIX)
    return (from == FLOAT_REGS) ? 6 : 8;

  return 4 + 2 * alpha_memory_latency;
}

/* Return the cost of moving data of MODE from a register to
   or from memory.  On the Alpha, bump this up a bit.  */

static int
alpha_memory_move_cost (machine_mode /*mode*/, reg_class_t /*regclass*/,
			bool /*in*/)
{
  return 2 * alpha_memory_latency;
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
alpha_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno, int *total,
		 bool speed)
{
  int code = GET_CODE (x);
  bool float_mode_p = FLOAT_MODE_P (mode);
  const struct alpha_rtx_cost_data *cost_data;

  if (!speed)
    cost_data = &alpha_rtx_cost_size;
  else
    cost_data = &alpha_rtx_cost_data[alpha_tune];

  switch (code)
    {
    case CONST_INT:
      /* If this is an 8-bit constant, return zero since it can be used
	 nearly anywhere with no cost.  If it is a valid operand for an
	 ADD or AND, likewise return 0 if we know it will be used in that
	 context.  Otherwise, return 2 since it might be used there later.
	 All other constants take at least two insns.  */
      if (INTVAL (x) >= 0 && INTVAL (x) < 256)
	{
	  *total = 0;
	  return true;
	}
      /* FALLTHRU */

    case CONST_DOUBLE:
    case CONST_WIDE_INT:
      if (x == CONST0_RTX (mode))
	*total = 0;
      else if ((outer_code == PLUS && add_operand (x, VOIDmode))
	       || (outer_code == AND && and_operand (x, VOIDmode)))
	*total = 0;
      else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode))
	*total = 2;
      else
	*total = COSTS_N_INSNS (2);
      return true;

    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode))
	*total = COSTS_N_INSNS (outer_code != MEM);
      else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode))
	*total = COSTS_N_INSNS (1 + (outer_code != MEM));
      else if (tls_symbolic_operand_type (x))
	/* Estimate of cost for call_pal rduniq.  */
	/* ??? How many insns do we emit here?  More than one...  */
	*total = COSTS_N_INSNS (15);
      else
	/* Otherwise we do a load from the GOT.  */
	*total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
      return true;

    case HIGH:
      /* This is effectively an add_operand.  */
      *total = 2;
      return true;

    case PLUS:
    case MINUS:
      if (float_mode_p)
	*total = cost_data->fp_add;
      else if (GET_CODE (XEXP (x, 0)) == ASHIFT
	       && const23_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
	{
	  *total = (rtx_cost (XEXP (XEXP (x, 0), 0), mode,
			      (enum rtx_code) outer_code, opno, speed)
		    + rtx_cost (XEXP (x, 1), mode,
				(enum rtx_code) outer_code, opno, speed)
		    + COSTS_N_INSNS (1));
	  return true;
	}
      return false;

    case MULT:
      if (float_mode_p)
	*total = cost_data->fp_mult;
      else if (mode == DImode)
	*total = cost_data->int_mult_di;
      else
	*total = cost_data->int_mult_si;
      return false;

    case ASHIFT:
      if (CONST_INT_P (XEXP (x, 1))
	  && INTVAL (XEXP (x, 1)) <= 3)
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}
      /* FALLTHRU */

    case ASHIFTRT:
    case LSHIFTRT:
      *total = cost_data->int_shift;
      return false;

    case IF_THEN_ELSE:
      if (float_mode_p)
	*total = cost_data->fp_add;
      else
	*total = cost_data->int_cmov;
      return false;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (!float_mode_p)
	*total = cost_data->int_div;
      else if (mode == SFmode)
	*total = cost_data->fp_div_sf;
      else
	*total = cost_data->fp_div_df;
      return false;

    case MEM:
      *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
      return true;

    case NEG:
      if (! float_mode_p)
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}
      /* FALLTHRU */

    case ABS:
      if (! float_mode_p)
	{
	  *total = COSTS_N_INSNS (1) + cost_data->int_cmov;
	  return false;
	}
      /* FALLTHRU */

    case FLOAT:
    case UNSIGNED_FLOAT:
    case FIX:
    case UNSIGNED_FIX:
    case FLOAT_TRUNCATE:
      *total = cost_data->fp_add;
      return false;

    case FLOAT_EXTEND:
      if (MEM_P (XEXP (x, 0)))
	*total = 0;
      else
	*total = cost_data->fp_add;
      return false;

    default:
      return false;
    }
}
/* REF is an alignable memory location.  Place an aligned SImode
   reference into *PALIGNED_MEM and the number of bits to shift into
   *PBITNUM.  SCRATCH is a free register for use in reloading out
   of range stack slots.  */

void
get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum)
{
  rtx base;
  HOST_WIDE_INT disp, offset;

  gcc_assert (MEM_P (ref));

  if (reload_in_progress)
    {
      base = find_replacement (&XEXP (ref, 0));
      gcc_assert (memory_address_p (GET_MODE (ref), base));
    }
  else
    base = XEXP (ref, 0);

  if (GET_CODE (base) == PLUS)
    disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
  else
    disp = 0;

  /* Find the byte offset within an aligned word.  If the memory itself is
     claimed to be aligned, believe it.  Otherwise, aligned_memory_operand
     will have examined the base register and determined it is aligned, and
     thus displacements from it are naturally alignable.  */
  if (MEM_ALIGN (ref) >= 32)
    offset = 0;
  else
    offset = disp & 3;

  /* The location should not cross aligned word boundary.  */
  gcc_assert (offset + GET_MODE_SIZE (GET_MODE (ref))
	      <= GET_MODE_SIZE (SImode));

  /* Access the entire aligned word.  */
  *paligned_mem = widen_memory_access (ref, SImode, -offset);

  /* Convert the byte offset within the word to a bit offset.  */
  offset *= BITS_PER_UNIT;
  *pbitnum = GEN_INT (offset);
}
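/* As an illustration, for a QImode reference at (plus (reg) 5) with no
   known alignment, the byte offset within the word is 1, *PALIGNED_MEM
   becomes the SImode word at (plus (reg) 4), and *PBITNUM is 8.
   Example values only.  */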
/* Similar, but just get the address.  Handle the two reload cases.
   Add EXTRA_OFFSET to the address we return.  */

rtx
get_unaligned_address (rtx ref)
{
  rtx base;
  HOST_WIDE_INT offset = 0;

  gcc_assert (MEM_P (ref));

  if (reload_in_progress)
    {
      base = find_replacement (&XEXP (ref, 0));
      gcc_assert (memory_address_p (GET_MODE (ref), base));
    }
  else
    base = XEXP (ref, 0);

  if (GET_CODE (base) == PLUS)
    offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);

  return plus_constant (Pmode, base, offset);
}
/* Compute a value X, such that X & 7 == (ADDR + OFS) & 7.
   X is always returned in a register.  */

rtx
get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs)
{
  if (GET_CODE (addr) == PLUS)
    {
      ofs += INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7),
			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
}
/* On the Alpha, all (non-symbolic) constants except zero go into
   a floating-point register via memory.  Note that we cannot
   return anything that is not a subset of RCLASS, and that some
   symbolic constants cannot be dropped to memory.  */

enum reg_class
alpha_preferred_reload_class(rtx x, enum reg_class rclass)
{
  /* Zero is present in any register class.  */
  if (x == CONST0_RTX (GET_MODE (x)))
    return rclass;

  /* These sorts of constants we can easily drop to memory.  */
  if (CONST_SCALAR_INT_P (x)
      || CONST_DOUBLE_P (x)
      || GET_CODE (x) == CONST_VECTOR)
    {
      if (rclass == FLOAT_REGS)
	return NO_REGS;
      if (rclass == ALL_REGS)
	return GENERAL_REGS;
      return rclass;
    }

  /* All other kinds of constants should not (and in the case of HIGH
     cannot) be dropped to memory -- instead we use a GENERAL_REGS
     secondary reload.  */
  if (CONSTANT_P (x))
    return (rclass == ALL_REGS ? GENERAL_REGS : rclass);

  return rclass;
}
/* Inform reload about cases where moving X with a mode MODE to a register in
   RCLASS requires an extra scratch or immediate register.  Return the class
   needed for the immediate register.  */

static reg_class_t
alpha_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
			machine_mode mode, secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;

  /* Loading and storing HImode or QImode values to and from memory
     usually requires a scratch register.  */
  if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode))
    {
      if (any_memory_operand (x, mode))
	{
	  if (in_p)
	    {
	      if (!aligned_memory_operand (x, mode))
		sri->icode = direct_optab_handler (reload_in_optab, mode);
	    }
	  else
	    sri->icode = direct_optab_handler (reload_out_optab, mode);
	  return NO_REGS;
	}
    }

  /* We also cannot do integral arithmetic into FP regs, as might result
     from register elimination into a DImode fp register.  */
  if (rclass == FLOAT_REGS)
    {
      if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
	return GENERAL_REGS;
      if (in_p && INTEGRAL_MODE_P (mode)
	  && !MEM_P (x) && !REG_P (x) && !CONST_INT_P (x))
	return GENERAL_REGS;
    }

  return NO_REGS;
}
/* Implement TARGET_SECONDARY_MEMORY_NEEDED.

   If we are copying between general and FP registers, we need a memory
   location unless the FIX extension is available.  */

static bool
alpha_secondary_memory_needed (machine_mode, reg_class_t class1,
			       reg_class_t class2)
{
  return (!TARGET_FIX
	  && ((class1 == FLOAT_REGS && class2 != FLOAT_REGS)
	      || (class2 == FLOAT_REGS && class1 != FLOAT_REGS)));
}

/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.  If MODE is
   floating-point, use it.  Otherwise, widen to a word like the default.
   This is needed because we always store integers in FP registers in
   quadword format.  This whole area is very tricky!  */

static machine_mode
alpha_secondary_memory_needed_mode (machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    return mode;
  if (GET_MODE_SIZE (mode) >= 4)
    return mode;
  return mode_for_size (BITS_PER_WORD, GET_MODE_CLASS (mode), 0).require ();
}
/* Given SEQ, which is an INSN list, look for any MEMs in either
   a SET_DEST or a SET_SRC and copy the in-struct, unchanging, and
   volatile flags from REF into each of the MEMs found.  If REF is not
   a MEM, don't do anything.  */

void
alpha_set_memflags (rtx seq, rtx ref)
{
  rtx_insn *insn;

  if (!MEM_P (ref))
    return;

  /* This is only called from alpha.md, after having had something
     generated from one of the insn patterns.  So if everything is
     zero, the pattern is already up-to-date.  */
  if (!MEM_VOLATILE_P (ref)
      && !MEM_NOTRAP_P (ref)
      && !MEM_READONLY_P (ref))
    return;

  subrtx_var_iterator::array_type array;
  for (insn = as_a <rtx_insn *> (seq); insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), NONCONST)
	{
	  rtx x = *iter;
	  if (MEM_P (x))
	    {
	      MEM_VOLATILE_P (x) = MEM_VOLATILE_P (ref);
	      MEM_NOTRAP_P (x) = MEM_NOTRAP_P (ref);
	      MEM_READONLY_P (x) = MEM_READONLY_P (ref);
	      /* Sadly, we cannot use alias sets because the extra
		 aliasing produced by the AND interferes.  Given that
		 two-byte quantities are the only thing we would be
		 able to differentiate anyway, there does not seem to
		 be any point in convoluting the early out of the
		 alias check.  */
	      iter.skip_subrtxes ();
	    }
	}
    else
      gcc_unreachable ();
}
static rtx alpha_emit_set_const (rtx, machine_mode, HOST_WIDE_INT,
				 int, bool);

/* Internal routine for alpha_emit_set_const to check for N or below insns.
   If NO_OUTPUT is true, then we only check to see if N insns are possible,
   and return pc_rtx if successful.  */

static rtx
alpha_emit_set_const_1 (rtx target, machine_mode mode,
			HOST_WIDE_INT c, int n, bool no_output)
{
  HOST_WIDE_INT new_const;
  int i, bits;
  /* Use a pseudo if highly optimizing and still generating RTL.  */
  rtx subtarget
    = (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target);
  rtx temp, insn;

  /* If this is a sign-extended 32-bit constant, we can do this in at most
     three insns, so do it if we have enough insns left.  */

  if (c >> 31 == -1 || c >> 31 == 0)
    {
      HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
      HOST_WIDE_INT tmp1 = c - low;
      HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
      HOST_WIDE_INT extra = 0;

      /* If HIGH will be interpreted as negative but the constant is
	 positive, we must adjust it to do two ldha insns.  */

      if ((high & 0x8000) != 0 && c >= 0)
	{
	  extra = 0x4000;
	  tmp1 -= 0x40000000;
	  high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
	}

      if (c == low || (low == 0 && extra == 0))
	{
	  /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
	     but that meant that we can't handle INT_MIN on 32-bit machines
	     (like NT/Alpha), because we recurse indefinitely through
	     emit_move_insn to gen_movdi.  So instead, since we know exactly
	     what we want, create it explicitly.  */

	  if (no_output)
	    return pc_rtx;
	  if (target == NULL)
	    target = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (target, GEN_INT (c)));
	  return target;
	}
      else if (n >= 2 + (extra != 0))
	{
	  if (no_output)
	    return pc_rtx;
	  if (!can_create_pseudo_p ())
	    {
	      emit_insn (gen_rtx_SET (target, GEN_INT (high << 16)));
	      temp = target;
	    }
	  else
	    temp = copy_to_suggested_reg (GEN_INT (high << 16),
					  subtarget, mode);

	  /* As of 2002-02-23, addsi3 is only available when not optimizing.
	     This means that if we go through expand_binop, we'll try to
	     generate extensions, etc, which will require new pseudos, which
	     will fail during some split phases.  The SImode add patterns
	     still exist, but are not named.  So build the insns by hand.  */

	  if (extra != 0)
	    {
	      if (! subtarget)
		subtarget = gen_reg_rtx (mode);
	      insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16));
	      insn = gen_rtx_SET (subtarget, insn);
	      emit_insn (insn);
	      temp = subtarget;
	    }

	  if (target == NULL)
	    target = gen_reg_rtx (mode);
	  insn = gen_rtx_PLUS (mode, temp, GEN_INT (low));
	  insn = gen_rtx_SET (target, insn);
	  emit_insn (insn);
	  return target;
	}
    }

  /* If we couldn't do it that way, try some other methods.  But if we have
     no instructions left, don't bother.  Likewise, if this is SImode and
     we can't make pseudos, we can't do anything since the expand_binop
     and expand_unop calls will widen and try to make pseudos.  */

  if (n == 1 || (mode == SImode && !can_create_pseudo_p ()))
    return 0;

  /* Next, see if we can load a related constant and then shift and possibly
     negate it to get the constant we want.  Try this once each increasing
     numbers of insns.  */

  for (i = 1; i < n; i++)
    {
      /* First, see if minus some low bits, we've an easy load of
	 high bits.  */

      new_const = ((c & 0xffff) ^ 0x8000) - 0x8000;
      if (new_const != 0)
	{
	  temp = alpha_emit_set_const (subtarget, mode, c - new_const, i, no_output);
	  if (temp)
	    {
	      if (no_output)
		return temp;
	      return expand_binop (mode, add_optab, temp, GEN_INT (new_const),
				   target, 0, OPTAB_WIDEN);
	    }
	}

      /* Next try complementing.  */
      temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output);
      if (temp)
	{
	  if (no_output)
	    return temp;
	  return expand_unop (mode, one_cmpl_optab, temp, target, 0);
	}

      /* Next try to form a constant and do a left shift.  We can do this
	 if some low-order bits are zero; the exact_log2 call below tells
	 us that information.  The bits we are shifting out could be any
	 value, but here we'll just try the 0- and sign-extended forms of
	 the constant.  To try to increase the chance of having the same
	 constant in more than one insn, start at the highest number of
	 bits to shift, but try all possibilities in case a ZAPNOT will
	 be useful.  */

      bits = exact_log2 (c & -c);
      if (bits > 0)
	for (; bits > 0; bits--)
	  {
	    new_const = c >> bits;
	    temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
	    if (!temp && c < 0)
	      {
		new_const = (unsigned HOST_WIDE_INT)c >> bits;
		temp = alpha_emit_set_const (subtarget, mode, new_const,
					     i, no_output);
	      }
	    if (temp)
	      {
		if (no_output)
		  return temp;
		return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
				     target, 0, OPTAB_WIDEN);
	      }
	  }

      /* Now try high-order zero bits.  Here we try the shifted-in bits as
	 all zero and all ones.  Be careful to avoid shifting outside the
	 mode and to avoid shifting outside the host wide int size.  */

      bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
	      - floor_log2 (c) - 1);
      if (bits > 0)
	for (; bits > 0; bits--)
	  {
	    new_const = c << bits;
	    temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
	    if (!temp)
	      {
		new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1);
		temp = alpha_emit_set_const (subtarget, mode, new_const,
					     i, no_output);
	      }
	    if (temp)
	      {
		if (no_output)
		  return temp;
		return expand_binop (mode, lshr_optab, temp, GEN_INT (bits),
				     target, 1, OPTAB_WIDEN);
	      }
	  }

      /* Now try high-order 1 bits.  We get that with a sign-extension.
	 But one bit isn't enough here.  Be careful to avoid shifting outside
	 the mode and to avoid shifting outside the host wide int size.  */

      bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
	      - floor_log2 (~ c) - 2);
      if (bits > 0)
	for (; bits > 0; bits--)
	  {
	    new_const = c << bits;
	    temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
	    if (!temp)
	      {
		new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1);
		temp = alpha_emit_set_const (subtarget, mode, new_const,
					     i, no_output);
	      }
	    if (temp)
	      {
		if (no_output)
		  return temp;
		return expand_binop (mode, ashr_optab, temp, GEN_INT (bits),
				     target, 0, OPTAB_WIDEN);
	      }
	  }
    }

  /* Finally, see if can load a value into the target that is the same as the
     constant except that all bytes that are 0 are changed to be 0xff.  If we
     can, then we can do a ZAPNOT to obtain the desired constant.  */

  new_const = c;
  for (i = 0; i < 64; i += 8)
    if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0)
      new_const |= (HOST_WIDE_INT) 0xff << i;

  /* We are only called for SImode and DImode.  If this is SImode, ensure that
     we are sign extended to a full word.  */

  if (mode == SImode)
    new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000;

  if (new_const != c)
    {
      temp = alpha_emit_set_const (subtarget, mode, new_const, n - 1, no_output);
      if (temp)
	{
	  if (no_output)
	    return temp;
	  return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new_const),
			       target, 0, OPTAB_WIDEN);
	}
    }

  return 0;
}
2001 /* Try to output insns to set TARGET equal to the constant C if it can be
2002 done in at most N insns. Do all computations in MODE. Returns the place
2003 where the output has been placed if it can be done and the insns have been
2004 emitted. If it would take more than N insns, zero is returned and no
2005 insns are emitted. */
2007 static rtx
2008 alpha_emit_set_const (rtx target, machine_mode mode,
2009 HOST_WIDE_INT c, int n, bool no_output)
2011 machine_mode orig_mode = mode;
2012 rtx orig_target = target;
2013 rtx result = 0;
2014 int i;
2016 /* If we can't make any pseudos, TARGET is an SImode hard register, and we
2017 can't load this constant in one insn, do this in DImode. */
2018 if (!can_create_pseudo_p () && mode == SImode
2019 && REG_P (target) && REGNO (target) < FIRST_PSEUDO_REGISTER)
2021 result = alpha_emit_set_const_1 (target, mode, c, 1, no_output);
2022 if (result)
2023 return result;
2025 target = no_output ? NULL : gen_lowpart (DImode, target);
2026 mode = DImode;
2028 else if (mode == V8QImode || mode == V4HImode || mode == V2SImode)
2030 target = no_output ? NULL : gen_lowpart (DImode, target);
2031 mode = DImode;
2034 /* Try 1 insn, then 2, then up to N. */
2035 for (i = 1; i <= n; i++)
2037 result = alpha_emit_set_const_1 (target, mode, c, i, no_output);
2038 if (result)
2040 rtx_insn *insn;
2041 rtx set;
2043 if (no_output)
2044 return result;
2046 insn = get_last_insn ();
2047 set = single_set (insn);
2048 if (! CONSTANT_P (SET_SRC (set)))
2049 set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c));
2050 break;
2054 /* Allow for the case where we changed the mode of TARGET. */
2055 if (result)
2057 if (result == target)
2058 result = orig_target;
2059 else if (mode != orig_mode)
2060 result = gen_lowpart (orig_mode, result);
2063 return result;
2066 /* Having failed to find a 3 insn sequence in alpha_emit_set_const,
2067 fall back to a straightforward decomposition. We do this to avoid
2068 exponential run times encountered when looking for longer sequences
2069 with alpha_emit_set_const. */
2071 static rtx
2072 alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1)
2074 HOST_WIDE_INT d1, d2, d3, d4;
2075 machine_mode mode = GET_MODE (target);
2076 rtx orig_target = target;
2078 /* Decompose the entire word. */
2080 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2081 c1 -= d1;
2082 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2083 c1 = (c1 - d2) >> 32;
2084 d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2085 c1 -= d3;
2086 d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2087 gcc_assert (c1 == d4);
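/* Each ((v & mask) ^ sign) - sign step above sign-extends the low 16 or 32
   bits of the remaining value.  For example, 0x1234567876543210 decomposes
   into d1 = 0x3210, d2 = 0x76540000, d3 = 0x5678 and d4 = 0x12340000:
   the high word 0x12345678 is built with an ldah/lda pair, shifted left
   by 32, and the low word is added back with another ldah/lda pair.  */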
2089 if (mode != DImode)
2090 target = gen_lowpart (DImode, target);
2092 /* Construct the high word. */
2093 if (d4)
2095 emit_move_insn (target, GEN_INT (d4));
2096 if (d3)
2097 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3)));
2099 else
2100 emit_move_insn (target, GEN_INT (d3));
2102 /* Shift it into place. */
2103 emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32)));
2105 /* Add in the low bits. */
2106 if (d2)
2107 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2)));
2108 if (d1)
2109 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1)));
2111 return orig_target;
2114 /* Given an integral CONST_INT or CONST_VECTOR, return the low 64 bits. */
2116 static HOST_WIDE_INT
2117 alpha_extract_integer (rtx x)
2119 if (GET_CODE (x) == CONST_VECTOR)
2120 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
2122 gcc_assert (CONST_INT_P (x));
2124 return INTVAL (x);
2127 /* Implement TARGET_LEGITIMATE_CONSTANT_P. This is all constants for which
2128 we are willing to load the value into a register via a move pattern.
2129 Normally this is all symbolic constants, integral constants that
2130 take three or fewer instructions, and floating-point zero. */
2132 bool
2133 alpha_legitimate_constant_p (machine_mode mode, rtx x)
2135 HOST_WIDE_INT i0;
2137 switch (GET_CODE (x))
2139 case LABEL_REF:
2140 case HIGH:
2141 return true;
2143 case CONST:
2144 if (GET_CODE (XEXP (x, 0)) == PLUS
2145 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2146 x = XEXP (XEXP (x, 0), 0);
2147 else
2148 return true;
2150 if (GET_CODE (x) != SYMBOL_REF)
2151 return true;
2152 /* FALLTHRU */
2154 case SYMBOL_REF:
2155 /* TLS symbols are never valid. */
2156 return SYMBOL_REF_TLS_MODEL (x) == 0;
2158 case CONST_WIDE_INT:
2159 if (TARGET_BUILD_CONSTANTS)
2160 return true;
2161 if (x == CONST0_RTX (mode))
2162 return true;
2163 mode = DImode;
2164 gcc_assert (CONST_WIDE_INT_NUNITS (x) == 2);
2165 i0 = CONST_WIDE_INT_ELT (x, 1);
2166 if (alpha_emit_set_const_1 (NULL_RTX, mode, i0, 3, true) == NULL)
2167 return false;
2168 i0 = CONST_WIDE_INT_ELT (x, 0);
2169 goto do_integer;
2171 case CONST_DOUBLE:
2172 if (x == CONST0_RTX (mode))
2173 return true;
2174 return false;
2176 case CONST_VECTOR:
2177 if (x == CONST0_RTX (mode))
2178 return true;
2179 if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
2180 return false;
2181 if (GET_MODE_SIZE (mode) != 8)
2182 return false;
2183 /* FALLTHRU */
2185 case CONST_INT:
2186 if (TARGET_BUILD_CONSTANTS)
2187 return true;
2188 i0 = alpha_extract_integer (x);
2189 do_integer:
2190 return alpha_emit_set_const_1 (NULL_RTX, mode, i0, 3, true) != NULL;
2192 default:
2193 return false;
2197 /* Operand 1 is known to be a constant, and should require more than one
2198 instruction to load. Emit that multi-part load. */
2200 bool
2201 alpha_split_const_mov (machine_mode mode, rtx *operands)
2203 HOST_WIDE_INT i0;
2204 rtx temp = NULL_RTX;
2206 i0 = alpha_extract_integer (operands[1]);
2208 temp = alpha_emit_set_const (operands[0], mode, i0, 3, false);
2210 if (!temp && TARGET_BUILD_CONSTANTS)
2211 temp = alpha_emit_set_long_const (operands[0], i0);
2213 if (temp)
2215 if (!rtx_equal_p (operands[0], temp))
2216 emit_move_insn (operands[0], temp);
2217 return true;
2220 return false;
2223 /* Expand a move instruction; return true if all work is done.
2224 We don't handle non-bwx subword loads here. */
2226 bool
2227 alpha_expand_mov (machine_mode mode, rtx *operands)
2229 rtx tmp;
2231 /* If the output is not a register, the input must be. */
2232 if (MEM_P (operands[0])
2233 && ! reg_or_0_operand (operands[1], mode))
2234 operands[1] = force_reg (mode, operands[1]);
2236 /* Allow legitimize_address to perform some simplifications. */
2237 if (mode == Pmode && symbolic_operand (operands[1], mode))
2239 tmp = alpha_legitimize_address_1 (operands[1], operands[0], mode);
2240 if (tmp)
2242 if (tmp == operands[0])
2243 return true;
2244 operands[1] = tmp;
2245 return false;
2249 /* Early out for non-constants and valid constants. */
2250 if (! CONSTANT_P (operands[1]) || input_operand (operands[1], mode))
2251 return false;
2253 /* Split large integers. */
2254 if (CONST_INT_P (operands[1])
2255 || GET_CODE (operands[1]) == CONST_VECTOR)
2257 if (alpha_split_const_mov (mode, operands))
2258 return true;
2261 /* Otherwise we've nothing left but to drop the thing to memory. */
2262 tmp = force_const_mem (mode, operands[1]);
2264 if (tmp == NULL_RTX)
2265 return false;
2267 if (reload_in_progress)
2269 emit_move_insn (operands[0], XEXP (tmp, 0));
2270 operands[1] = replace_equiv_address (tmp, operands[0]);
2272 else
2273 operands[1] = validize_mem (tmp);
2274 return false;
2277 /* Expand a non-bwx QImode or HImode move instruction;
2278 return true if all work is done. */
2280 bool
2281 alpha_expand_mov_nobwx (machine_mode mode, rtx *operands)
2283 rtx seq;
2285 /* If the output is not a register, the input must be. */
2286 if (MEM_P (operands[0]))
2287 operands[1] = force_reg (mode, operands[1]);
2289 /* Handle four memory cases, unaligned and aligned for either the input
2290 or the output. The only case where we can be called during reload is
2291 for aligned loads; all other cases require temporaries. */
2293 if (any_memory_operand (operands[1], mode))
2295 if (aligned_memory_operand (operands[1], mode))
2297 if (reload_in_progress)
2299 seq = gen_reload_in_aligned (mode, operands[0], operands[1]);
2300 emit_insn (seq);
2302 else
2304 rtx aligned_mem, bitnum;
2305 rtx scratch = gen_reg_rtx (SImode);
2306 rtx subtarget;
2307 bool copyout;
2309 get_aligned_mem (operands[1], &aligned_mem, &bitnum);
2311 subtarget = operands[0];
2312 if (REG_P (subtarget))
2313 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2314 else
2315 subtarget = gen_reg_rtx (DImode), copyout = true;
2317 if (mode == QImode)
2318 seq = gen_aligned_loadqi (subtarget, aligned_mem,
2319 bitnum, scratch);
2320 else
2321 seq = gen_aligned_loadhi (subtarget, aligned_mem,
2322 bitnum, scratch);
2323 emit_insn (seq);
2325 if (copyout)
2326 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2329 else
2331 /* Don't pass these as parameters since that makes the generated
2332 code depend on parameter evaluation order which will cause
2333 bootstrap failures. */
2335 rtx temp1, temp2, subtarget, ua;
2336 bool copyout;
2338 temp1 = gen_reg_rtx (DImode);
2339 temp2 = gen_reg_rtx (DImode);
2341 subtarget = operands[0];
2342 if (REG_P (subtarget))
2343 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2344 else
2345 subtarget = gen_reg_rtx (DImode), copyout = true;
2347 ua = get_unaligned_address (operands[1]);
2348 if (mode == QImode)
2349 seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2);
2350 else
2351 seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2);
2353 alpha_set_memflags (seq, operands[1]);
2354 emit_insn (seq);
2356 if (copyout)
2357 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2359 return true;
2362 if (any_memory_operand (operands[0], mode))
2364 if (aligned_memory_operand (operands[0], mode))
2366 rtx aligned_mem, bitnum;
2367 rtx temp1 = gen_reg_rtx (SImode);
2368 rtx temp2 = gen_reg_rtx (SImode);
2370 get_aligned_mem (operands[0], &aligned_mem, &bitnum);
2372 emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum,
2373 temp1, temp2));
2375 else
2377 rtx temp1 = gen_reg_rtx (DImode);
2378 rtx temp2 = gen_reg_rtx (DImode);
2379 rtx temp3 = gen_reg_rtx (DImode);
2380 rtx ua = get_unaligned_address (operands[0]);
2382 seq = gen_unaligned_store
2383 (mode, ua, operands[1], temp1, temp2, temp3);
2385 alpha_set_memflags (seq, operands[0]);
2386 emit_insn (seq);
2388 return true;
2391 return false;
2394 /* Implement the movmisalign patterns. One of the operands is a memory
2395 that is not naturally aligned. Emit instructions to load it. */
2397 void
2398 alpha_expand_movmisalign (machine_mode mode, rtx *operands)
2400 /* Honor misaligned loads, which we have promised to handle. */
2401 if (MEM_P (operands[1]))
2403 rtx tmp;
2405 if (register_operand (operands[0], mode))
2406 tmp = operands[0];
2407 else
2408 tmp = gen_reg_rtx (mode);
2410 alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0);
2411 if (tmp != operands[0])
2412 emit_move_insn (operands[0], tmp);
2414 else if (MEM_P (operands[0]))
2416 if (!reg_or_0_operand (operands[1], mode))
2417 operands[1] = force_reg (mode, operands[1]);
2418 alpha_expand_unaligned_store (operands[0], operands[1], 8, 0);
2420 else
2421 gcc_unreachable ();
2424 /* Generate an unsigned DImode to FP conversion. This is the same code
2425 optabs would emit if we didn't have TFmode patterns.
2427 For SFmode, this is the only construction I've found that can pass
2428 gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode
2429 intermediates will work, because you'll get intermediate rounding
2430 that ruins the end result. Some of this could be fixed by turning
2431 on round-to-positive-infinity, but that requires diddling the fpsr,
2432 which kills performance. I tried turning this around and converting
2433 to a negative number, so that I could turn on /m, but either I did
2434 it wrong or there's something else, because I wound up with the exact
2435 same single-bit error. There is a branch-less form of this same code:
2437 srl $16,1,$1
2438 and $16,1,$2
2439 cmplt $16,0,$3
2440 or $1,$2,$2
2441 cmovge $16,$16,$2
2442 itoft $3,$f10
2443 itoft $2,$f11
2444 cvtqs $f11,$f11
2445 adds $f11,$f11,$f0
2446 fcmoveq $f10,$f11,$f0
2448 I'm not using it because it's the same number of instructions as
2449 this branch-full form, and it has more serialized long latency
2450 instructions on the critical path.
2452 For DFmode, we can avoid rounding errors by breaking up the word
2453 into two pieces, converting them separately, and adding them back:
2455 LC0: .long 0,0x5f800000
2457 itoft $16,$f11
2458 lda $2,LC0
2459 cmplt $16,0,$1
2460 cpyse $f11,$f31,$f10
2461 cpyse $f31,$f11,$f11
2462 s4addq $1,$2,$1
2463 lds $f12,0($1)
2464 cvtqt $f10,$f10
2465 cvtqt $f11,$f11
2466 addt $f12,$f10,$f0
2467 addt $f0,$f11,$f0
2469 This doesn't seem to be a clear-cut win over the optabs form.
2470 It probably all depends on the distribution of numbers being
2471 converted -- in the optabs form, all but high-bit-set has a
2472 much lower minimum execution time. */
2474 void
2475 alpha_emit_floatuns (rtx operands[2])
2477 rtx neglab, donelab, i0, i1, f0, in, out;
2478 machine_mode mode;
2480 out = operands[0];
2481 in = force_reg (DImode, operands[1]);
2482 mode = GET_MODE (out);
2483 neglab = gen_label_rtx ();
2484 donelab = gen_label_rtx ();
2485 i0 = gen_reg_rtx (DImode);
2486 i1 = gen_reg_rtx (DImode);
2487 f0 = gen_reg_rtx (mode);
2489 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
2491 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
2492 emit_jump_insn (gen_jump (donelab));
2493 emit_barrier ();
2495 emit_label (neglab);
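/* IN has its high bit set here, so a signed conversion would treat it as
   negative.  Halve it with a logical shift and OR the discarded low bit
   back in as a sticky bit (round to odd); converting that nonnegative
   value and doubling the result preserves correct rounding.  */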
2497 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
2498 emit_insn (gen_anddi3 (i1, in, const1_rtx));
2499 emit_insn (gen_iordi3 (i0, i0, i1));
2500 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
2501 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
2503 emit_label (donelab);
2506 /* Generate the comparison for a conditional branch. */
2508 void
2509 alpha_emit_conditional_branch (rtx operands[], machine_mode cmp_mode)
2511 enum rtx_code cmp_code, branch_code;
2512 machine_mode branch_mode = VOIDmode;
2513 enum rtx_code code = GET_CODE (operands[0]);
2514 rtx op0 = operands[1], op1 = operands[2];
2515 rtx tem;
2517 if (cmp_mode == TFmode)
2519 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2520 op1 = const0_rtx;
2521 cmp_mode = DImode;
2524 /* The general case: fold the comparison code to the types of compares
2525 that we have, choosing the branch as necessary. */
2526 switch (code)
2528 case EQ: case LE: case LT: case LEU: case LTU:
2529 case UNORDERED:
2530 /* We have these compares. */
2531 cmp_code = code, branch_code = NE;
2532 break;
2534 case NE:
2535 case ORDERED:
2536 /* These must be reversed. */
2537 cmp_code = reverse_condition (code), branch_code = EQ;
2538 break;
2540 case GE: case GT: case GEU: case GTU:
2541 /* For FP, we swap them, for INT, we reverse them. */
2542 if (cmp_mode == DFmode)
2544 cmp_code = swap_condition (code);
2545 branch_code = NE;
2546 std::swap (op0, op1);
2548 else
2550 cmp_code = reverse_condition (code);
2551 branch_code = EQ;
2553 break;
2555 default:
2556 gcc_unreachable ();
2559 if (cmp_mode == DFmode)
2561 if (flag_unsafe_math_optimizations && cmp_code != UNORDERED)
2563 /* When we are not as concerned about non-finite values, and we
2564 are comparing against zero, we can branch directly. */
2565 if (op1 == CONST0_RTX (DFmode))
2566 cmp_code = UNKNOWN, branch_code = code;
2567 else if (op0 == CONST0_RTX (DFmode))
2569 /* Undo the swap we probably did just above. */
2570 std::swap (op0, op1);
2571 branch_code = swap_condition (cmp_code);
2572 cmp_code = UNKNOWN;
2575 else
2577 /* ??? We mark the branch mode to be CCmode to prevent the
2578 compare and branch from being combined, since the compare
2579 insn follows IEEE rules that the branch does not. */
2580 branch_mode = CCmode;
2583 else
2585 /* The following optimizations are only for signed compares. */
2586 if (code != LEU && code != LTU && code != GEU && code != GTU)
2588 /* Whee. Compare and branch against 0 directly. */
2589 if (op1 == const0_rtx)
2590 cmp_code = UNKNOWN, branch_code = code;
2592 /* If the constant doesn't fit into an immediate, but can
2593 be generated by lda/ldah, we adjust the argument and
2594 compare against zero, so we can use beq/bne directly. */
2595 /* ??? Don't do this when comparing against symbols, otherwise
2596 we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will
2597 be declared false out of hand (at least for non-weak). */
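/* For instance, (x == 0x4000): 0x4000 is not an 8-bit unsigned
   immediate, but -0x4000 fits in an lda displacement, so we emit
   lda t,-16384(x) and then branch on t == 0.  */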
2598 else if (CONST_INT_P (op1)
2599 && (code == EQ || code == NE)
2600 && !(symbolic_operand (op0, VOIDmode)
2601 || (REG_P (op0) && REG_POINTER (op0))))
2603 rtx n_op1 = GEN_INT (-INTVAL (op1));
2605 if (! satisfies_constraint_I (op1)
2606 && (satisfies_constraint_K (n_op1)
2607 || satisfies_constraint_L (n_op1)))
2608 cmp_code = PLUS, branch_code = code, op1 = n_op1;
2612 if (!reg_or_0_operand (op0, DImode))
2613 op0 = force_reg (DImode, op0);
2614 if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode))
2615 op1 = force_reg (DImode, op1);
2618 /* Emit an initial compare instruction, if necessary. */
2619 tem = op0;
2620 if (cmp_code != UNKNOWN)
2622 tem = gen_reg_rtx (cmp_mode);
2623 emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1));
2626 /* Emit the branch instruction. */
2627 tem = gen_rtx_SET (pc_rtx,
2628 gen_rtx_IF_THEN_ELSE (VOIDmode,
2629 gen_rtx_fmt_ee (branch_code,
2630 branch_mode, tem,
2631 CONST0_RTX (cmp_mode)),
2632 gen_rtx_LABEL_REF (VOIDmode,
2633 operands[3]),
2634 pc_rtx));
2635 emit_jump_insn (tem);
2638 /* Certain simplifications can be done to make invalid setcc operations
2639 valid. Return true on success, false if we can't work. */
2641 bool
2642 alpha_emit_setcc (rtx operands[], machine_mode cmp_mode)
2644 enum rtx_code cmp_code;
2645 enum rtx_code code = GET_CODE (operands[1]);
2646 rtx op0 = operands[2], op1 = operands[3];
2647 rtx tmp;
2649 if (cmp_mode == TFmode)
2651 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2652 op1 = const0_rtx;
2653 cmp_mode = DImode;
2656 if (cmp_mode == DFmode && !TARGET_FIX)
2657 return 0;
2659 /* The general case: fold the comparison code to the types of compares
2660 that we have, choosing the branch as necessary. */
2662 cmp_code = UNKNOWN;
2663 switch (code)
2665 case EQ: case LE: case LT: case LEU: case LTU:
2666 case UNORDERED:
2667 /* We have these compares. */
2668 if (cmp_mode == DFmode)
2669 cmp_code = code, code = NE;
2670 break;
2672 case NE:
2673 if (cmp_mode == DImode && op1 == const0_rtx)
2674 break;
2675 /* FALLTHRU */
2677 case ORDERED:
2678 cmp_code = reverse_condition (code);
2679 code = EQ;
2680 break;
2682 case GE: case GT: case GEU: case GTU:
2683 /* These normally need swapping, but for integer zero we have
2684 special patterns that recognize swapped operands. */
2685 if (cmp_mode == DImode && op1 == const0_rtx)
2686 break;
2687 code = swap_condition (code);
2688 if (cmp_mode == DFmode)
2689 cmp_code = code, code = NE;
2690 std::swap (op0, op1);
2691 break;
2693 default:
2694 gcc_unreachable ();
2697 if (cmp_mode == DImode)
2699 if (!register_operand (op0, DImode))
2700 op0 = force_reg (DImode, op0);
2701 if (!reg_or_8bit_operand (op1, DImode))
2702 op1 = force_reg (DImode, op1);
2705 /* Emit an initial compare instruction, if necessary. */
2706 if (cmp_code != UNKNOWN)
2708 tmp = gen_reg_rtx (cmp_mode);
2709 emit_insn (gen_rtx_SET (tmp, gen_rtx_fmt_ee (cmp_code, cmp_mode,
2710 op0, op1)));
2712 op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp;
2713 op1 = const0_rtx;
2716 /* Emit the setcc instruction. */
2717 emit_insn (gen_rtx_SET (operands[0], gen_rtx_fmt_ee (code, DImode,
2718 op0, op1)));
2719 return true;
2723 /* Rewrite a comparison against zero CMP of the form
2724 (CODE (cc0) (const_int 0)) so it can be written validly in
2725 a conditional move (if_then_else CMP ...).
2726 If both of the operands that set cc0 are nonzero we must emit
2727 an insn to perform the compare (it can't be done within
2728 the conditional move). */
2731 alpha_emit_conditional_move (rtx cmp, machine_mode mode)
2733 enum rtx_code code = GET_CODE (cmp);
2734 enum rtx_code cmov_code = NE;
2735 rtx op0 = XEXP (cmp, 0);
2736 rtx op1 = XEXP (cmp, 1);
2737 machine_mode cmp_mode
2738 = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0));
2739 machine_mode cmov_mode = VOIDmode;
2740 int local_fast_math = flag_unsafe_math_optimizations;
2741 rtx tem;
2743 if (cmp_mode == TFmode)
2745 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2746 op1 = const0_rtx;
2747 cmp_mode = DImode;
2750 gcc_assert (cmp_mode == DFmode || cmp_mode == DImode);
2752 if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode))
2754 enum rtx_code cmp_code;
2756 if (! TARGET_FIX)
2757 return 0;
2759 /* If we have fp<->int register move instructions, do a cmov by
2760 performing the comparison in fp registers, and move the
2761 zero/nonzero value to integer registers, where we can then
2762 use a normal cmov, or vice-versa. */
2764 switch (code)
2766 case EQ: case LE: case LT: case LEU: case LTU:
2767 case UNORDERED:
2768 /* We have these compares. */
2769 cmp_code = code, code = NE;
2770 break;
2772 case NE:
2773 case ORDERED:
2774 /* These must be reversed. */
2775 cmp_code = reverse_condition (code), code = EQ;
2776 break;
2778 case GE: case GT: case GEU: case GTU:
2779 /* These normally need swapping, but for integer zero we have
2780 special patterns that recognize swapped operands. */
2781 if (cmp_mode == DImode && op1 == const0_rtx)
2782 cmp_code = code, code = NE;
2783 else
2785 cmp_code = swap_condition (code);
2786 code = NE;
2787 std::swap (op0, op1);
2789 break;
2791 default:
2792 gcc_unreachable ();
2795 if (cmp_mode == DImode)
2797 if (!reg_or_0_operand (op0, DImode))
2798 op0 = force_reg (DImode, op0);
2799 if (!reg_or_8bit_operand (op1, DImode))
2800 op1 = force_reg (DImode, op1);
2803 tem = gen_reg_rtx (cmp_mode);
2804 emit_insn (gen_rtx_SET (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode,
2805 op0, op1)));
2807 cmp_mode = cmp_mode == DImode ? E_DFmode : E_DImode;
2808 op0 = gen_lowpart (cmp_mode, tem);
2809 op1 = CONST0_RTX (cmp_mode);
2810 cmp = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2811 local_fast_math = 1;
2814 if (cmp_mode == DImode)
2816 if (!reg_or_0_operand (op0, DImode))
2817 op0 = force_reg (DImode, op0);
2818 if (!reg_or_8bit_operand (op1, DImode))
2819 op1 = force_reg (DImode, op1);
2822 /* We may be able to use a conditional move directly.
2823 This avoids emitting spurious compares. */
2824 if (signed_comparison_operator (cmp, VOIDmode)
2825 && (cmp_mode == DImode || local_fast_math)
2826 && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode)))
2827 return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2829 /* We can't put the comparison inside the conditional move;
2830 emit a compare instruction and put that inside the
2831 conditional move. Make sure we emit only comparisons we have;
2832 swap or reverse as necessary. */
2834 if (!can_create_pseudo_p ())
2835 return NULL_RTX;
2837 switch (code)
2839 case EQ: case LE: case LT: case LEU: case LTU:
2840 case UNORDERED:
2841 /* We have these compares: */
2842 break;
2844 case NE:
2845 case ORDERED:
2846 /* These must be reversed. */
2847 code = reverse_condition (code);
2848 cmov_code = EQ;
2849 break;
2851 case GE: case GT: case GEU: case GTU:
2852 /* These normally need swapping, but for integer zero we have
2853 special patterns that recognize swapped operands. */
2854 if (cmp_mode == DImode && op1 == const0_rtx)
2855 break;
2856 code = swap_condition (code);
2857 std::swap (op0, op1);
2858 break;
2860 default:
2861 gcc_unreachable ();
2864 if (cmp_mode == DImode)
2866 if (!reg_or_0_operand (op0, DImode))
2867 op0 = force_reg (DImode, op0);
2868 if (!reg_or_8bit_operand (op1, DImode))
2869 op1 = force_reg (DImode, op1);
2872 /* ??? We mark the branch mode to be CCmode to prevent the compare
2873 and cmov from being combined, since the compare insn follows IEEE
2874 rules that the cmov does not. */
2875 if (cmp_mode == DFmode && !local_fast_math)
2876 cmov_mode = CCmode;
2878 tem = gen_reg_rtx (cmp_mode);
2879 emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1));
2880 return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode));
2883 /* Simplify a conditional move of two constants into a setcc with
2884 arithmetic. This is done with a splitter since combine would
2885 just undo the work if done during code generation. It also catches
2886 cases we wouldn't have before cse. */
2889 alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond,
2890 rtx t_rtx, rtx f_rtx)
2892 HOST_WIDE_INT t, f, diff;
2893 machine_mode mode;
2894 rtx target, subtarget, tmp;
2896 mode = GET_MODE (dest);
2897 t = INTVAL (t_rtx);
2898 f = INTVAL (f_rtx);
2899 diff = t - f;
2901 if (((code == NE || code == EQ) && diff < 0)
2902 || (code == GE || code == GT))
2904 code = reverse_condition (code);
2905 std::swap (t, f);
2906 diff = -diff;
2909 subtarget = target = dest;
2910 if (mode != DImode)
2912 target = gen_lowpart (DImode, dest);
2913 if (can_create_pseudo_p ())
2914 subtarget = gen_reg_rtx (DImode);
2915 else
2916 subtarget = target;
2918 /* Below, we must be careful to use copy_rtx on target and subtarget
2919 in intermediate insns, as they may be a subreg rtx, which may not
2920 be shared. */
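/* For example (illustrative cases): (cond ? 8 : 0) can become a setcc
   followed by a shift left by 3; a -1/0 pair can become a setcc
   followed by a negation; and (cond ? 12 : 4), where the difference
   is 8, can become a setcc followed by an s8addq-style scaled add of 4.  */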
2922 if (f == 0 && exact_log2 (diff) > 0
2923 /* On EV6, we've got enough shifters to make non-arithmetic shifts
2924 viable over a longer latency cmove. On EV5, the E0 slot is a
2925 scarce resource, and on EV4 shift has the same latency as a cmove. */
2926 && (diff <= 8 || alpha_tune == PROCESSOR_EV6))
2928 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2929 emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp));
2931 tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
2932 GEN_INT (exact_log2 (t)));
2933 emit_insn (gen_rtx_SET (target, tmp));
2935 else if (f == 0 && t == -1)
2937 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2938 emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp));
2940 emit_insn (gen_negdi2 (target, copy_rtx (subtarget)));
2942 else if (diff == 1 || diff == 4 || diff == 8)
2944 rtx add_op;
2946 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2947 emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp));
2949 if (diff == 1)
2950 emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f)));
2951 else
2953 add_op = GEN_INT (f);
2954 if (sext_add_operand (add_op, mode))
2956 tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
2957 GEN_INT (exact_log2 (diff)));
2958 tmp = gen_rtx_PLUS (DImode, tmp, add_op);
2959 emit_insn (gen_rtx_SET (target, tmp));
2961 else
2962 return 0;
2965 else
2966 return 0;
2968 return 1;
2971 /* Look up the X_floating library function name for the
2972 given operation. */
2974 struct GTY(()) xfloating_op
2976 const enum rtx_code code;
2977 const char *const GTY((skip)) osf_func;
2978 const char *const GTY((skip)) vms_func;
2979 rtx libcall;
2982 static GTY(()) struct xfloating_op xfloating_ops[] =
2984 { PLUS, "_OtsAddX", "OTS$ADD_X", 0 },
2985 { MINUS, "_OtsSubX", "OTS$SUB_X", 0 },
2986 { MULT, "_OtsMulX", "OTS$MUL_X", 0 },
2987 { DIV, "_OtsDivX", "OTS$DIV_X", 0 },
2988 { EQ, "_OtsEqlX", "OTS$EQL_X", 0 },
2989 { NE, "_OtsNeqX", "OTS$NEQ_X", 0 },
2990 { LT, "_OtsLssX", "OTS$LSS_X", 0 },
2991 { LE, "_OtsLeqX", "OTS$LEQ_X", 0 },
2992 { GT, "_OtsGtrX", "OTS$GTR_X", 0 },
2993 { GE, "_OtsGeqX", "OTS$GEQ_X", 0 },
2994 { FIX, "_OtsCvtXQ", "OTS$CVTXQ", 0 },
2995 { FLOAT, "_OtsCvtQX", "OTS$CVTQX", 0 },
2996 { UNSIGNED_FLOAT, "_OtsCvtQUX", "OTS$CVTQUX", 0 },
2997 { FLOAT_EXTEND, "_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 },
2998 { FLOAT_TRUNCATE, "_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 }
3001 static GTY(()) struct xfloating_op vax_cvt_ops[] =
3003 { FLOAT_EXTEND, "_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 },
3004 { FLOAT_TRUNCATE, "_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 }
3007 static rtx
3008 alpha_lookup_xfloating_lib_func (enum rtx_code code)
3010 struct xfloating_op *ops = xfloating_ops;
3011 long n = ARRAY_SIZE (xfloating_ops);
3012 long i;
3014 gcc_assert (TARGET_HAS_XFLOATING_LIBS);
3016 /* How irritating. Nothing to key off for the main table. */
3017 if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE))
3019 ops = vax_cvt_ops;
3020 n = ARRAY_SIZE (vax_cvt_ops);
3023 for (i = 0; i < n; ++i, ++ops)
3024 if (ops->code == code)
3026 rtx func = ops->libcall;
3027 if (!func)
3029 func = init_one_libfunc (TARGET_ABI_OPEN_VMS
3030 ? ops->vms_func : ops->osf_func);
3031 ops->libcall = func;
3033 return func;
3036 gcc_unreachable ();
3039 /* Most X_floating operations take the rounding mode as an argument.
3040 Compute that here. */
3042 static int
3043 alpha_compute_xfloating_mode_arg (enum rtx_code code,
3044 enum alpha_fp_rounding_mode round)
3046 int mode;
3048 switch (round)
3050 case ALPHA_FPRM_NORM:
3051 mode = 2;
3052 break;
3053 case ALPHA_FPRM_MINF:
3054 mode = 1;
3055 break;
3056 case ALPHA_FPRM_CHOP:
3057 mode = 0;
3058 break;
3059 case ALPHA_FPRM_DYN:
3060 mode = 4;
3061 break;
3062 default:
3063 gcc_unreachable ();
3065 /* XXX For reference, round to +inf is mode = 3. */
3068 if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N)
3069 mode |= 0x10000;
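/* For example, ALPHA_FPRM_NORM yields 2, and a FLOAT_TRUNCATE under
   ALPHA_FPTM_N gets the 0x10000 bit as well, giving 0x10002.  */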
3071 return mode;
3074 /* Emit an X_floating library function call.
3076 Note that these functions do not follow normal calling conventions:
3077 TFmode arguments are passed in two integer registers (as opposed to
3078 indirect); TFmode return values appear in R16+R17.
3080 FUNC is the function to call.
3081 TARGET is where the output belongs.
3082 OPERANDS are the inputs.
3083 NOPERANDS is the count of inputs.
3084 EQUIV is the expression equivalent for the function.
3087 static void
3088 alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[],
3089 int noperands, rtx equiv)
3091 rtx usage = NULL_RTX, reg;
3092 int regno = 16, i;
3094 start_sequence ();
3096 for (i = 0; i < noperands; ++i)
3098 switch (GET_MODE (operands[i]))
3100 case E_TFmode:
3101 reg = gen_rtx_REG (TFmode, regno);
3102 regno += 2;
3103 break;
3105 case E_DFmode:
3106 reg = gen_rtx_REG (DFmode, regno + 32);
3107 regno += 1;
3108 break;
3110 case E_VOIDmode:
3111 gcc_assert (CONST_INT_P (operands[i]));
3112 /* FALLTHRU */
3113 case E_DImode:
3114 reg = gen_rtx_REG (DImode, regno);
3115 regno += 1;
3116 break;
3118 default:
3119 gcc_unreachable ();
3122 emit_move_insn (reg, operands[i]);
3123 use_reg (&usage, reg);
3126 switch (GET_MODE (target))
3128 case E_TFmode:
3129 reg = gen_rtx_REG (TFmode, 16);
3130 break;
3131 case E_DFmode:
3132 reg = gen_rtx_REG (DFmode, 32);
3133 break;
3134 case E_DImode:
3135 reg = gen_rtx_REG (DImode, 0);
3136 break;
3137 default:
3138 gcc_unreachable ();
3141 rtx mem = gen_rtx_MEM (QImode, func);
3142 rtx_insn *tmp = emit_call_insn (gen_call_value (reg, mem, const0_rtx,
3143 const0_rtx, const0_rtx));
3144 CALL_INSN_FUNCTION_USAGE (tmp) = usage;
3145 RTL_CONST_CALL_P (tmp) = 1;
3147 tmp = get_insns ();
3148 end_sequence ();
3150 emit_libcall_block (tmp, target, reg, equiv);
3153 /* Emit an X_floating library function call for arithmetic (+,-,*,/). */
3155 void
3156 alpha_emit_xfloating_arith (enum rtx_code code, rtx operands[])
3158 rtx func;
3159 int mode;
3160 rtx out_operands[3];
3162 func = alpha_lookup_xfloating_lib_func (code);
3163 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3165 out_operands[0] = operands[1];
3166 out_operands[1] = operands[2];
3167 out_operands[2] = GEN_INT (mode);
3168 alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3,
3169 gen_rtx_fmt_ee (code, TFmode, operands[1],
3170 operands[2]));
3173 /* Emit an X_floating library function call for a comparison. */
3175 static rtx
3176 alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1)
3178 enum rtx_code cmp_code, res_code;
3179 rtx func, out, operands[2], note;
3181 /* X_floating library comparison functions return
3182 -1 unordered
3183 0 false
3184 1 true
3185 Convert the compare against the raw return value. */
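/* For example, a GE comparison calls the GE routine (_OtsGeqX or
   OTS$GEQ_X) and *PCODE becomes GT, so the caller tests result > 0;
   the unordered return value of -1 then yields false, as required.  */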
3187 cmp_code = *pcode;
3188 switch (cmp_code)
3190 case UNORDERED:
3191 cmp_code = EQ;
3192 res_code = LT;
3193 break;
3194 case ORDERED:
3195 cmp_code = EQ;
3196 res_code = GE;
3197 break;
3198 case NE:
3199 res_code = NE;
3200 break;
3201 case EQ:
3202 case LT:
3203 case GT:
3204 case LE:
3205 case GE:
3206 res_code = GT;
3207 break;
3208 default:
3209 gcc_unreachable ();
3211 *pcode = res_code;
3213 func = alpha_lookup_xfloating_lib_func (cmp_code);
3215 operands[0] = op0;
3216 operands[1] = op1;
3217 out = gen_reg_rtx (DImode);
3219 /* What's actually returned is -1,0,1, not a proper boolean value. */
3220 note = gen_rtx_fmt_ee (cmp_code, VOIDmode, op0, op1);
3221 note = gen_rtx_UNSPEC (DImode, gen_rtvec (1, note), UNSPEC_XFLT_COMPARE);
3222 alpha_emit_xfloating_libcall (func, out, operands, 2, note);
3224 return out;
3227 /* Emit an X_floating library function call for a conversion. */
3229 void
3230 alpha_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[])
3232 int noperands = 1, mode;
3233 rtx out_operands[2];
3234 rtx func;
3235 enum rtx_code code = orig_code;
3237 if (code == UNSIGNED_FIX)
3238 code = FIX;
3240 func = alpha_lookup_xfloating_lib_func (code);
3242 out_operands[0] = operands[1];
3244 switch (code)
3246 case FIX:
3247 mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP);
3248 out_operands[1] = GEN_INT (mode);
3249 noperands = 2;
3250 break;
3251 case FLOAT_TRUNCATE:
3252 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3253 out_operands[1] = GEN_INT (mode);
3254 noperands = 2;
3255 break;
3256 default:
3257 break;
3260 alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands,
3261 gen_rtx_fmt_e (orig_code,
3262 GET_MODE (operands[0]),
3263 operands[1]));
3266 /* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of
3267 DImode moves from OP[2,3] to OP[0,1]. If FIXUP_OVERLAP is true,
3268 guarantee that the sequence
3269 set (OP[0] OP[2])
3270 set (OP[1] OP[3])
3271 is valid. Naturally, output operand ordering is little-endian.
3272 This is used by *movtf_internal and *movti_internal. */
3274 void
3275 alpha_split_tmode_pair (rtx operands[4], machine_mode mode,
3276 bool fixup_overlap)
3278 switch (GET_CODE (operands[1]))
3280 case REG:
3281 operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
3282 operands[2] = gen_rtx_REG (DImode, REGNO (operands[1]));
3283 break;
3285 case MEM:
3286 operands[3] = adjust_address (operands[1], DImode, 8);
3287 operands[2] = adjust_address (operands[1], DImode, 0);
3288 break;
3290 CASE_CONST_SCALAR_INT:
3291 case CONST_DOUBLE:
3292 gcc_assert (operands[1] == CONST0_RTX (mode));
3293 operands[2] = operands[3] = const0_rtx;
3294 break;
3296 default:
3297 gcc_unreachable ();
3300 switch (GET_CODE (operands[0]))
3302 case REG:
3303 operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
3304 operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
3305 break;
3307 case MEM:
3308 operands[1] = adjust_address (operands[0], DImode, 8);
3309 operands[0] = adjust_address (operands[0], DImode, 0);
3310 break;
3312 default:
3313 gcc_unreachable ();
3316 if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3]))
3318 std::swap (operands[0], operands[1]);
3319 std::swap (operands[2], operands[3]);
3323 /* Implement negtf2 or abstf2. Op0 is destination, op1 is source,
3324 op2 is a register containing the sign bit, operation is the
3325 logical operation to be performed. */
3327 void
3328 alpha_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx))
3330 rtx high_bit = operands[2];
3331 rtx scratch;
3332 int move;
3334 alpha_split_tmode_pair (operands, TFmode, false);
3336 /* Detect three flavors of operand overlap. */
3337 move = 1;
3338 if (rtx_equal_p (operands[0], operands[2]))
3339 move = 0;
3340 else if (rtx_equal_p (operands[1], operands[2]))
3342 if (rtx_equal_p (operands[0], high_bit))
3343 move = 2;
3344 else
3345 move = -1;
3348 if (move < 0)
3349 emit_move_insn (operands[0], operands[2]);
3351 /* ??? If the destination overlaps both source tf and high_bit, then
3352 assume source tf is dead in its entirety and use the other half
3353 for a scratch register. Otherwise "scratch" is just the proper
3354 destination register. */
3355 scratch = operands[move < 2 ? 1 : 3];
3357 emit_insn ((*operation) (scratch, high_bit, operands[3]));
3359 if (move > 0)
3361 emit_move_insn (operands[0], operands[2]);
3362 if (move > 1)
3363 emit_move_insn (operands[1], scratch);
3367 /* Use ext[wlq][lh] as the Architecture Handbook describes for extracting
3368 unaligned data:
3370 unsigned: signed:
3371 word: ldq_u r1,X(r11) ldq_u r1,X(r11)
3372 ldq_u r2,X+1(r11) ldq_u r2,X+1(r11)
3373 lda r3,X(r11) lda r3,X+2(r11)
3374 extwl r1,r3,r1 extql r1,r3,r1
3375 extwh r2,r3,r2 extqh r2,r3,r2
3376 or r1,r2,r1 or r1,r2,r1
3377 sra r1,48,r1
3379 long: ldq_u r1,X(r11) ldq_u r1,X(r11)
3380 ldq_u r2,X+3(r11) ldq_u r2,X+3(r11)
3381 lda r3,X(r11) lda r3,X(r11)
3382 extll r1,r3,r1 extll r1,r3,r1
3383 extlh r2,r3,r2 extlh r2,r3,r2
3384 or r1,r2,r1 addl r1,r2,r1
3386 quad: ldq_u r1,X(r11)
3387 ldq_u r2,X+7(r11)
3388 lda r3,X(r11)
3389 extql r1,r3,r1
3390 extqh r2,r3,r2
3391 or r1,r2,r1
3394 void
3395 alpha_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size,
3396 HOST_WIDE_INT ofs, int sign)
3398 rtx meml, memh, addr, extl, exth, tmp, mema;
3399 machine_mode mode;
3401 if (TARGET_BWX && size == 2)
3403 meml = adjust_address (mem, QImode, ofs);
3404 memh = adjust_address (mem, QImode, ofs+1);
3405 extl = gen_reg_rtx (DImode);
3406 exth = gen_reg_rtx (DImode);
3407 emit_insn (gen_zero_extendqidi2 (extl, meml));
3408 emit_insn (gen_zero_extendqidi2 (exth, memh));
3409 exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8),
3410 NULL, 1, OPTAB_LIB_WIDEN);
3411 addr = expand_simple_binop (DImode, IOR, extl, exth,
3412 NULL, 1, OPTAB_LIB_WIDEN);
3414 if (sign && GET_MODE (tgt) != HImode)
3416 addr = gen_lowpart (HImode, addr);
3417 emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0));
3419 else
3421 if (GET_MODE (tgt) != DImode)
3422 addr = gen_lowpart (GET_MODE (tgt), addr);
3423 emit_move_insn (tgt, addr);
3425 return;
3428 meml = gen_reg_rtx (DImode);
3429 memh = gen_reg_rtx (DImode);
3430 addr = gen_reg_rtx (DImode);
3431 extl = gen_reg_rtx (DImode);
3432 exth = gen_reg_rtx (DImode);
3434 mema = XEXP (mem, 0);
3435 if (GET_CODE (mema) == LO_SUM)
3436 mema = force_reg (Pmode, mema);
3438 /* AND addresses cannot be in any alias set, since they may implicitly
3439 alias surrounding code. Ideally we'd have some alias set that
3440 covered all types except those with alignment 8 or higher. */
3442 tmp = change_address (mem, DImode,
3443 gen_rtx_AND (DImode,
3444 plus_constant (DImode, mema, ofs),
3445 GEN_INT (-8)));
3446 set_mem_alias_set (tmp, 0);
3447 emit_move_insn (meml, tmp);
3449 tmp = change_address (mem, DImode,
3450 gen_rtx_AND (DImode,
3451 plus_constant (DImode, mema,
3452 ofs + size - 1),
3453 GEN_INT (-8)));
3454 set_mem_alias_set (tmp, 0);
3455 emit_move_insn (memh, tmp);
3457 if (sign && size == 2)
3459 emit_move_insn (addr, plus_constant (Pmode, mema, ofs+2));
3461 emit_insn (gen_extql (extl, meml, addr));
3462 emit_insn (gen_extqh (exth, memh, addr));
3464 /* We must use tgt here for the target. Alpha-vms port fails if we use
3465 addr for the target, because addr is marked as a pointer and combine
3466 knows that pointers are always sign-extended 32-bit values. */
3467 addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
3468 addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48),
3469 addr, 1, OPTAB_WIDEN);
3471 else
3473 emit_move_insn (addr, plus_constant (Pmode, mema, ofs));
3474 emit_insn (gen_extxl (extl, meml, GEN_INT (size*8), addr));
3475 switch ((int) size)
3477 case 2:
3478 emit_insn (gen_extwh (exth, memh, addr));
3479 mode = HImode;
3480 break;
3481 case 4:
3482 emit_insn (gen_extlh (exth, memh, addr));
3483 mode = SImode;
3484 break;
3485 case 8:
3486 emit_insn (gen_extqh (exth, memh, addr));
3487 mode = DImode;
3488 break;
3489 default:
3490 gcc_unreachable ();
3493 addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl),
3494 gen_lowpart (mode, exth), gen_lowpart (mode, tgt),
3495 sign, OPTAB_WIDEN);
3498 if (addr != tgt)
3499 emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr));
3502 /* Similarly, use ins and msk instructions to perform unaligned stores. */
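/* For the non-BWX path below: load the two quadwords containing the
   destination, insert the source bytes with ins*l/ins*h, clear the old
   bytes with msk*l/msk*h, OR the halves together, and store the high
   quadword before the low one so the fully aligned case still works.  */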
3504 void
3505 alpha_expand_unaligned_store (rtx dst, rtx src,
3506 HOST_WIDE_INT size, HOST_WIDE_INT ofs)
3508 rtx dstl, dsth, addr, insl, insh, meml, memh, dsta;
3510 if (TARGET_BWX && size == 2)
3512 if (src != const0_rtx)
3514 dstl = gen_lowpart (QImode, src);
3515 dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8),
3516 NULL, 1, OPTAB_LIB_WIDEN);
3517 dsth = gen_lowpart (QImode, dsth);
3519 else
3520 dstl = dsth = const0_rtx;
3522 meml = adjust_address (dst, QImode, ofs);
3523 memh = adjust_address (dst, QImode, ofs+1);
3525 emit_move_insn (meml, dstl);
3526 emit_move_insn (memh, dsth);
3527 return;
3530 dstl = gen_reg_rtx (DImode);
3531 dsth = gen_reg_rtx (DImode);
3532 insl = gen_reg_rtx (DImode);
3533 insh = gen_reg_rtx (DImode);
3535 dsta = XEXP (dst, 0);
3536 if (GET_CODE (dsta) == LO_SUM)
3537 dsta = force_reg (Pmode, dsta);
3539 /* AND addresses cannot be in any alias set, since they may implicitly
3540 alias surrounding code. Ideally we'd have some alias set that
3541 covered all types except those with alignment 8 or higher. */
3543 meml = change_address (dst, DImode,
3544 gen_rtx_AND (DImode,
3545 plus_constant (DImode, dsta, ofs),
3546 GEN_INT (-8)));
3547 set_mem_alias_set (meml, 0);
3549 memh = change_address (dst, DImode,
3550 gen_rtx_AND (DImode,
3551 plus_constant (DImode, dsta,
3552 ofs + size - 1),
3553 GEN_INT (-8)));
3554 set_mem_alias_set (memh, 0);
3556 emit_move_insn (dsth, memh);
3557 emit_move_insn (dstl, meml);
3559 addr = copy_addr_to_reg (plus_constant (Pmode, dsta, ofs));
3561 if (src != CONST0_RTX (GET_MODE (src)))
3563 emit_insn (gen_insxh (insh, gen_lowpart (DImode, src),
3564 GEN_INT (size*8), addr));
3566 switch ((int) size)
3568 case 2:
3569 emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr));
3570 break;
3571 case 4:
3572 emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr));
3573 break;
3574 case 8:
3575 emit_insn (gen_insql (insl, gen_lowpart (DImode, src), addr));
3576 break;
3577 default:
3578 gcc_unreachable ();
3582 emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr));
3584 switch ((int) size)
3586 case 2:
3587 emit_insn (gen_mskwl (dstl, dstl, addr));
3588 break;
3589 case 4:
3590 emit_insn (gen_mskll (dstl, dstl, addr));
3591 break;
3592 case 8:
3593 emit_insn (gen_mskql (dstl, dstl, addr));
3594 break;
3595 default:
3596 gcc_unreachable ();
3599 if (src != CONST0_RTX (GET_MODE (src)))
3601 dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN);
3602 dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN);
3605 /* Must store high before low for the degenerate case of an aligned address. */
3606 emit_move_insn (memh, dsth);
3607 emit_move_insn (meml, dstl);
3610 /* The block move code tries to maximize speed by separating loads and
3611 stores at the expense of register pressure: we load all of the data
3612 before we store it back out. There are two secondary effects worth
3613 mentioning: that this speeds copying to/from aligned and unaligned
3614 buffers, and that it makes the code significantly easier to write. */
3616 #define MAX_MOVE_WORDS 8
3618 /* Load an integral number of consecutive unaligned quadwords. */
3620 static void
3621 alpha_expand_unaligned_load_words (rtx *out_regs, rtx smem,
3622 HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3624 rtx const im8 = GEN_INT (-8);
3625 rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1];
3626 rtx sreg, areg, tmp, smema;
3627 HOST_WIDE_INT i;
3629 /* Generate all the tmp registers we need. */
3630 for (i = 0; i < words; ++i)
3632 data_regs[i] = out_regs[i];
3633 ext_tmps[i] = gen_reg_rtx (DImode);
3635 data_regs[words] = gen_reg_rtx (DImode);
3637 if (ofs != 0)
3638 smem = adjust_address (smem, GET_MODE (smem), ofs);
3640 smema = XEXP (smem, 0);
3641 if (GET_CODE (smema) == LO_SUM)
3642 smema = force_reg (Pmode, smema);
3644 /* Load up all of the source data. */
3645 for (i = 0; i < words; ++i)
3647 tmp = change_address (smem, DImode,
3648 gen_rtx_AND (DImode,
3649 plus_constant (DImode, smema, 8*i),
3650 im8));
3651 set_mem_alias_set (tmp, 0);
3652 emit_move_insn (data_regs[i], tmp);
3655 tmp = change_address (smem, DImode,
3656 gen_rtx_AND (DImode,
3657 plus_constant (DImode, smema,
3658 8*words - 1),
3659 im8));
3660 set_mem_alias_set (tmp, 0);
3661 emit_move_insn (data_regs[words], tmp);
3663 /* Extract the half-word fragments. Unfortunately DEC decided to make
3664 extxh with offset zero a noop instead of zeroing the register, so
3665 we must take care of that edge condition ourselves with cmov. */
3667 sreg = copy_addr_to_reg (smema);
3668 areg = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL,
3669 1, OPTAB_WIDEN);
3670 for (i = 0; i < words; ++i)
3672 emit_insn (gen_extql (data_regs[i], data_regs[i], sreg));
3673 emit_insn (gen_extqh (ext_tmps[i], data_regs[i+1], sreg));
3674 emit_insn (gen_rtx_SET (ext_tmps[i],
3675 gen_rtx_IF_THEN_ELSE (DImode,
3676 gen_rtx_EQ (DImode, areg,
3677 const0_rtx),
3678 const0_rtx, ext_tmps[i])));
3681 /* Merge the half-words into whole words. */
3682 for (i = 0; i < words; ++i)
3684 out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i],
3685 ext_tmps[i], data_regs[i], 1, OPTAB_WIDEN);
3689 /* Store an integral number of consecutive unaligned quadwords. DATA_REGS
3690 may be NULL to store zeros. */
3692 static void
3693 alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem,
3694 HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3696 rtx const im8 = GEN_INT (-8);
3697 rtx ins_tmps[MAX_MOVE_WORDS];
3698 rtx st_tmp_1, st_tmp_2, dreg;
3699 rtx st_addr_1, st_addr_2, dmema;
3700 HOST_WIDE_INT i;
3702 /* Generate all the tmp registers we need. */
3703 if (data_regs != NULL)
3704 for (i = 0; i < words; ++i)
3705 ins_tmps[i] = gen_reg_rtx(DImode);
3706 st_tmp_1 = gen_reg_rtx(DImode);
3707 st_tmp_2 = gen_reg_rtx(DImode);
3709 if (ofs != 0)
3710 dmem = adjust_address (dmem, GET_MODE (dmem), ofs);
3712 dmema = XEXP (dmem, 0);
3713 if (GET_CODE (dmema) == LO_SUM)
3714 dmema = force_reg (Pmode, dmema);
3716 st_addr_2 = change_address (dmem, DImode,
3717 gen_rtx_AND (DImode,
3718 plus_constant (DImode, dmema,
3719 words*8 - 1),
3720 im8));
3721 set_mem_alias_set (st_addr_2, 0);
3723 st_addr_1 = change_address (dmem, DImode,
3724 gen_rtx_AND (DImode, dmema, im8));
3725 set_mem_alias_set (st_addr_1, 0);
3727 /* Load up the destination end bits. */
3728 emit_move_insn (st_tmp_2, st_addr_2);
3729 emit_move_insn (st_tmp_1, st_addr_1);
3731 /* Shift the input data into place. */
3732 dreg = copy_addr_to_reg (dmema);
3733 if (data_regs != NULL)
3735 for (i = words-1; i >= 0; --i)
3737 emit_insn (gen_insqh (ins_tmps[i], data_regs[i], dreg));
3738 emit_insn (gen_insql (data_regs[i], data_regs[i], dreg));
3740 for (i = words-1; i > 0; --i)
3742 ins_tmps[i-1] = expand_binop (DImode, ior_optab, data_regs[i],
3743 ins_tmps[i-1], ins_tmps[i-1], 1,
3744 OPTAB_WIDEN);
3748 /* Split and merge the ends with the destination data. */
3749 emit_insn (gen_mskqh (st_tmp_2, st_tmp_2, dreg));
3750 emit_insn (gen_mskql (st_tmp_1, st_tmp_1, dreg));
3752 if (data_regs != NULL)
3754 st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words-1],
3755 st_tmp_2, 1, OPTAB_WIDEN);
3756 st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0],
3757 st_tmp_1, 1, OPTAB_WIDEN);
3760 /* Store it all. */
3761 emit_move_insn (st_addr_2, st_tmp_2);
3762 for (i = words-1; i > 0; --i)
3764 rtx tmp = change_address (dmem, DImode,
3765 gen_rtx_AND (DImode,
3766 plus_constant (DImode,
3767 dmema, i*8),
3768 im8));
3769 set_mem_alias_set (tmp, 0);
3770 emit_move_insn (tmp, data_regs ? ins_tmps[i-1] : const0_rtx);
3772 emit_move_insn (st_addr_1, st_tmp_1);
3775 /* Get the base alignment and offset of EXPR in A and O respectively.
3776 Check for any pseudo register pointer alignment and for any tree
3777 node information and return the largest alignment determined and
3778 its associated offset. */
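/* For instance, a MEM whose address is (plus (reg R) (const_int 12)),
   where pseudo R is known to be 128-bit aligned, yields A == 128 and
   O == 12; the block move/clear callers can then still use 32-bit
   chunks, since 12 is a multiple of 4 but not of 8.  */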
3780 static void
3781 alpha_get_mem_rtx_alignment_and_offset (rtx expr, int &a, HOST_WIDE_INT &o)
3783 HOST_WIDE_INT tree_offset = 0, reg_offset = 0, mem_offset = 0;
3784 int tree_align = 0, reg_align = 0, mem_align = MEM_ALIGN (expr);
3786 gcc_assert (MEM_P (expr));
3788 rtx addr = XEXP (expr, 0);
3789 switch (GET_CODE (addr))
3791 case REG:
3792 reg_align = REGNO_POINTER_ALIGN (REGNO (addr));
3793 break;
3795 case PLUS:
3796 if (REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
3798 reg_offset = INTVAL (XEXP (addr, 1));
3799 reg_align = REGNO_POINTER_ALIGN (REGNO (XEXP (addr, 0)));
3801 break;
3803 default:
3804 break;
3807 tree mem = MEM_EXPR (expr);
3808 if (mem != NULL_TREE)
3809 switch (TREE_CODE (mem))
3811 case MEM_REF:
3812 tree_offset = mem_ref_offset (mem).force_shwi ();
3813 tree_align = get_object_alignment (get_base_address (mem));
3814 break;
3816 case COMPONENT_REF:
3818 tree byte_offset = component_ref_field_offset (mem);
3819 tree bit_offset = DECL_FIELD_BIT_OFFSET (TREE_OPERAND (mem, 1));
3820 poly_int64 offset;
3821 if (!byte_offset
3822 || !poly_int_tree_p (byte_offset, &offset)
3823 || !tree_fits_shwi_p (bit_offset))
3824 break;
3825 tree_offset = offset + tree_to_shwi (bit_offset) / BITS_PER_UNIT;
3827 tree_align = get_object_alignment (get_base_address (mem));
3828 break;
3830 default:
3831 break;
3834 if (reg_align > mem_align)
3836 mem_offset = reg_offset;
3837 mem_align = reg_align;
3839 if (tree_align > mem_align)
3841 mem_offset = tree_offset;
3842 mem_align = tree_align;
3844 o = mem_offset;
3845 a = mem_align;
3848 /* Expand string/block move operations.
3850 operands[0] is the pointer to the destination.
3851 operands[1] is the pointer to the source.
3852 operands[2] is the number of bytes to move.
3853 operands[3] is the alignment. */
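/* For instance, a 24-byte copy from a 64-bit aligned source to a
   16-bit aligned destination loads three DImode registers and then
   writes them back through the unaligned store-words path below.  */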
3856 alpha_expand_block_move (rtx operands[])
3858 rtx bytes_rtx = operands[2];
3859 rtx align_rtx = operands[3];
3860 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
3861 HOST_WIDE_INT bytes = orig_bytes;
3862 HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT;
3863 HOST_WIDE_INT dst_align = src_align;
3864 rtx orig_src = operands[1];
3865 rtx orig_dst = operands[0];
3866 rtx data_regs[2 * MAX_MOVE_WORDS + 16];
3867 rtx tmp;
3868 unsigned int i, words, ofs, nregs = 0;
3870 if (orig_bytes <= 0)
3871 return 1;
3872 else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
3873 return 0;
3875 /* Look for stricter alignment. */
3876 HOST_WIDE_INT c;
3877 int a;
3879 alpha_get_mem_rtx_alignment_and_offset (orig_src, a, c);
3880 if (a > src_align)
3882 if (a >= 64 && c % 8 == 0)
3883 src_align = 64;
3884 else if (a >= 32 && c % 4 == 0)
3885 src_align = 32;
3886 else if (a >= 16 && c % 2 == 0)
3887 src_align = 16;
3889 if (MEM_P (orig_src) && MEM_ALIGN (orig_src) < src_align)
3891 orig_src = shallow_copy_rtx (orig_src);
3892 set_mem_align (orig_src, src_align);
3896 alpha_get_mem_rtx_alignment_and_offset (orig_dst, a, c);
3897 if (a > dst_align)
3899 if (a >= 64 && c % 8 == 0)
3900 dst_align = 64;
3901 else if (a >= 32 && c % 4 == 0)
3902 dst_align = 32;
3903 else if (a >= 16 && c % 2 == 0)
3904 dst_align = 16;
3906 if (MEM_P (orig_dst) && MEM_ALIGN (orig_dst) < dst_align)
3908 orig_dst = shallow_copy_rtx (orig_dst);
3909 set_mem_align (orig_dst, dst_align);
3913 ofs = 0;
3914 if (src_align >= 64 && bytes >= 8)
3916 words = bytes / 8;
3918 for (i = 0; i < words; ++i)
3919 data_regs[nregs + i] = gen_reg_rtx (DImode);
3921 for (i = 0; i < words; ++i)
3922 emit_move_insn (data_regs[nregs + i],
3923 adjust_address (orig_src, DImode, ofs + i * 8));
3925 nregs += words;
3926 bytes -= words * 8;
3927 ofs += words * 8;
3930 if (src_align >= 32 && bytes >= 4)
3932 words = bytes / 4;
3934 /* Load an even quantity of SImode data pieces only. */
3935 unsigned int hwords = words / 2;
3936 for (i = 0; i / 2 < hwords; ++i)
3938 data_regs[nregs + i] = gen_reg_rtx (SImode);
3939 emit_move_insn (data_regs[nregs + i],
3940 adjust_address (orig_src, SImode, ofs + i * 4));
3943 /* If we'll be using unaligned stores, merge data from pairs
3944 of SImode registers into DImode registers so that we can
3945 store it more efficiently via quadword unaligned stores. */
3946 unsigned int j;
3947 if (dst_align < 32)
3948 for (i = 0, j = 0; i < words / 2; ++i, j = i * 2)
3950 rtx hi = expand_simple_binop (DImode, ASHIFT,
3951 data_regs[nregs + j + 1],
3952 GEN_INT (32), NULL_RTX,
3953 1, OPTAB_WIDEN);
3954 data_regs[nregs + i] = expand_simple_binop (DImode, IOR, hi,
3955 data_regs[nregs + j],
3956 NULL_RTX,
3957 1, OPTAB_WIDEN);
3959 else
3960 j = i;
3962 /* Take care of any remaining odd trailing SImode data piece. */
3963 if (j < words)
3965 data_regs[nregs + i] = gen_reg_rtx (SImode);
3966 emit_move_insn (data_regs[nregs + i],
3967 adjust_address (orig_src, SImode, ofs + j * 4));
3968 ++i;
3971 nregs += i;
3972 bytes -= words * 4;
3973 ofs += words * 4;
3976 if (bytes >= 8)
3978 words = bytes / 8;
3980 for (i = 0; i < words+1; ++i)
3981 data_regs[nregs + i] = gen_reg_rtx (DImode);
3983 alpha_expand_unaligned_load_words (data_regs + nregs, orig_src,
3984 words, ofs);
3986 nregs += words;
3987 bytes -= words * 8;
3988 ofs += words * 8;
3991 if (! TARGET_BWX && bytes >= 4)
3993 data_regs[nregs++] = tmp = gen_reg_rtx (SImode);
3994 alpha_expand_unaligned_load (tmp, orig_src, 4, ofs, 0);
3995 bytes -= 4;
3996 ofs += 4;
3999 if (bytes >= 2)
4001 if (src_align >= 16)
4004 tmp = gen_reg_rtx (DImode);
4005 emit_move_insn (tmp,
4006 expand_simple_unop (DImode, SET,
4007 adjust_address (orig_src,
4008 HImode, ofs),
4009 NULL_RTX, 1));
4010 data_regs[nregs++] = gen_rtx_SUBREG (HImode, tmp, 0);
4011 bytes -= 2;
4012 ofs += 2;
4014 while (bytes >= 2);
4015 else if (! TARGET_BWX)
4017 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
4018 alpha_expand_unaligned_load (tmp, orig_src, 2, ofs, 0);
4019 bytes -= 2;
4020 ofs += 2;
4024 while (bytes > 0)
4026 data_regs[nregs++] = tmp = gen_reg_rtx (QImode);
4027 emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs));
4028 bytes -= 1;
4029 ofs += 1;
4032 gcc_assert (nregs <= ARRAY_SIZE (data_regs));
4034 /* Now save it back out again. */
4036 i = 0, ofs = 0;
4038 /* Write out the data in whatever chunks reading the source allowed. */
4039 if (dst_align >= 64)
4041 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
4043 emit_move_insn (adjust_address (orig_dst, DImode, ofs),
4044 data_regs[i]);
4045 ofs += 8;
4046 i++;
4050 if (dst_align >= 32)
4052 /* If the source has remaining DImode regs, write them out in
4053 two pieces. */
4054 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
4056 tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32),
4057 NULL_RTX, 1, OPTAB_WIDEN);
4059 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
4060 gen_lowpart (SImode, data_regs[i]));
4061 emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4),
4062 gen_lowpart (SImode, tmp));
4063 ofs += 8;
4064 i++;
4067 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
4069 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
4070 data_regs[i]);
4071 ofs += 4;
4072 i++;
4076 if (i < nregs && GET_MODE (data_regs[i]) == DImode)
4078 /* Write out a remaining block of words using unaligned methods. */
4080 for (words = 1; i + words < nregs; words++)
4081 if (GET_MODE (data_regs[i + words]) != DImode)
4082 break;
4084 if (words == 1)
4085 alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs);
4086 else
4087 alpha_expand_unaligned_store_words (data_regs + i, orig_dst,
4088 words, ofs);
4090 i += words;
4091 ofs += words * 8;
4094 /* Due to the above, this won't be aligned. */
4095 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
4097 alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs);
4098 ofs += 4;
4099 i++;
4100 gcc_assert (i == nregs || GET_MODE (data_regs[i]) != SImode);
4103 if (dst_align >= 16)
4104 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
4106 emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]);
4107 i++;
4108 ofs += 2;
4110 else
4111 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
4113 alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs);
4114 i++;
4115 ofs += 2;
4118 /* The remainder must be byte copies. */
4119 while (i < nregs)
4121 gcc_assert (GET_MODE (data_regs[i]) == QImode);
4122 emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]);
4123 i++;
4124 ofs += 1;
4127 return 1;
4131 alpha_expand_block_clear (rtx operands[])
4133 rtx bytes_rtx = operands[1];
4134 rtx align_rtx = operands[3];
4135 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
4136 HOST_WIDE_INT bytes = orig_bytes;
4137 HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT;
4138 HOST_WIDE_INT alignofs = 0;
4139 rtx orig_dst = operands[0];
4140 int i, words, ofs = 0;
4142 if (orig_bytes <= 0)
4143 return 1;
4144 if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
4145 return 0;
4147 /* Look for stricter alignment. */
4148 HOST_WIDE_INT c;
4149 int a;
4151 alpha_get_mem_rtx_alignment_and_offset (orig_dst, a, c);
4152 if (a > align)
4154 if (a >= 64)
4155 align = a, alignofs = -c & 7;
4156 else if (a >= 32)
4157 align = a, alignofs = -c & 3;
4158 else if (a >= 16)
4159 align = a, alignofs = -c & 1;
4161 if (MEM_P (orig_dst) && MEM_ALIGN (orig_dst) < align)
4163 orig_dst = shallow_copy_rtx (orig_dst);
4164 set_mem_align (orig_dst, align);
4168 /* Handle an unaligned prefix first. */
4170 if (alignofs > 0)
4172 /* Given that alignofs is bounded by align, the only time BWX could
4173 generate three stores is for a 7 byte fill. Prefer two individual
4174 stores over a load/mask/store sequence. */
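	 /* Illustrative example of the masking path below (assuming
	    bytes >= alignofs): with align >= 32 and alignofs == 3,
	    inv_alignofs is 1, so mem is the aligned SImode word holding
	    the three prefix bytes plus the byte just before them, and
	    mask is 0x000000ff.  The AND keeps that preceding byte,
	    zeroes the three prefix bytes, and the word is stored back.  */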
4175 if ((!TARGET_BWX || alignofs == 7)
4176 && align >= 32
4177 && !(alignofs == 4 && bytes >= 4))
4179 machine_mode mode = (align >= 64 ? DImode : SImode);
4180 int inv_alignofs = (align >= 64 ? 8 : 4) - alignofs;
4181 rtx mem, tmp;
4182 HOST_WIDE_INT mask;
4184 mem = adjust_address (orig_dst, mode, ofs - inv_alignofs);
4185 set_mem_alias_set (mem, 0);
4187 mask = ~(HOST_WIDE_INT_M1U << (inv_alignofs * 8));
4188 if (bytes < alignofs)
4190 mask |= HOST_WIDE_INT_M1U << ((inv_alignofs + bytes) * 8);
4191 ofs += bytes;
4192 bytes = 0;
4194 else
4196 bytes -= alignofs;
4197 ofs += alignofs;
4199 alignofs = 0;
4201 tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask),
4202 NULL_RTX, 1, OPTAB_WIDEN);
4204 emit_move_insn (mem, tmp);
4207 if (TARGET_BWX && (alignofs & 1) && bytes >= 1)
4209 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4210 bytes -= 1;
4211 ofs += 1;
4212 alignofs -= 1;
4214 if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2)
4216 emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx);
4217 bytes -= 2;
4218 ofs += 2;
4219 alignofs -= 2;
4221 if (alignofs == 4 && bytes >= 4)
4223 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4224 bytes -= 4;
4225 ofs += 4;
4226 alignofs = 0;
4229 /* If we've not used the extra lead alignment information by now,
4230 we won't be able to. Downgrade align to match what's left over. */
4231 if (alignofs > 0)
4233 alignofs = alignofs & -alignofs;
4234 align = MIN (align, alignofs * BITS_PER_UNIT);
4238 /* Handle a block of contiguous long-words. */
4240 if (align >= 64 && bytes >= 8)
4242 words = bytes / 8;
4244 for (i = 0; i < words; ++i)
4245 emit_move_insn (adjust_address (orig_dst, DImode, ofs + i * 8),
4246 const0_rtx);
4248 bytes -= words * 8;
4249 ofs += words * 8;
4252 /* If the block is large and appropriately aligned, emit a single
4253 store followed by a sequence of stq_u insns. */
4255 if (align >= 32 && bytes > 16)
4257 rtx orig_dsta;
4259 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4260 bytes -= 4;
4261 ofs += 4;
4263 orig_dsta = XEXP (orig_dst, 0);
4264 if (GET_CODE (orig_dsta) == LO_SUM)
4265 orig_dsta = force_reg (Pmode, orig_dsta);
4267 words = bytes / 8;
4268 for (i = 0; i < words; ++i)
4270 rtx mem
4271 = change_address (orig_dst, DImode,
4272 gen_rtx_AND (DImode,
4273 plus_constant (DImode, orig_dsta,
4274 ofs + i*8),
4275 GEN_INT (-8)));
4276 set_mem_alias_set (mem, 0);
4277 emit_move_insn (mem, const0_rtx);
4280 /* Depending on the alignment, the first stq_u may have overlapped
4281 with the initial stl, which means that the last stq_u didn't
4282 write as much as it would appear. Leave those questionable bytes
4283 unaccounted for. */
4284 bytes -= words * 8 - 4;
4285 ofs += words * 8 - 4;
4288 /* Handle a smaller block of aligned words. */
4290 if ((align >= 64 && bytes == 4)
4291 || (align == 32 && bytes >= 4))
4293 words = bytes / 4;
4295 for (i = 0; i < words; ++i)
4296 emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4),
4297 const0_rtx);
4299 bytes -= words * 4;
4300 ofs += words * 4;
4303 /* An unaligned block uses stq_u stores for as many as possible. */
4305 if (bytes >= 8)
4307 words = bytes / 8;
4309 alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs);
4311 bytes -= words * 8;
4312 ofs += words * 8;
4315 /* Next clean up any trailing pieces. */
4317 /* Count the number of bits in BYTES for which aligned stores could
4318 be emitted. */
4319 words = 0;
4320 for (i = (TARGET_BWX ? 1 : 4); i * BITS_PER_UNIT <= align ; i <<= 1)
4321 if (bytes & i)
4322 words += 1;
4324 /* If we have appropriate alignment (and it wouldn't take too many
4325 instructions otherwise), mask out the bytes we need. */
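	 /* For example (illustrative): with TARGET_BWX, align >= 64 and
	    bytes == 7, words is 3 (bits 1, 2 and 4 of BYTES), so the single
	    load/and/store below is preferred over three separate stores;
	    with bytes == 3, words is only 2 and we fall through to the
	    individual halfword and byte stores further down.  */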
4326 if ((TARGET_BWX ? words > 2 : bytes > 0)
4327 && (align >= 64 || (align >= 32 && bytes < 4)))
4329 machine_mode mode = (align >= 64 ? DImode : SImode);
4330 rtx mem, tmp;
4331 HOST_WIDE_INT mask;
4333 mem = adjust_address (orig_dst, mode, ofs);
4334 set_mem_alias_set (mem, 0);
4336 mask = HOST_WIDE_INT_M1U << (bytes * 8);
4338 tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask),
4339 NULL_RTX, 1, OPTAB_WIDEN);
4341 emit_move_insn (mem, tmp);
4342 return 1;
4345 if (!TARGET_BWX && bytes >= 4)
4347 alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs);
4348 bytes -= 4;
4349 ofs += 4;
4352 if (bytes >= 2)
4354 if (align >= 16)
4356 do {
4357 emit_move_insn (adjust_address (orig_dst, HImode, ofs),
4358 const0_rtx);
4359 bytes -= 2;
4360 ofs += 2;
4361 } while (bytes >= 2);
4363 else if (! TARGET_BWX)
4365 alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs);
4366 bytes -= 2;
4367 ofs += 2;
4371 while (bytes > 0)
4373 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4374 bytes -= 1;
4375 ofs += 1;
4378 return 1;
4381 /* Returns a mask so that zap(x, value) == x & mask. */
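/* Byte I of the mask is 0xff when bit I of VALUE is clear and 0x00 when it
   is set.  For example (illustrative), value 0x0f selects bytes 0-3 for
   zeroing, giving the mask 0xffffffff00000000; value 0 gives all ones.  */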
4384 alpha_expand_zap_mask (HOST_WIDE_INT value)
4386 rtx result;
4387 int i;
4388 HOST_WIDE_INT mask = 0;
4390 for (i = 7; i >= 0; --i)
4392 mask <<= 8;
4393 if (!((value >> i) & 1))
4394 mask |= 0xff;
4397 result = gen_int_mode (mask, DImode);
4398 return result;
4401 void
4402 alpha_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx),
4403 machine_mode mode,
4404 rtx op0, rtx op1, rtx op2)
4406 op0 = gen_lowpart (mode, op0);
4408 if (op1 == const0_rtx)
4409 op1 = CONST0_RTX (mode);
4410 else
4411 op1 = gen_lowpart (mode, op1);
4413 if (op2 == const0_rtx)
4414 op2 = CONST0_RTX (mode);
4415 else
4416 op2 = gen_lowpart (mode, op2);
4418 emit_insn ((*gen) (op0, op1, op2));
4421 /* A subroutine of the atomic operation splitters. Jump to LABEL if
4422 COND is true. Mark the jump as unlikely to be taken. */
4424 static void
4425 emit_unlikely_jump (rtx cond, rtx label)
4427 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
4428 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
4429 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
4432 /* Subroutines of the atomic operation splitters. Emit barriers
4433 as needed for the memory MODEL. */
4435 static void
4436 alpha_pre_atomic_barrier (enum memmodel model)
4438 if (need_atomic_barrier_p (model, true))
4439 emit_insn (gen_memory_barrier ());
4442 static void
4443 alpha_post_atomic_barrier (enum memmodel model)
4445 if (need_atomic_barrier_p (model, false))
4446 emit_insn (gen_memory_barrier ());
4449 /* A subroutine of the atomic operation splitters. Emit an insxl
4450 instruction in MODE. */
4452 static rtx
4453 emit_insxl (machine_mode mode, rtx op1, rtx op2)
4455 rtx ret = gen_reg_rtx (DImode);
4456 rtx (*fn) (rtx, rtx, rtx);
4458 switch (mode)
4460 case E_QImode:
4461 fn = gen_insbl;
4462 break;
4463 case E_HImode:
4464 fn = gen_inswl;
4465 break;
4466 case E_SImode:
4467 fn = gen_insll;
4468 break;
4469 case E_DImode:
4470 fn = gen_insql;
4471 break;
4472 default:
4473 gcc_unreachable ();
4476 op1 = force_reg (mode, op1);
4477 emit_insn (fn (ret, op1, op2));
4479 return ret;
4482 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
4483 to perform. MEM is the memory on which to operate. VAL is the second
4484 operand of the binary operator. BEFORE and AFTER are optional locations to
4485 return the value of MEM either before or after the operation. SCRATCH is

4486 a scratch register. */
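/* Illustrative shape of the loop emitted below (pseudo-assembly, DImode
   case):

     retry:  ldq_l   before, mem
             <op>    before, val, scratch
             stq_c   scratch, mem       ; scratch becomes the success flag
             beq     scratch, retry

   bracketed by whatever barriers MODEL requires.  */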
4488 void
4489 alpha_split_atomic_op (enum rtx_code code, rtx mem, rtx val, rtx before,
4490 rtx after, rtx scratch, enum memmodel model)
4492 machine_mode mode = GET_MODE (mem);
4493 rtx label, x, cond = gen_rtx_REG (DImode, REGNO (scratch));
4495 alpha_pre_atomic_barrier (model);
4497 label = gen_label_rtx ();
4498 emit_label (label);
4499 label = gen_rtx_LABEL_REF (DImode, label);
4501 if (before == NULL)
4502 before = scratch;
4503 emit_insn (gen_load_locked (mode, before, mem));
4505 if (code == NOT)
4507 x = gen_rtx_AND (mode, before, val);
4508 emit_insn (gen_rtx_SET (val, x));
4510 x = gen_rtx_NOT (mode, val);
4512 else
4513 x = gen_rtx_fmt_ee (code, mode, before, val);
4514 if (after)
4515 emit_insn (gen_rtx_SET (after, copy_rtx (x)));
4516 emit_insn (gen_rtx_SET (scratch, x));
4518 emit_insn (gen_store_conditional (mode, cond, mem, scratch));
4520 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4521 emit_unlikely_jump (x, label);
4523 alpha_post_atomic_barrier (model);
4526 /* Expand a compare and swap operation. */
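/* Illustrative shape of the strong variant emitted below (pseudo-assembly):

     retry:  ldq_l   retval, mem
             cmpeq   retval, oldval, cond
             beq     cond, done
             mov     newval, cond
             stq_c   cond, mem
             beq     cond, retry
     done:

   with the weak variant omitting the backward branch.  */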
4528 void
4529 alpha_split_compare_and_swap (rtx operands[])
4531 rtx cond, retval, mem, oldval, newval;
4532 bool is_weak;
4533 enum memmodel mod_s, mod_f;
4534 machine_mode mode;
4535 rtx label1, label2, x;
4537 cond = operands[0];
4538 retval = operands[1];
4539 mem = operands[2];
4540 oldval = operands[3];
4541 newval = operands[4];
4542 is_weak = (operands[5] != const0_rtx);
4543 mod_s = memmodel_from_int (INTVAL (operands[6]));
4544 mod_f = memmodel_from_int (INTVAL (operands[7]));
4545 mode = GET_MODE (mem);
4547 alpha_pre_atomic_barrier (mod_s);
4549 label1 = NULL_RTX;
4550 if (!is_weak)
4552 label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4553 emit_label (XEXP (label1, 0));
4555 label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4557 emit_insn (gen_load_locked (mode, retval, mem));
4559 x = gen_lowpart (DImode, retval);
4560 if (oldval == const0_rtx)
4562 emit_move_insn (cond, const0_rtx);
4563 x = gen_rtx_NE (DImode, x, const0_rtx);
4565 else
4567 x = gen_rtx_EQ (DImode, x, oldval);
4568 emit_insn (gen_rtx_SET (cond, x));
4569 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4571 emit_unlikely_jump (x, label2);
4573 emit_move_insn (cond, newval);
4574 emit_insn (gen_store_conditional
4575 (mode, cond, mem, gen_lowpart (mode, cond)));
4577 if (!is_weak)
4579 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4580 emit_unlikely_jump (x, label1);
4583 if (!is_mm_relaxed (mod_f))
4584 emit_label (XEXP (label2, 0));
4586 alpha_post_atomic_barrier (mod_s);
4588 if (is_mm_relaxed (mod_f))
4589 emit_label (XEXP (label2, 0));
4592 void
4593 alpha_expand_compare_and_swap_12 (rtx operands[])
4595 rtx cond, dst, mem, oldval, newval, is_weak, mod_s, mod_f;
4596 machine_mode mode;
4597 rtx addr, align, wdst;
4599 cond = operands[0];
4600 dst = operands[1];
4601 mem = operands[2];
4602 oldval = operands[3];
4603 newval = operands[4];
4604 is_weak = operands[5];
4605 mod_s = operands[6];
4606 mod_f = operands[7];
4607 mode = GET_MODE (mem);
4609 /* We forced the address into a register via mem_noofs_operand. */
4610 addr = XEXP (mem, 0);
4611 gcc_assert (register_operand (addr, DImode));
4613 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4614 NULL_RTX, 1, OPTAB_DIRECT);
4616 oldval = convert_modes (DImode, mode, oldval, 1);
4618 if (newval != const0_rtx)
4619 newval = emit_insxl (mode, newval, addr);
4621 wdst = gen_reg_rtx (DImode);
4622 emit_insn (gen_atomic_compare_and_swap_1
4623 (mode, cond, wdst, mem, oldval, newval, align,
4624 is_weak, mod_s, mod_f));
4626 emit_move_insn (dst, gen_lowpart (mode, wdst));
4629 void
4630 alpha_split_compare_and_swap_12 (rtx operands[])
4632 rtx cond, dest, orig_mem, oldval, newval, align, scratch;
4633 machine_mode mode;
4634 bool is_weak;
4635 enum memmodel mod_s, mod_f;
4636 rtx label1, label2, mem, addr, width, mask, x;
4638 cond = operands[0];
4639 dest = operands[1];
4640 orig_mem = operands[2];
4641 oldval = operands[3];
4642 newval = operands[4];
4643 align = operands[5];
4644 is_weak = (operands[6] != const0_rtx);
4645 mod_s = memmodel_from_int (INTVAL (operands[7]));
4646 mod_f = memmodel_from_int (INTVAL (operands[8]));
4647 scratch = operands[9];
4648 mode = GET_MODE (orig_mem);
4649 addr = XEXP (orig_mem, 0);
4651 mem = gen_rtx_MEM (DImode, align);
4652 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4653 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4654 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4656 alpha_pre_atomic_barrier (mod_s);
4658 label1 = NULL_RTX;
4659 if (!is_weak)
4661 label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4662 emit_label (XEXP (label1, 0));
4664 label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4666 emit_insn (gen_load_locked (DImode, scratch, mem));
4668 width = GEN_INT (GET_MODE_BITSIZE (mode));
4669 mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4670 emit_insn (gen_extxl (dest, scratch, width, addr));
4672 if (oldval == const0_rtx)
4674 emit_move_insn (cond, const0_rtx);
4675 x = gen_rtx_NE (DImode, dest, const0_rtx);
4677 else
4679 x = gen_rtx_EQ (DImode, dest, oldval);
4680 emit_insn (gen_rtx_SET (cond, x));
4681 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4683 emit_unlikely_jump (x, label2);
4685 emit_insn (gen_mskxl (cond, scratch, mask, addr));
4687 if (newval != const0_rtx)
4688 emit_insn (gen_iordi3 (cond, cond, newval));
4690 emit_insn (gen_store_conditional (DImode, cond, mem, cond));
4692 if (!is_weak)
4694 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4695 emit_unlikely_jump (x, label1);
4698 if (!is_mm_relaxed (mod_f))
4699 emit_label (XEXP (label2, 0));
4701 alpha_post_atomic_barrier (mod_s);
4703 if (is_mm_relaxed (mod_f))
4704 emit_label (XEXP (label2, 0));
4707 /* Expand an atomic exchange operation. */
4709 void
4710 alpha_split_atomic_exchange (rtx operands[])
4712 rtx retval, mem, val, scratch;
4713 enum memmodel model;
4714 machine_mode mode;
4715 rtx label, x, cond;
4717 retval = operands[0];
4718 mem = operands[1];
4719 val = operands[2];
4720 model = (enum memmodel) INTVAL (operands[3]);
4721 scratch = operands[4];
4722 mode = GET_MODE (mem);
4723 cond = gen_lowpart (DImode, scratch);
4725 alpha_pre_atomic_barrier (model);
4727 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4728 emit_label (XEXP (label, 0));
4730 emit_insn (gen_load_locked (mode, retval, mem));
4731 emit_move_insn (scratch, val);
4732 emit_insn (gen_store_conditional (mode, cond, mem, scratch));
4734 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4735 emit_unlikely_jump (x, label);
4737 alpha_post_atomic_barrier (model);
4740 void
4741 alpha_expand_atomic_exchange_12 (rtx operands[])
4743 rtx dst, mem, val, model;
4744 machine_mode mode;
4745 rtx addr, align, wdst;
4747 dst = operands[0];
4748 mem = operands[1];
4749 val = operands[2];
4750 model = operands[3];
4751 mode = GET_MODE (mem);
4753 /* We forced the address into a register via mem_noofs_operand. */
4754 addr = XEXP (mem, 0);
4755 gcc_assert (register_operand (addr, DImode));
4757 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4758 NULL_RTX, 1, OPTAB_DIRECT);
4760 /* Insert val into the correct byte location within the word. */
4761 if (val != const0_rtx)
4762 val = emit_insxl (mode, val, addr);
4764 wdst = gen_reg_rtx (DImode);
4765 emit_insn (gen_atomic_exchange_1 (mode, wdst, mem, val, align, model));
4767 emit_move_insn (dst, gen_lowpart (mode, wdst));
4770 void
4771 alpha_split_atomic_exchange_12 (rtx operands[])
4773 rtx dest, orig_mem, addr, val, align, scratch;
4774 rtx label, mem, width, mask, x;
4775 machine_mode mode;
4776 enum memmodel model;
4778 dest = operands[0];
4779 orig_mem = operands[1];
4780 val = operands[2];
4781 align = operands[3];
4782 model = (enum memmodel) INTVAL (operands[4]);
4783 scratch = operands[5];
4784 mode = GET_MODE (orig_mem);
4785 addr = XEXP (orig_mem, 0);
4787 mem = gen_rtx_MEM (DImode, align);
4788 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4789 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4790 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4792 alpha_pre_atomic_barrier (model);
4794 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4795 emit_label (XEXP (label, 0));
4797 emit_insn (gen_load_locked (DImode, scratch, mem));
4799 width = GEN_INT (GET_MODE_BITSIZE (mode));
4800 mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4801 emit_insn (gen_extxl (dest, scratch, width, addr));
4802 emit_insn (gen_mskxl (scratch, scratch, mask, addr));
4803 if (val != const0_rtx)
4804 emit_insn (gen_iordi3 (scratch, scratch, val));
4806 emit_insn (gen_store_conditional (DImode, scratch, mem, scratch));
4808 x = gen_rtx_EQ (DImode, scratch, const0_rtx);
4809 emit_unlikely_jump (x, label);
4811 alpha_post_atomic_barrier (model);
4814 /* Adjust the cost of a scheduling dependency. Return the new cost of
4815 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4817 static int
4818 alpha_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4819 unsigned int)
4821 enum attr_type dep_insn_type;
4823 /* If the dependence is an anti-dependence, there is no cost. For an
4824 output dependence, there is sometimes a cost, but it doesn't seem
4825 worth handling those few cases. */
4826 if (dep_type != 0)
4827 return cost;
4829 /* If we can't recognize the insns, we can't really do anything. */
4830 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
4831 return cost;
4833 dep_insn_type = get_attr_type (dep_insn);
4835 /* Bring in the user-defined memory latency. */
4836 if (dep_insn_type == TYPE_ILD
4837 || dep_insn_type == TYPE_FLD
4838 || dep_insn_type == TYPE_LDSYM)
4839 cost += alpha_memory_latency-1;
4841 /* Everything else handled in DFA bypasses now. */
4843 return cost;
4846 /* The number of instructions that can be issued per cycle. */
4848 static int
4849 alpha_issue_rate (void)
4851 return (alpha_tune == PROCESSOR_EV4 ? 2 : 4);
4854 /* How many alternative schedules to try. This should be as wide as the
4855 scheduling freedom in the DFA, but no wider. Making this value too
4856 large results in extra work for the scheduler.
4858 For EV4, loads can be issued to either IB0 or IB1, thus we have 2
4859 alternative schedules. For EV5, we can choose between E0/E1 and
4860 FA/FM. For EV6, an arithmetic insn can be issued to U0/U1/L0/L1. */
4862 static int
4863 alpha_multipass_dfa_lookahead (void)
4865 return (alpha_tune == PROCESSOR_EV6 ? 4 : 2);
4868 /* Machine-specific function data. */
4870 struct GTY(()) alpha_links;
4872 struct GTY(()) machine_function
4874 unsigned HOST_WIDE_INT sa_mask;
4875 HOST_WIDE_INT sa_size;
4876 HOST_WIDE_INT frame_size;
4878 /* For flag_reorder_blocks_and_partition. */
4879 rtx gp_save_rtx;
4881 /* For VMS condition handlers. */
4882 bool uses_condition_handler;
4884 /* Linkage entries. */
4885 hash_map<nofree_string_hash, alpha_links *> *links;
4888 /* How to allocate a 'struct machine_function'. */
4890 static struct machine_function *
4891 alpha_init_machine_status (void)
4893 return ggc_cleared_alloc<machine_function> ();
4896 /* Support for frame based VMS condition handlers. */
4898 /* A VMS condition handler may be established for a function with a call to
4899 __builtin_establish_vms_condition_handler, and cancelled with a call to
4900 __builtin_revert_vms_condition_handler.
4902 The VMS Condition Handling Facility knows about the existence of a handler
4903 from the procedure descriptor .handler field. As with the VMS native compilers,
4904 we store the user-specified handler's address at a fixed location in the
4905 stack frame and point the procedure descriptor at a common wrapper which
4906 fetches the real handler's address and issues an indirect call.
4908 The indirection wrapper is "__gcc_shell_handler", provided by libgcc.
4910 We force the procedure kind to PT_STACK, and the fixed frame location is
4911 fp+8, just before the register save area. We use the handler_data field in
4912 the procedure descriptor to state the fp offset at which the installed
4913 handler address can be found. */
4915 #define VMS_COND_HANDLER_FP_OFFSET 8
4917 /* Expand code to store the currently installed user VMS condition handler
4918 into TARGET and install HANDLER as the new condition handler. */
4920 void
4921 alpha_expand_builtin_establish_vms_condition_handler (rtx target, rtx handler)
4923 rtx handler_slot_address = plus_constant (Pmode, hard_frame_pointer_rtx,
4924 VMS_COND_HANDLER_FP_OFFSET);
4926 rtx handler_slot
4927 = gen_rtx_MEM (DImode, handler_slot_address);
4929 emit_move_insn (target, handler_slot);
4930 emit_move_insn (handler_slot, handler);
4932 /* Notify the start/prologue/epilogue emitters that the condition handler
4933 slot is needed. In addition to reserving the slot space, this will force
4934 the procedure kind to PT_STACK and ensure that the hard_frame_pointer_rtx
4935 use above is correct. */
4936 cfun->machine->uses_condition_handler = true;
4939 /* Expand code to store the current VMS condition handler into TARGET and
4940 nullify it. */
4942 void
4943 alpha_expand_builtin_revert_vms_condition_handler (rtx target)
4945 /* We implement this by establishing a null condition handler, with the tiny
4946 side effect of setting uses_condition_handler. This is a little bit
4947 pessimistic if no actual builtin_establish call is ever issued, which is
4948 not a real problem and expected never to happen anyway. */
4950 alpha_expand_builtin_establish_vms_condition_handler (target, const0_rtx);
4953 /* Functions to save and restore alpha_return_addr_rtx. */
4955 /* Start the ball rolling with RETURN_ADDR_RTX. */
4958 alpha_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4960 if (count != 0)
4961 return const0_rtx;
4963 return get_hard_reg_initial_val (Pmode, REG_RA);
4966 /* Return or create a memory slot containing the gp value for the current
4967 function. Needed only if TARGET_LD_BUGGY_LDGP. */
4970 alpha_gp_save_rtx (void)
4972 rtx_insn *seq;
4973 rtx m = cfun->machine->gp_save_rtx;
4975 if (m == NULL)
4977 start_sequence ();
4979 m = assign_stack_local (DImode, UNITS_PER_WORD, BITS_PER_WORD);
4980 m = validize_mem (m);
4981 emit_move_insn (m, pic_offset_table_rtx);
4983 seq = get_insns ();
4984 end_sequence ();
4986 /* We used to simply emit the sequence after entry_of_function.
4987 However this breaks the CFG if the first instruction in the
4988 first block is not the NOTE_INSN_BASIC_BLOCK, for example a
4989 label. Emit the sequence properly on the edge. We are only
4990 invoked from dw2_build_landing_pads and finish_eh_generation
4991 will call commit_edge_insertions thanks to a kludge. */
4992 insert_insn_on_edge (seq,
4993 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
4995 cfun->machine->gp_save_rtx = m;
4998 return m;
5001 static void
5002 alpha_instantiate_decls (void)
5004 if (cfun->machine->gp_save_rtx != NULL_RTX)
5005 instantiate_decl_rtl (cfun->machine->gp_save_rtx);
5008 static int
5009 alpha_ra_ever_killed (void)
5011 rtx_insn *top;
5013 if (!has_hard_reg_initial_val (Pmode, REG_RA))
5014 return (int)df_regs_ever_live_p (REG_RA);
5016 push_topmost_sequence ();
5017 top = get_insns ();
5018 pop_topmost_sequence ();
5020 return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL);
5024 /* Return the trap mode suffix applicable to the current
5025 instruction, or NULL. */
5027 static const char *
5028 get_trap_mode_suffix (void)
5030 enum attr_trap_suffix s = get_attr_trap_suffix (current_output_insn);
5032 switch (s)
5034 case TRAP_SUFFIX_NONE:
5035 return NULL;
5037 case TRAP_SUFFIX_SU:
5038 if (alpha_fptm >= ALPHA_FPTM_SU)
5039 return "su";
5040 return NULL;
5042 case TRAP_SUFFIX_SUI:
5043 if (alpha_fptm >= ALPHA_FPTM_SUI)
5044 return "sui";
5045 return NULL;
5047 case TRAP_SUFFIX_V_SV:
5048 switch (alpha_fptm)
5050 case ALPHA_FPTM_N:
5051 return NULL;
5052 case ALPHA_FPTM_U:
5053 return "v";
5054 case ALPHA_FPTM_SU:
5055 case ALPHA_FPTM_SUI:
5056 return "sv";
5057 default:
5058 gcc_unreachable ();
5061 case TRAP_SUFFIX_V_SV_SVI:
5062 switch (alpha_fptm)
5064 case ALPHA_FPTM_N:
5065 return NULL;
5066 case ALPHA_FPTM_U:
5067 return "v";
5068 case ALPHA_FPTM_SU:
5069 return "sv";
5070 case ALPHA_FPTM_SUI:
5071 return "svi";
5072 default:
5073 gcc_unreachable ();
5075 break;
5077 case TRAP_SUFFIX_U_SU_SUI:
5078 switch (alpha_fptm)
5080 case ALPHA_FPTM_N:
5081 return NULL;
5082 case ALPHA_FPTM_U:
5083 return "u";
5084 case ALPHA_FPTM_SU:
5085 return "su";
5086 case ALPHA_FPTM_SUI:
5087 return "sui";
5088 default:
5089 gcc_unreachable ();
5091 break;
5093 default:
5094 gcc_unreachable ();
5096 gcc_unreachable ();
5099 /* Return the rounding mode suffix applicable to the current
5100 instruction, or NULL. */
5102 static const char *
5103 get_round_mode_suffix (void)
5105 enum attr_round_suffix s = get_attr_round_suffix (current_output_insn);
5107 switch (s)
5109 case ROUND_SUFFIX_NONE:
5110 return NULL;
5111 case ROUND_SUFFIX_NORMAL:
5112 switch (alpha_fprm)
5114 case ALPHA_FPRM_NORM:
5115 return NULL;
5116 case ALPHA_FPRM_MINF:
5117 return "m";
5118 case ALPHA_FPRM_CHOP:
5119 return "c";
5120 case ALPHA_FPRM_DYN:
5121 return "d";
5122 default:
5123 gcc_unreachable ();
5125 break;
5127 case ROUND_SUFFIX_C:
5128 return "c";
5130 default:
5131 gcc_unreachable ();
5133 gcc_unreachable ();
5136 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
5138 static bool
5139 alpha_print_operand_punct_valid_p (unsigned char code)
5141 return (code == '/' || code == ',' || code == '-' || code == '~'
5142 || code == '#' || code == '*' || code == '&');
5145 /* Implement TARGET_PRINT_OPERAND. The alpha-specific
5146 operand codes are documented below. */
5148 static void
5149 alpha_print_operand (FILE *file, rtx x, int code)
5151 int i;
5153 switch (code)
5155 case '~':
5156 /* Print the assembler name of the current function. */
5157 assemble_name (file, alpha_fnname);
5158 break;
5160 case '&':
5161 if (const char *name = get_some_local_dynamic_name ())
5162 assemble_name (file, name);
5163 else
5164 output_operand_lossage ("'%%&' used without any "
5165 "local dynamic TLS references");
5166 break;
5168 case '/':
5169 /* Generates the instruction suffix. The TRAP_SUFFIX and ROUND_SUFFIX
5170 attributes are examined to determine what is appropriate. */
5172 const char *trap = get_trap_mode_suffix ();
5173 const char *round = get_round_mode_suffix ();
5175 if (trap || round)
5176 fprintf (file, "/%s%s", (trap ? trap : ""), (round ? round : ""));
5177 break;
5180 case ',':
5181 /* Generates single precision suffix for floating point
5182 instructions (s for IEEE, f for VAX). */
5183 fputc ((TARGET_FLOAT_VAX ? 'f' : 's'), file);
5184 break;
5186 case '-':
5187 /* Generates double precision suffix for floating point
5188 instructions (t for IEEE, g for VAX). */
5189 fputc ((TARGET_FLOAT_VAX ? 'g' : 't'), file);
5190 break;
5192 case '#':
5193 if (alpha_this_literal_sequence_number == 0)
5194 alpha_this_literal_sequence_number = alpha_next_sequence_number++;
5195 fprintf (file, "%d", alpha_this_literal_sequence_number);
5196 break;
5198 case '*':
5199 if (alpha_this_gpdisp_sequence_number == 0)
5200 alpha_this_gpdisp_sequence_number = alpha_next_sequence_number++;
5201 fprintf (file, "%d", alpha_this_gpdisp_sequence_number);
5202 break;
5204 case 'J':
5206 const char *lituse;
5208 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD_CALL)
5210 x = XVECEXP (x, 0, 0);
5211 lituse = "lituse_tlsgd";
5213 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM_CALL)
5215 x = XVECEXP (x, 0, 0);
5216 lituse = "lituse_tlsldm";
5218 else if (CONST_INT_P (x))
5219 lituse = "lituse_jsr";
5220 else
5222 output_operand_lossage ("invalid %%J value");
5223 break;
5226 if (x != const0_rtx)
5227 fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5229 break;
5231 case 'j':
5233 const char *lituse;
5235 #ifdef HAVE_AS_JSRDIRECT_RELOCS
5236 lituse = "lituse_jsrdirect";
5237 #else
5238 lituse = "lituse_jsr";
5239 #endif
5241 gcc_assert (INTVAL (x) != 0);
5242 fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5244 break;
5245 case 'r':
5246 /* If this operand is the constant zero, write it as "$31". */
5247 if (REG_P (x))
5248 fprintf (file, "%s", reg_names[REGNO (x)]);
5249 else if (x == CONST0_RTX (GET_MODE (x)))
5250 fprintf (file, "$31");
5251 else
5252 output_operand_lossage ("invalid %%r value");
5253 break;
5255 case 'R':
5256 /* Similar, but for floating-point. */
5257 if (REG_P (x))
5258 fprintf (file, "%s", reg_names[REGNO (x)]);
5259 else if (x == CONST0_RTX (GET_MODE (x)))
5260 fprintf (file, "$f31");
5261 else
5262 output_operand_lossage ("invalid %%R value");
5263 break;
5265 case 'N':
5266 /* Write the 1's complement of a constant. */
5267 if (!CONST_INT_P (x))
5268 output_operand_lossage ("invalid %%N value");
5270 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
5271 break;
5273 case 'P':
5274 /* Write 1 << C, for a constant C. */
5275 if (!CONST_INT_P (x))
5276 output_operand_lossage ("invalid %%P value");
5278 fprintf (file, HOST_WIDE_INT_PRINT_DEC, HOST_WIDE_INT_1 << INTVAL (x));
5279 break;
5281 case 'h':
5282 /* Write the high-order 16 bits of a constant, sign-extended. */
5283 if (!CONST_INT_P (x))
5284 output_operand_lossage ("invalid %%h value");
5286 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16);
5287 break;
5289 case 'L':
5290 /* Write the low-order 16 bits of a constant, sign-extended. */
5291 if (!CONST_INT_P (x))
5292 output_operand_lossage ("invalid %%L value");
5294 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
5295 (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000));
5296 break;
5298 case 'm':
5299 /* Write mask for ZAP insn. */
5300 if (CONST_INT_P (x))
5302 HOST_WIDE_INT mask = 0, value = INTVAL (x);
5304 for (i = 0; i < 8; i++, value >>= 8)
5305 if (value & 0xff)
5306 mask |= (1 << i);
5308 fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask);
5310 else
5311 output_operand_lossage ("invalid %%m value");
5312 break;
5314 case 'M':
5315 /* 'b', 'w', 'l', or 'q' as the value of the constant. */
5316 if (!mode_width_operand (x, VOIDmode))
5317 output_operand_lossage ("invalid %%M value");
5319 fprintf (file, "%s",
5320 (INTVAL (x) == 8 ? "b"
5321 : INTVAL (x) == 16 ? "w"
5322 : INTVAL (x) == 32 ? "l"
5323 : "q"));
5324 break;
5326 case 'U':
5327 /* Similar, except do it from the mask. */
5328 if (CONST_INT_P (x))
5330 HOST_WIDE_INT value = INTVAL (x);
5332 if (value == 0xff)
5334 fputc ('b', file);
5335 break;
5337 if (value == 0xffff)
5339 fputc ('w', file);
5340 break;
5342 if (value == 0xffffffff)
5344 fputc ('l', file);
5345 break;
5347 if (value == -1)
5349 fputc ('q', file);
5350 break;
5354 output_operand_lossage ("invalid %%U value");
5355 break;
5357 case 's':
5358 /* Write the constant value divided by 8. */
5359 if (!CONST_INT_P (x)
5360 || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
5361 || (INTVAL (x) & 7) != 0)
5362 output_operand_lossage ("invalid %%s value");
5364 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8);
5365 break;
5367 case 'C': case 'D': case 'c': case 'd':
5368 /* Write out comparison name. */
5370 enum rtx_code c = GET_CODE (x);
5372 if (!COMPARISON_P (x))
5373 output_operand_lossage ("invalid %%C value");
5375 else if (code == 'D')
5376 c = reverse_condition (c);
5377 else if (code == 'c')
5378 c = swap_condition (c);
5379 else if (code == 'd')
5380 c = swap_condition (reverse_condition (c));
5382 if (c == LEU)
5383 fprintf (file, "ule");
5384 else if (c == LTU)
5385 fprintf (file, "ult");
5386 else if (c == UNORDERED)
5387 fprintf (file, "un");
5388 else
5389 fprintf (file, "%s", GET_RTX_NAME (c));
5391 break;
5393 case 'E':
5394 /* Write the divide or modulus operator. */
5395 switch (GET_CODE (x))
5397 case DIV:
5398 fprintf (file, "div%s", GET_MODE (x) == SImode ? "l" : "q");
5399 break;
5400 case UDIV:
5401 fprintf (file, "div%su", GET_MODE (x) == SImode ? "l" : "q");
5402 break;
5403 case MOD:
5404 fprintf (file, "rem%s", GET_MODE (x) == SImode ? "l" : "q");
5405 break;
5406 case UMOD:
5407 fprintf (file, "rem%su", GET_MODE (x) == SImode ? "l" : "q");
5408 break;
5409 default:
5410 output_operand_lossage ("invalid %%E value");
5411 break;
5413 break;
5415 case 'A':
5416 /* Write "_u" for unaligned access. */
5417 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
5418 fprintf (file, "_u");
5419 break;
5421 case 0:
5422 if (REG_P (x))
5423 fprintf (file, "%s", reg_names[REGNO (x)]);
5424 else if (MEM_P (x))
5425 output_address (GET_MODE (x), XEXP (x, 0));
5426 else if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == UNSPEC)
5428 switch (XINT (XEXP (x, 0), 1))
5430 case UNSPEC_DTPREL:
5431 case UNSPEC_TPREL:
5432 output_addr_const (file, XVECEXP (XEXP (x, 0), 0, 0));
5433 break;
5434 default:
5435 output_operand_lossage ("unknown relocation unspec");
5436 break;
5439 else
5440 output_addr_const (file, x);
5441 break;
5443 default:
5444 output_operand_lossage ("invalid %%xn code");
5448 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
5450 static void
5451 alpha_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
5453 int basereg = 31;
5454 HOST_WIDE_INT offset = 0;
5456 if (GET_CODE (addr) == AND)
5457 addr = XEXP (addr, 0);
5459 if (GET_CODE (addr) == PLUS
5460 && CONST_INT_P (XEXP (addr, 1)))
5462 offset = INTVAL (XEXP (addr, 1));
5463 addr = XEXP (addr, 0);
5466 if (GET_CODE (addr) == LO_SUM)
5468 const char *reloc16, *reloclo;
5469 rtx op1 = XEXP (addr, 1);
5471 if (GET_CODE (op1) == CONST && GET_CODE (XEXP (op1, 0)) == UNSPEC)
5473 op1 = XEXP (op1, 0);
5474 switch (XINT (op1, 1))
5476 case UNSPEC_DTPREL:
5477 reloc16 = NULL;
5478 reloclo = (alpha_tls_size == 16 ? "dtprel" : "dtprello");
5479 break;
5480 case UNSPEC_TPREL:
5481 reloc16 = NULL;
5482 reloclo = (alpha_tls_size == 16 ? "tprel" : "tprello");
5483 break;
5484 default:
5485 output_operand_lossage ("unknown relocation unspec");
5486 return;
5489 output_addr_const (file, XVECEXP (op1, 0, 0));
5491 else
5493 reloc16 = "gprel";
5494 reloclo = "gprellow";
5495 output_addr_const (file, op1);
5498 if (offset)
5499 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
5501 addr = XEXP (addr, 0);
5502 switch (GET_CODE (addr))
5504 case REG:
5505 basereg = REGNO (addr);
5506 break;
5508 case SUBREG:
5509 basereg = subreg_regno (addr);
5510 break;
5512 default:
5513 gcc_unreachable ();
5516 fprintf (file, "($%d)\t\t!%s", basereg,
5517 (basereg == 29 ? reloc16 : reloclo));
5518 return;
5521 switch (GET_CODE (addr))
5523 case REG:
5524 basereg = REGNO (addr);
5525 break;
5527 case SUBREG:
5528 basereg = subreg_regno (addr);
5529 break;
5531 case CONST_INT:
5532 offset = INTVAL (addr);
5533 break;
5535 case SYMBOL_REF:
5536 gcc_assert(TARGET_ABI_OPEN_VMS || this_is_asm_operands);
5537 fprintf (file, "%s", XSTR (addr, 0));
5538 return;
5540 case CONST:
5541 gcc_assert(TARGET_ABI_OPEN_VMS || this_is_asm_operands);
5542 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS
5543 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF);
5544 fprintf (file, "%s+" HOST_WIDE_INT_PRINT_DEC,
5545 XSTR (XEXP (XEXP (addr, 0), 0), 0),
5546 INTVAL (XEXP (XEXP (addr, 0), 1)));
5547 return;
5549 default:
5550 output_operand_lossage ("invalid operand address");
5551 return;
5554 fprintf (file, HOST_WIDE_INT_PRINT_DEC "($%d)", offset, basereg);
5557 /* Emit RTL insns to initialize the variable parts of a trampoline at
5558 M_TRAMP. FNDECL is the target function's decl. CHAIN_VALUE is an rtx
5559 for the static chain value for the function. */
5561 static void
5562 alpha_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
5564 rtx fnaddr, mem, word1, word2;
5566 fnaddr = XEXP (DECL_RTL (fndecl), 0);
5568 #ifdef POINTERS_EXTEND_UNSIGNED
5569 fnaddr = convert_memory_address (Pmode, fnaddr);
5570 chain_value = convert_memory_address (Pmode, chain_value);
5571 #endif
5573 if (TARGET_ABI_OPEN_VMS)
5575 const char *fnname;
5576 char *trname;
5578 /* Construct the name of the trampoline entry point. */
5579 fnname = XSTR (fnaddr, 0);
5580 trname = (char *) alloca (strlen (fnname) + 5);
5581 strcpy (trname, fnname);
5582 strcat (trname, "..tr");
5583 fnname = ggc_alloc_string (trname, strlen (trname) + 1);
5584 word2 = gen_rtx_SYMBOL_REF (Pmode, fnname);
5586 /* Trampoline (or "bounded") procedure descriptor is constructed from
5587 the function's procedure descriptor with certain fields zeroed IAW
5588 the VMS calling standard. This is stored in the first quadword. */
5589 word1 = force_reg (DImode, gen_const_mem (DImode, fnaddr));
5590 word1 = expand_and (DImode, word1,
5591 GEN_INT (HOST_WIDE_INT_C (0xffff0fff0000fff0)),
5592 NULL);
5594 else
5596 /* These 4 instructions are:
5597 ldq $1,24($27)
5598 ldq $27,16($27)
5599 jmp $31,($27),0
5600 nop
5601 We don't bother setting the HINT field of the jump; the nop
5602 is merely there for padding. */
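      /* The resulting OSF trampoline is therefore 32 bytes (illustrative
	 summary): 16 bytes of code, the target function address at offset 16
	 and the static chain value at offset 24, which is what the two ldq
	 instructions fetch through $27, the procedure value that points at
	 the trampoline itself on entry.  */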
5603 word1 = GEN_INT (HOST_WIDE_INT_C (0xa77b0010a43b0018));
5604 word2 = GEN_INT (HOST_WIDE_INT_C (0x47ff041f6bfb0000));
5607 /* Store the first two words, as computed above. */
5608 mem = adjust_address (m_tramp, DImode, 0);
5609 emit_move_insn (mem, word1);
5610 mem = adjust_address (m_tramp, DImode, 8);
5611 emit_move_insn (mem, word2);
5613 /* Store function address and static chain value. */
5614 mem = adjust_address (m_tramp, Pmode, 16);
5615 emit_move_insn (mem, fnaddr);
5616 mem = adjust_address (m_tramp, Pmode, 24);
5617 emit_move_insn (mem, chain_value);
5619 if (TARGET_ABI_OSF)
5621 emit_insn (gen_imb ());
5622 #ifdef HAVE_ENABLE_EXECUTE_STACK
5623 emit_library_call (init_one_libfunc ("__enable_execute_stack"),
5624 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
5625 #endif
5629 /* Determine where to put an argument to a function.
5630 Value is zero to push the argument on the stack,
5631 or a hard register in which to store the argument.
5633 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5634 the preceding args and about the function being called.
5635 ARG is a description of the argument.
5637 On Alpha the first 6 words of args are normally in registers
5638 and the rest are pushed. */
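/* For example (illustrative): on OSF, the third argument (*CUM == 2) is
   passed in $18 if integral, or in $f18 when TARGET_FPREGS and its mode is
   a scalar float, since the register chosen below is num_args + basereg
   with basereg 16 or 48 respectively.  */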
5640 static rtx
5641 alpha_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
5643 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5644 int basereg;
5645 int num_args;
5647 /* Don't get confused and pass small structures in FP registers. */
5648 if (arg.aggregate_type_p ())
5649 basereg = 16;
5650 else
5652 /* With alpha_split_complex_arg, we shouldn't see any raw complex
5653 values here. */
5654 gcc_checking_assert (!COMPLEX_MODE_P (arg.mode));
5656 /* Set up defaults for FP operands passed in FP registers, and
5657 integral operands passed in integer registers. */
5658 if (TARGET_FPREGS && GET_MODE_CLASS (arg.mode) == MODE_FLOAT)
5659 basereg = 32 + 16;
5660 else
5661 basereg = 16;
5664 /* ??? Irritatingly, the definition of CUMULATIVE_ARGS is different for
5665 the two platforms, so we can't avoid conditional compilation. */
5666 #if TARGET_ABI_OPEN_VMS
5668 if (arg.end_marker_p ())
5669 return alpha_arg_info_reg_val (*cum);
5671 num_args = cum->num_args;
5672 if (num_args >= 6
5673 || targetm.calls.must_pass_in_stack (arg))
5674 return NULL_RTX;
5676 #elif TARGET_ABI_OSF
5678 if (*cum >= 6)
5679 return NULL_RTX;
5680 num_args = *cum;
5682 if (arg.end_marker_p ())
5683 basereg = 16;
5684 else if (targetm.calls.must_pass_in_stack (arg))
5685 return NULL_RTX;
5687 #else
5688 #error Unhandled ABI
5689 #endif
5691 return gen_rtx_REG (arg.mode, num_args + basereg);
5694 /* Update the data in CUM to advance over argument ARG. */
5696 static void
5697 alpha_function_arg_advance (cumulative_args_t cum_v,
5698 const function_arg_info &arg)
5700 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5701 bool onstack = targetm.calls.must_pass_in_stack (arg);
5702 int increment = onstack ? 6 : ALPHA_ARG_SIZE (arg.mode, arg.type);
5704 #if TARGET_ABI_OSF
5705 *cum += increment;
5706 #else
5707 if (!onstack && cum->num_args < 6)
5708 cum->atypes[cum->num_args] = alpha_arg_type (arg.mode);
5709 cum->num_args += increment;
5710 #endif
5713 static int
5714 alpha_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
5716 int words = 0;
5717 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED = get_cumulative_args (cum_v);
5719 #if TARGET_ABI_OPEN_VMS
5720 if (cum->num_args < 6
5721 && 6 < cum->num_args + ALPHA_ARG_SIZE (arg.mode, arg.type))
5722 words = 6 - cum->num_args;
5723 #elif TARGET_ABI_OSF
5724 if (*cum < 6 && 6 < *cum + ALPHA_ARG_SIZE (arg.mode, arg.type))
5725 words = 6 - *cum;
5726 #else
5727 #error Unhandled ABI
5728 #endif
5730 return words * UNITS_PER_WORD;
5734 /* Return true if TYPE must be returned in memory, instead of in registers. */
5736 static bool
5737 alpha_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
5739 machine_mode mode = VOIDmode;
5740 int size;
5742 if (type)
5744 mode = TYPE_MODE (type);
5746 /* All aggregates are returned in memory, except on OpenVMS where
5747 records that fit 64 bits should be returned by immediate value
5748 as required by section 3.8.7.1 of the OpenVMS Calling Standard. */
5749 if (TARGET_ABI_OPEN_VMS
5750 && TREE_CODE (type) != ARRAY_TYPE
5751 && (unsigned HOST_WIDE_INT) int_size_in_bytes(type) <= 8)
5752 return false;
5754 if (AGGREGATE_TYPE_P (type))
5755 return true;
5758 size = GET_MODE_SIZE (mode);
5759 switch (GET_MODE_CLASS (mode))
5761 case MODE_VECTOR_FLOAT:
5762 /* Pass all float vectors in memory, like an aggregate. */
5763 return true;
5765 case MODE_COMPLEX_FLOAT:
5766 /* We judge complex floats on the size of their element,
5767 not the size of the whole type. */
5768 size = GET_MODE_UNIT_SIZE (mode);
5769 break;
5771 case MODE_INT:
5772 case MODE_FLOAT:
5773 case MODE_COMPLEX_INT:
5774 case MODE_VECTOR_INT:
5775 break;
5777 default:
5778 /* ??? We get called on all sorts of random stuff from
5779 aggregate_value_p. We must return something, but it's not
5780 clear what's safe to return. Pretend it's a struct I
5781 guess. */
5782 return true;
5785 /* Otherwise types must fit in one register. */
5786 return size > UNITS_PER_WORD;
5789 /* Return true if ARG should be passed by invisible reference. */
5791 static bool
5792 alpha_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
5794 /* Pass float and _Complex float variable arguments by reference.
5795 This avoids 64-bit store from a FP register to a pretend args save area
5796 and subsequent 32-bit load from the saved location to a FP register.
5798 Note that 32-bit loads and stores to/from a FP register on alpha reorder
5799 bits to form a canonical 64-bit value in the FP register. This fact
5800 invalidates compiler assumption that 32-bit FP value lives in the lower
5801 32-bits of the passed 64-bit FP value, so loading the 32-bit value from
5802 the stored 64-bit location using 32-bit FP load is invalid on alpha.
5804 This introduces a sort of ABI incompatibility, but until _Float32 was
5805 introduced, C-family languages promoted a 32-bit float variable arg to
5806 a 64-bit double, and it was not allowed to pass float as a variable
5807 argument. Passing _Complex float as a variable argument never
5808 worked on alpha. Thus, we have no backward compatibility issues
5809 to worry about, and passing unpromoted _Float32 and _Complex float
5810 as a variable argument will actually work in the future. */
5812 if (arg.mode == SFmode || arg.mode == SCmode)
5813 return !arg.named;
5815 return arg.mode == TFmode || arg.mode == TCmode;
5818 /* Define how to find the value returned by a function. VALTYPE is the
5819 data type of the value (as a tree). If the precise function being
5820 called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0.
5821 MODE is set instead of VALTYPE for libcalls.
5823 On Alpha the value is found in $0 for integer functions and
5824 $f0 for floating-point functions. */
5826 static rtx
5827 alpha_function_value_1 (const_tree valtype, const_tree func ATTRIBUTE_UNUSED,
5828 machine_mode mode)
5830 unsigned int regnum, dummy ATTRIBUTE_UNUSED;
5831 enum mode_class mclass;
5833 gcc_assert (!valtype || !alpha_return_in_memory (valtype, func));
5835 if (valtype)
5836 mode = TYPE_MODE (valtype);
5838 mclass = GET_MODE_CLASS (mode);
5839 switch (mclass)
5841 case MODE_INT:
5842 /* Do the same thing as PROMOTE_MODE except for libcalls on VMS,
5843 where we have them returning both SImode and DImode. */
5844 if (!(TARGET_ABI_OPEN_VMS && valtype && AGGREGATE_TYPE_P (valtype)))
5845 PROMOTE_MODE (mode, dummy, valtype);
5846 /* FALLTHRU */
5848 case MODE_COMPLEX_INT:
5849 case MODE_VECTOR_INT:
5850 regnum = 0;
5851 break;
5853 case MODE_FLOAT:
5854 regnum = 32;
5855 break;
5857 case MODE_COMPLEX_FLOAT:
5859 machine_mode cmode = GET_MODE_INNER (mode);
5861 return gen_rtx_PARALLEL
5862 (VOIDmode,
5863 gen_rtvec (2,
5864 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 32),
5865 const0_rtx),
5866 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 33),
5867 GEN_INT (GET_MODE_SIZE (cmode)))));
5870 case MODE_RANDOM:
5871 /* We should only reach here for BLKmode on VMS. */
5872 gcc_assert (TARGET_ABI_OPEN_VMS && mode == BLKmode);
5873 regnum = 0;
5874 break;
5876 default:
5877 gcc_unreachable ();
5880 return gen_rtx_REG (mode, regnum);
5883 /* Implement TARGET_FUNCTION_VALUE. */
5885 static rtx
5886 alpha_function_value (const_tree valtype, const_tree fn_decl_or_type,
5887 bool /*outgoing*/)
5889 return alpha_function_value_1 (valtype, fn_decl_or_type, VOIDmode);
5892 /* Implement TARGET_LIBCALL_VALUE. */
5894 static rtx
5895 alpha_libcall_value (machine_mode mode, const_rtx /*fun*/)
5897 return alpha_function_value_1 (NULL_TREE, NULL_TREE, mode);
5900 /* Implement TARGET_FUNCTION_VALUE_REGNO_P.
5902 On the Alpha, $0, $1 and $f0, $f1 are the only registers thus used. */
5904 static bool
5905 alpha_function_value_regno_p (const unsigned int regno)
5907 return (regno == 0 || regno == 1 || regno == 32 || regno == 33);
5910 /* TCmode complex values are passed by invisible reference. We
5911 should not split these values. */
5913 static bool
5914 alpha_split_complex_arg (const_tree type)
5916 return TYPE_MODE (type) != TCmode;
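/* On OSF the record laid out by the function below is roughly equivalent to
   (illustrative C):

     struct __va_list_tag {
       void *__base;     -- start of the argument save area
       int __offset;     -- byte offset of the next argument
       int;              -- unnamed dummy field, avoids alignment warnings
     };

   while for VMS va_list is simply a pointer.  */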
5919 static tree
5920 alpha_build_builtin_va_list (void)
5922 tree base, ofs, space, record, type_decl;
5924 if (TARGET_ABI_OPEN_VMS)
5925 return ptr_type_node;
5927 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5928 type_decl = build_decl (BUILTINS_LOCATION,
5929 TYPE_DECL, get_identifier ("__va_list_tag"), record);
5930 TYPE_STUB_DECL (record) = type_decl;
5931 TYPE_NAME (record) = type_decl;
5933 /* C++? SET_IS_AGGR_TYPE (record, 1); */
5935 /* Dummy field to prevent alignment warnings. */
5936 space = build_decl (BUILTINS_LOCATION,
5937 FIELD_DECL, NULL_TREE, integer_type_node);
5938 DECL_FIELD_CONTEXT (space) = record;
5939 DECL_ARTIFICIAL (space) = 1;
5940 DECL_IGNORED_P (space) = 1;
5942 ofs = build_decl (BUILTINS_LOCATION,
5943 FIELD_DECL, get_identifier ("__offset"),
5944 integer_type_node);
5945 DECL_FIELD_CONTEXT (ofs) = record;
5946 DECL_CHAIN (ofs) = space;
5948 base = build_decl (BUILTINS_LOCATION,
5949 FIELD_DECL, get_identifier ("__base"),
5950 ptr_type_node);
5951 DECL_FIELD_CONTEXT (base) = record;
5952 DECL_CHAIN (base) = ofs;
5954 TYPE_FIELDS (record) = base;
5955 layout_type (record);
5957 va_list_gpr_counter_field = ofs;
5958 return record;
5961 #if TARGET_ABI_OSF
5962 /* Helper function for alpha_stdarg_optimize_hook. Skip over casts
5963 and constant additions. */
5965 static gimple *
5966 va_list_skip_additions (tree lhs)
5968 gimple *stmt;
5970 for (;;)
5972 enum tree_code code;
5974 stmt = SSA_NAME_DEF_STMT (lhs);
5976 if (gimple_code (stmt) == GIMPLE_PHI)
5977 return stmt;
5979 if (!is_gimple_assign (stmt)
5980 || gimple_assign_lhs (stmt) != lhs)
5981 return NULL;
5983 if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME)
5984 return stmt;
5985 code = gimple_assign_rhs_code (stmt);
5986 if (!CONVERT_EXPR_CODE_P (code)
5987 && ((code != PLUS_EXPR && code != POINTER_PLUS_EXPR)
5988 || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST
5989 || !tree_fits_uhwi_p (gimple_assign_rhs2 (stmt))))
5990 return stmt;
5992 lhs = gimple_assign_rhs1 (stmt);
5996 /* Check if LHS = RHS statement is
5997 LHS = *(ap.__base + ap.__offset + cst)
5999 LHS = *(ap.__base
6000 + ((ap.__offset + cst <= 47)
6001 ? ap.__offset + cst - 48 : ap.__offset + cst) + cst2).
6002 If the former, indicate that GPR registers are needed,
6003 if the latter, indicate that FPR registers are needed.
6005 Also look for LHS = (*ptr).field, where ptr is one of the forms
6006 listed above.
6008 On alpha, cfun->va_list_gpr_size is used as size of the needed
6009 regs and cfun->va_list_fpr_size is a bitmask, bit 0 set if GPR
6010 registers are needed and bit 1 set if FPR registers are needed.
6011 Return true if va_list references should not be scanned for the
6012 current statement. */
6014 static bool
6015 alpha_stdarg_optimize_hook (struct stdarg_info *si, const gimple *stmt)
6017 tree base, offset, rhs;
6018 int offset_arg = 1;
6019 gimple *base_stmt;
6021 if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
6022 != GIMPLE_SINGLE_RHS)
6023 return false;
6025 rhs = gimple_assign_rhs1 (stmt);
6026 while (handled_component_p (rhs))
6027 rhs = TREE_OPERAND (rhs, 0);
6028 if (TREE_CODE (rhs) != MEM_REF
6029 || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME)
6030 return false;
6032 stmt = va_list_skip_additions (TREE_OPERAND (rhs, 0));
6033 if (stmt == NULL
6034 || !is_gimple_assign (stmt)
6035 || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR)
6036 return false;
6038 base = gimple_assign_rhs1 (stmt);
6039 if (TREE_CODE (base) == SSA_NAME)
6041 base_stmt = va_list_skip_additions (base);
6042 if (base_stmt
6043 && is_gimple_assign (base_stmt)
6044 && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
6045 base = gimple_assign_rhs1 (base_stmt);
6048 if (TREE_CODE (base) != COMPONENT_REF
6049 || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
6051 base = gimple_assign_rhs2 (stmt);
6052 if (TREE_CODE (base) == SSA_NAME)
6054 base_stmt = va_list_skip_additions (base);
6055 if (base_stmt
6056 && is_gimple_assign (base_stmt)
6057 && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
6058 base = gimple_assign_rhs1 (base_stmt);
6061 if (TREE_CODE (base) != COMPONENT_REF
6062 || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
6063 return false;
6065 offset_arg = 0;
6068 base = get_base_address (base);
6069 if (TREE_CODE (base) != VAR_DECL
6070 || !bitmap_bit_p (si->va_list_vars, DECL_UID (base) + num_ssa_names))
6071 return false;
6073 offset = gimple_op (stmt, 1 + offset_arg);
6074 if (TREE_CODE (offset) == SSA_NAME)
6076 gimple *offset_stmt = va_list_skip_additions (offset);
6078 if (offset_stmt
6079 && gimple_code (offset_stmt) == GIMPLE_PHI)
6081 HOST_WIDE_INT sub;
6082 gimple *arg1_stmt, *arg2_stmt;
6083 tree arg1, arg2;
6084 enum tree_code code1, code2;
6086 if (gimple_phi_num_args (offset_stmt) != 2)
6087 goto escapes;
6089 arg1_stmt
6090 = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 0));
6091 arg2_stmt
6092 = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 1));
6093 if (arg1_stmt == NULL
6094 || !is_gimple_assign (arg1_stmt)
6095 || arg2_stmt == NULL
6096 || !is_gimple_assign (arg2_stmt))
6097 goto escapes;
6099 code1 = gimple_assign_rhs_code (arg1_stmt);
6100 code2 = gimple_assign_rhs_code (arg2_stmt);
6101 if (code1 == COMPONENT_REF
6102 && (code2 == MINUS_EXPR || code2 == PLUS_EXPR))
6103 /* Do nothing. */;
6104 else if (code2 == COMPONENT_REF
6105 && (code1 == MINUS_EXPR || code1 == PLUS_EXPR))
6107 std::swap (arg1_stmt, arg2_stmt);
6108 code2 = code1;
6110 else
6111 goto escapes;
6113 if (!tree_fits_shwi_p (gimple_assign_rhs2 (arg2_stmt)))
6114 goto escapes;
6116 sub = tree_to_shwi (gimple_assign_rhs2 (arg2_stmt));
6117 if (code2 == MINUS_EXPR)
6118 sub = -sub;
6119 if (sub < -48 || sub > -32)
6120 goto escapes;
6122 arg1 = gimple_assign_rhs1 (arg1_stmt);
6123 arg2 = gimple_assign_rhs1 (arg2_stmt);
6124 if (TREE_CODE (arg2) == SSA_NAME)
6126 arg2_stmt = va_list_skip_additions (arg2);
6127 if (arg2_stmt == NULL
6128 || !is_gimple_assign (arg2_stmt)
6129 || gimple_assign_rhs_code (arg2_stmt) != COMPONENT_REF)
6130 goto escapes;
6131 arg2 = gimple_assign_rhs1 (arg2_stmt);
6133 if (arg1 != arg2)
6134 goto escapes;
6136 if (TREE_CODE (arg1) != COMPONENT_REF
6137 || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field
6138 || get_base_address (arg1) != base)
6139 goto escapes;
6141 /* Need floating point regs. */
6142 cfun->va_list_fpr_size |= 2;
6143 return false;
6145 if (offset_stmt
6146 && is_gimple_assign (offset_stmt)
6147 && gimple_assign_rhs_code (offset_stmt) == COMPONENT_REF)
6148 offset = gimple_assign_rhs1 (offset_stmt);
6150 if (TREE_CODE (offset) != COMPONENT_REF
6151 || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field
6152 || get_base_address (offset) != base)
6153 goto escapes;
6154 else
6155 /* Need general regs. */
6156 cfun->va_list_fpr_size |= 1;
6157 return false;
6159 escapes:
6160 si->va_list_escapes = true;
6161 return false;
6163 #endif
6165 /* Perform any actions needed for a function that is receiving a
6166 variable number of arguments. */
6168 static void
6169 alpha_setup_incoming_varargs (cumulative_args_t pcum,
6170 const function_arg_info &arg,
6171 int *pretend_size, int no_rtl)
6173 CUMULATIVE_ARGS cum = *get_cumulative_args (pcum);
6175 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
6176 || arg.type != NULL_TREE)
6177 /* Skip the current argument. */
6178 targetm.calls.function_arg_advance (pack_cumulative_args (&cum), arg);
6180 #if TARGET_ABI_OPEN_VMS
6181 /* For VMS, we allocate space for all 6 arg registers plus a count.
6183 However, if NO registers need to be saved, don't allocate any space.
6184 This is not only because we won't need the space, but because AP
6185 includes the current_pretend_args_size and we don't want to mess up
6186 any ap-relative addresses already made. */
6187 if (cum.num_args < 6)
6189 if (!no_rtl)
6191 emit_move_insn (gen_rtx_REG (DImode, 1), virtual_incoming_args_rtx);
6192 emit_insn (gen_arg_home ());
6194 *pretend_size = 7 * UNITS_PER_WORD;
6196 #else
6197 /* On OSF/1 and friends, we allocate space for all 12 arg registers, but
6198 only push those that are remaining. However, if NO registers need to
6199 be saved, don't allocate any space. This is not only because we won't
6200 need the space, but because AP includes the current_pretend_args_size
6201 and we don't want to mess up any ap-relative addresses already made.
6203 If we are not to use the floating-point registers, save the integer
6204 registers where we would put the floating-point registers. This is
6205 not the most efficient way to implement varargs with just one register
6206 class, but it isn't worth doing anything more efficient in this rare
6207 case. */
6208 if (cum >= 6)
6209 return;
6211 if (!no_rtl)
6213 int count;
6214 alias_set_type set = get_varargs_alias_set ();
6215 rtx tmp;
6217 count = cfun->va_list_gpr_size / UNITS_PER_WORD;
6218 if (count > 6 - cum)
6219 count = 6 - cum;
6221 /* Detect whether integer registers or floating-point registers
6222 are needed by the detected va_arg statements. See above for
6223 how these values are computed. Note that the "escape" value
6224 is VA_LIST_MAX_FPR_SIZE, which is 255, which has both of
6225 these bits set. */
6226 gcc_assert ((VA_LIST_MAX_FPR_SIZE & 3) == 3);
6228 if (cfun->va_list_fpr_size & 1)
6230 tmp = gen_rtx_MEM (BLKmode,
6231 plus_constant (Pmode, virtual_incoming_args_rtx,
6232 (cum + 6) * UNITS_PER_WORD));
6233 MEM_NOTRAP_P (tmp) = 1;
6234 set_mem_alias_set (tmp, set);
6235 move_block_from_reg (16 + cum, tmp, count);
6238 if (cfun->va_list_fpr_size & 2)
6240 tmp = gen_rtx_MEM (BLKmode,
6241 plus_constant (Pmode, virtual_incoming_args_rtx,
6242 cum * UNITS_PER_WORD));
6243 MEM_NOTRAP_P (tmp) = 1;
6244 set_mem_alias_set (tmp, set);
6245 move_block_from_reg (16 + cum + TARGET_FPREGS*32, tmp, count);
6248 *pretend_size = 12 * UNITS_PER_WORD;
6249 #endif
6252 static void
6253 alpha_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6255 HOST_WIDE_INT offset;
6256 tree t, offset_field, base_field;
6258 if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK)
6259 return;
6261 /* For Unix, TARGET_SETUP_INCOMING_VARARGS moves the starting address base
6262 up by 48, storing fp arg registers in the first 48 bytes, and the
6263 integer arg registers in the next 48 bytes. This is only done,
6264 however, if any integer registers need to be stored.
6266 If no integer registers need be stored, then we must subtract 48
6267 in order to account for the integer arg registers which are counted
6268 in argsize above, but which are not actually stored on the stack.
6269 Must further be careful here about structures straddling the last
6270 integer argument register; that futzes with pretend_args_size,
6271 which changes the meaning of AP. */
6273 if (NUM_ARGS < 6)
6274 offset = TARGET_ABI_OPEN_VMS ? UNITS_PER_WORD : 6 * UNITS_PER_WORD;
6275 else
6276 offset = -6 * UNITS_PER_WORD + crtl->args.pretend_args_size;
6278 if (TARGET_ABI_OPEN_VMS)
6280 t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
6281 t = fold_build_pointer_plus_hwi (t, offset + NUM_ARGS * UNITS_PER_WORD);
6282 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
6283 TREE_SIDE_EFFECTS (t) = 1;
6284 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6286 else
6288 base_field = TYPE_FIELDS (TREE_TYPE (valist));
6289 offset_field = DECL_CHAIN (base_field);
6291 base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
6292 valist, base_field, NULL_TREE);
6293 offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
6294 valist, offset_field, NULL_TREE);
6296 t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
6297 t = fold_build_pointer_plus_hwi (t, offset);
6298 t = build2 (MODIFY_EXPR, TREE_TYPE (base_field), base_field, t);
6299 TREE_SIDE_EFFECTS (t) = 1;
6300 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6302 t = build_int_cst (NULL_TREE, NUM_ARGS * UNITS_PER_WORD);
6303 t = build2 (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field, t);
6304 TREE_SIDE_EFFECTS (t) = 1;
6305 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
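/* As a worked illustration of the OSF branch above: for a varargs
   function with one named argument, NUM_ARGS is 1, so base is set to
   the incoming-args pointer plus 6 * UNITS_PER_WORD = 48 and the
   offset field to 1 * UNITS_PER_WORD = 8; the 48-byte bias skips the
   FP register save slots laid down before the integer ones.  */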
6309 static tree
6310 alpha_gimplify_va_arg_1 (tree type, tree base, tree offset,
6311 gimple_seq *pre_p)
6313 tree type_size, ptr_type, addend, t, addr;
6314 gimple_seq internal_post;
6316 /* If the type could not be passed in registers, skip the block
6317 reserved for the registers. */
6318 if (must_pass_va_arg_in_stack (type))
6320 t = build_int_cst (TREE_TYPE (offset), 6*8);
6321 gimplify_assign (offset,
6322 build2 (MAX_EXPR, TREE_TYPE (offset), offset, t),
6323 pre_p);
6326 addend = offset;
6327 ptr_type = build_pointer_type_for_mode (type, ptr_mode, true);
6329 if (TREE_CODE (type) == COMPLEX_TYPE)
6331 tree real_part, imag_part, real_temp;
6333 real_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6334 offset, pre_p);
6336 /* Copy the value into a new temporary, lest the formal temporary
6337 be reused out from under us. */
6338 real_temp = get_initialized_tmp_var (real_part, pre_p, NULL);
6340 imag_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6341 offset, pre_p);
6343 return build2 (COMPLEX_EXPR, type, real_temp, imag_part);
6345 else if (SCALAR_FLOAT_TYPE_P (type))
6347 tree fpaddend, cond, fourtyeight;
6349 fourtyeight = build_int_cst (TREE_TYPE (addend), 6*8);
6350 fpaddend = fold_build2 (MINUS_EXPR, TREE_TYPE (addend),
6351 addend, fourtyeight);
6352 cond = fold_build2 (LT_EXPR, boolean_type_node, addend, fourtyeight);
6353 addend = fold_build3 (COND_EXPR, TREE_TYPE (addend), cond,
6354 fpaddend, addend);
6357 /* Build the final address and force that value into a temporary. */
6358 addr = fold_build_pointer_plus (fold_convert (ptr_type, base), addend);
6359 internal_post = NULL;
6360 gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue);
6361 gimple_seq_add_seq (pre_p, internal_post);
6363 /* Update the offset field. */
6364 type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type));
6365 if (type_size == NULL || TREE_OVERFLOW (type_size))
6366 t = size_zero_node;
6367 else
6369 t = size_binop (PLUS_EXPR, type_size, size_int (7));
6370 t = size_binop (TRUNC_DIV_EXPR, t, size_int (8));
6371 t = size_binop (MULT_EXPR, t, size_int (8));
6373 t = fold_convert (TREE_TYPE (offset), t);
6374 gimplify_assign (offset, build2 (PLUS_EXPR, TREE_TYPE (offset), offset, t),
6375 pre_p);
6377 return build_va_arg_indirect_ref (addr);
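/* Worked example for the floating-point adjustment above: when a
   'double' is fetched while the running offset is 8, the offset is
   below 48, so the addend becomes 8 - 48 = -40 and the value is read
   from base - 40, i.e. from the FP register save area below base;
   the offset field then advances by 8 like any other 8-byte
   argument.  */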
6380 static tree
6381 alpha_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6382 gimple_seq *post_p)
6384 tree offset_field, base_field, offset, base, t, r;
6385 bool indirect;
6387 if (TARGET_ABI_OPEN_VMS)
6388 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6390 base_field = TYPE_FIELDS (va_list_type_node);
6391 offset_field = DECL_CHAIN (base_field);
6392 base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
6393 valist, base_field, NULL_TREE);
6394 offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
6395 valist, offset_field, NULL_TREE);
6397 /* Pull the fields of the structure out into temporaries. Since we never
6398 modify the base field, we can use a formal temporary. Sign-extend the
6399 offset field so that it's the proper width for pointer arithmetic. */
6400 base = get_formal_tmp_var (base_field, pre_p);
6402 t = fold_convert (build_nonstandard_integer_type (64, 0), offset_field);
6403 offset = get_initialized_tmp_var (t, pre_p, NULL);
6405 indirect = pass_va_arg_by_reference (type);
6407 if (indirect)
6409 if (TREE_CODE (type) == COMPLEX_TYPE
6410 && targetm.calls.split_complex_arg (type))
6412 tree real_part, imag_part, real_temp;
6414 tree ptr_type = build_pointer_type_for_mode (TREE_TYPE (type),
6415 ptr_mode, true);
6417 real_part = alpha_gimplify_va_arg_1 (ptr_type, base,
6418 offset, pre_p);
6419 real_part = build_va_arg_indirect_ref (real_part);
6421 /* Copy the value into a new temporary, lest the formal temporary
6422 be reused out from under us. */
6423 real_temp = get_initialized_tmp_var (real_part, pre_p, NULL);
6425 imag_part = alpha_gimplify_va_arg_1 (ptr_type, base,
6426 offset, pre_p);
6427 imag_part = build_va_arg_indirect_ref (imag_part);
6429 r = build2 (COMPLEX_EXPR, type, real_temp, imag_part);
6431 /* Stuff the offset temporary back into its field. */
6432 gimplify_assign (unshare_expr (offset_field),
6433 fold_convert (TREE_TYPE (offset_field), offset),
6434 pre_p);
6435 return r;
6437 else
6438 type = build_pointer_type_for_mode (type, ptr_mode, true);
6441 /* Find the value. Note that this will be a stable indirection, or
6442 a composite of stable indirections in the case of complex. */
6443 r = alpha_gimplify_va_arg_1 (type, base, offset, pre_p);
6445 /* Stuff the offset temporary back into its field. */
6446 gimplify_assign (unshare_expr (offset_field),
6447 fold_convert (TREE_TYPE (offset_field), offset), pre_p);
6449 if (indirect)
6450 r = build_va_arg_indirect_ref (r);
6452 return r;
6455 /* Builtins. */
6457 enum alpha_builtin
6459 ALPHA_BUILTIN_CMPBGE,
6460 ALPHA_BUILTIN_EXTBL,
6461 ALPHA_BUILTIN_EXTWL,
6462 ALPHA_BUILTIN_EXTLL,
6463 ALPHA_BUILTIN_EXTQL,
6464 ALPHA_BUILTIN_EXTWH,
6465 ALPHA_BUILTIN_EXTLH,
6466 ALPHA_BUILTIN_EXTQH,
6467 ALPHA_BUILTIN_INSBL,
6468 ALPHA_BUILTIN_INSWL,
6469 ALPHA_BUILTIN_INSLL,
6470 ALPHA_BUILTIN_INSQL,
6471 ALPHA_BUILTIN_INSWH,
6472 ALPHA_BUILTIN_INSLH,
6473 ALPHA_BUILTIN_INSQH,
6474 ALPHA_BUILTIN_MSKBL,
6475 ALPHA_BUILTIN_MSKWL,
6476 ALPHA_BUILTIN_MSKLL,
6477 ALPHA_BUILTIN_MSKQL,
6478 ALPHA_BUILTIN_MSKWH,
6479 ALPHA_BUILTIN_MSKLH,
6480 ALPHA_BUILTIN_MSKQH,
6481 ALPHA_BUILTIN_UMULH,
6482 ALPHA_BUILTIN_ZAP,
6483 ALPHA_BUILTIN_ZAPNOT,
6484 ALPHA_BUILTIN_AMASK,
6485 ALPHA_BUILTIN_IMPLVER,
6486 ALPHA_BUILTIN_RPCC,
6487 ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
6488 ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER,
6490 /* TARGET_MAX */
6491 ALPHA_BUILTIN_MINUB8,
6492 ALPHA_BUILTIN_MINSB8,
6493 ALPHA_BUILTIN_MINUW4,
6494 ALPHA_BUILTIN_MINSW4,
6495 ALPHA_BUILTIN_MAXUB8,
6496 ALPHA_BUILTIN_MAXSB8,
6497 ALPHA_BUILTIN_MAXUW4,
6498 ALPHA_BUILTIN_MAXSW4,
6499 ALPHA_BUILTIN_PERR,
6500 ALPHA_BUILTIN_PKLB,
6501 ALPHA_BUILTIN_PKWB,
6502 ALPHA_BUILTIN_UNPKBL,
6503 ALPHA_BUILTIN_UNPKBW,
6505 /* TARGET_CIX */
6506 ALPHA_BUILTIN_CTTZ,
6507 ALPHA_BUILTIN_CTLZ,
6508 ALPHA_BUILTIN_CTPOP,
6510 ALPHA_BUILTIN_max
6513 static enum insn_code const code_for_builtin[ALPHA_BUILTIN_max] = {
6514 CODE_FOR_builtin_cmpbge,
6515 CODE_FOR_extbl,
6516 CODE_FOR_extwl,
6517 CODE_FOR_extll,
6518 CODE_FOR_extql,
6519 CODE_FOR_extwh,
6520 CODE_FOR_extlh,
6521 CODE_FOR_extqh,
6522 CODE_FOR_builtin_insbl,
6523 CODE_FOR_builtin_inswl,
6524 CODE_FOR_builtin_insll,
6525 CODE_FOR_insql,
6526 CODE_FOR_inswh,
6527 CODE_FOR_inslh,
6528 CODE_FOR_insqh,
6529 CODE_FOR_mskbl,
6530 CODE_FOR_mskwl,
6531 CODE_FOR_mskll,
6532 CODE_FOR_mskql,
6533 CODE_FOR_mskwh,
6534 CODE_FOR_msklh,
6535 CODE_FOR_mskqh,
6536 CODE_FOR_umuldi3_highpart,
6537 CODE_FOR_builtin_zap,
6538 CODE_FOR_builtin_zapnot,
6539 CODE_FOR_builtin_amask,
6540 CODE_FOR_builtin_implver,
6541 CODE_FOR_builtin_rpcc,
6542 CODE_FOR_builtin_establish_vms_condition_handler,
6543 CODE_FOR_builtin_revert_vms_condition_handler,
6545 /* TARGET_MAX */
6546 CODE_FOR_builtin_minub8,
6547 CODE_FOR_builtin_minsb8,
6548 CODE_FOR_builtin_minuw4,
6549 CODE_FOR_builtin_minsw4,
6550 CODE_FOR_builtin_maxub8,
6551 CODE_FOR_builtin_maxsb8,
6552 CODE_FOR_builtin_maxuw4,
6553 CODE_FOR_builtin_maxsw4,
6554 CODE_FOR_builtin_perr,
6555 CODE_FOR_builtin_pklb,
6556 CODE_FOR_builtin_pkwb,
6557 CODE_FOR_builtin_unpkbl,
6558 CODE_FOR_builtin_unpkbw,
6560 /* TARGET_CIX */
6561 CODE_FOR_ctzdi2,
6562 CODE_FOR_clzdi2,
6563 CODE_FOR_popcountdi2
6566 struct alpha_builtin_def
6568 const char *name;
6569 enum alpha_builtin code;
6570 unsigned int target_mask;
6571 bool is_const;
6574 static struct alpha_builtin_def const zero_arg_builtins[] = {
6575 { "__builtin_alpha_implver", ALPHA_BUILTIN_IMPLVER, 0, true },
6576 { "__builtin_alpha_rpcc", ALPHA_BUILTIN_RPCC, 0, false }
6579 static struct alpha_builtin_def const one_arg_builtins[] = {
6580 { "__builtin_alpha_amask", ALPHA_BUILTIN_AMASK, 0, true },
6581 { "__builtin_alpha_pklb", ALPHA_BUILTIN_PKLB, MASK_MAX, true },
6582 { "__builtin_alpha_pkwb", ALPHA_BUILTIN_PKWB, MASK_MAX, true },
6583 { "__builtin_alpha_unpkbl", ALPHA_BUILTIN_UNPKBL, MASK_MAX, true },
6584 { "__builtin_alpha_unpkbw", ALPHA_BUILTIN_UNPKBW, MASK_MAX, true },
6585 { "__builtin_alpha_cttz", ALPHA_BUILTIN_CTTZ, MASK_CIX, true },
6586 { "__builtin_alpha_ctlz", ALPHA_BUILTIN_CTLZ, MASK_CIX, true },
6587 { "__builtin_alpha_ctpop", ALPHA_BUILTIN_CTPOP, MASK_CIX, true }
6590 static struct alpha_builtin_def const two_arg_builtins[] = {
6591 { "__builtin_alpha_cmpbge", ALPHA_BUILTIN_CMPBGE, 0, true },
6592 { "__builtin_alpha_extbl", ALPHA_BUILTIN_EXTBL, 0, true },
6593 { "__builtin_alpha_extwl", ALPHA_BUILTIN_EXTWL, 0, true },
6594 { "__builtin_alpha_extll", ALPHA_BUILTIN_EXTLL, 0, true },
6595 { "__builtin_alpha_extql", ALPHA_BUILTIN_EXTQL, 0, true },
6596 { "__builtin_alpha_extwh", ALPHA_BUILTIN_EXTWH, 0, true },
6597 { "__builtin_alpha_extlh", ALPHA_BUILTIN_EXTLH, 0, true },
6598 { "__builtin_alpha_extqh", ALPHA_BUILTIN_EXTQH, 0, true },
6599 { "__builtin_alpha_insbl", ALPHA_BUILTIN_INSBL, 0, true },
6600 { "__builtin_alpha_inswl", ALPHA_BUILTIN_INSWL, 0, true },
6601 { "__builtin_alpha_insll", ALPHA_BUILTIN_INSLL, 0, true },
6602 { "__builtin_alpha_insql", ALPHA_BUILTIN_INSQL, 0, true },
6603 { "__builtin_alpha_inswh", ALPHA_BUILTIN_INSWH, 0, true },
6604 { "__builtin_alpha_inslh", ALPHA_BUILTIN_INSLH, 0, true },
6605 { "__builtin_alpha_insqh", ALPHA_BUILTIN_INSQH, 0, true },
6606 { "__builtin_alpha_mskbl", ALPHA_BUILTIN_MSKBL, 0, true },
6607 { "__builtin_alpha_mskwl", ALPHA_BUILTIN_MSKWL, 0, true },
6608 { "__builtin_alpha_mskll", ALPHA_BUILTIN_MSKLL, 0, true },
6609 { "__builtin_alpha_mskql", ALPHA_BUILTIN_MSKQL, 0, true },
6610 { "__builtin_alpha_mskwh", ALPHA_BUILTIN_MSKWH, 0, true },
6611 { "__builtin_alpha_msklh", ALPHA_BUILTIN_MSKLH, 0, true },
6612 { "__builtin_alpha_mskqh", ALPHA_BUILTIN_MSKQH, 0, true },
6613 { "__builtin_alpha_umulh", ALPHA_BUILTIN_UMULH, 0, true },
6614 { "__builtin_alpha_zap", ALPHA_BUILTIN_ZAP, 0, true },
6615 { "__builtin_alpha_zapnot", ALPHA_BUILTIN_ZAPNOT, 0, true },
6616 { "__builtin_alpha_minub8", ALPHA_BUILTIN_MINUB8, MASK_MAX, true },
6617 { "__builtin_alpha_minsb8", ALPHA_BUILTIN_MINSB8, MASK_MAX, true },
6618 { "__builtin_alpha_minuw4", ALPHA_BUILTIN_MINUW4, MASK_MAX, true },
6619 { "__builtin_alpha_minsw4", ALPHA_BUILTIN_MINSW4, MASK_MAX, true },
6620 { "__builtin_alpha_maxub8", ALPHA_BUILTIN_MAXUB8, MASK_MAX, true },
6621 { "__builtin_alpha_maxsb8", ALPHA_BUILTIN_MAXSB8, MASK_MAX, true },
6622 { "__builtin_alpha_maxuw4", ALPHA_BUILTIN_MAXUW4, MASK_MAX, true },
6623 { "__builtin_alpha_maxsw4", ALPHA_BUILTIN_MAXSW4, MASK_MAX, true },
6624 { "__builtin_alpha_perr", ALPHA_BUILTIN_PERR, MASK_MAX, true }
6627 static GTY(()) tree alpha_dimode_u;
6628 static GTY(()) tree alpha_v8qi_u;
6629 static GTY(()) tree alpha_v8qi_s;
6630 static GTY(()) tree alpha_v4hi_u;
6631 static GTY(()) tree alpha_v4hi_s;
6633 static GTY(()) tree alpha_builtins[(int) ALPHA_BUILTIN_max];
6635 /* Return the alpha builtin for CODE. */
6637 static tree
6638 alpha_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
6640 if (code >= ALPHA_BUILTIN_max)
6641 return error_mark_node;
6642 return alpha_builtins[code];
6645 /* Helper function of alpha_init_builtins. Add the built-in specified
6646 by NAME, FTYPE, CODE, and ECF. */
6648 static void
6649 alpha_builtin_function (const char *name, tree ftype,
6650 enum alpha_builtin code, unsigned ecf)
6652 tree decl = add_builtin_function (name, ftype, (int) code,
6653 BUILT_IN_MD, NULL, NULL_TREE);
6655 if (ecf & ECF_CONST)
6656 TREE_READONLY (decl) = 1;
6657 if (ecf & ECF_NOTHROW)
6658 TREE_NOTHROW (decl) = 1;
6660 alpha_builtins [(int) code] = decl;
6663 /* Helper function of alpha_init_builtins. Add the COUNT built-in
6664 functions pointed to by P, with function type FTYPE. */
6666 static void
6667 alpha_add_builtins (const struct alpha_builtin_def *p, size_t count,
6668 tree ftype)
6670 size_t i;
6672 for (i = 0; i < count; ++i, ++p)
6673 if ((target_flags & p->target_mask) == p->target_mask)
6674 alpha_builtin_function (p->name, ftype, p->code,
6675 (p->is_const ? ECF_CONST : 0) | ECF_NOTHROW);
6678 static void
6679 alpha_init_builtins (void)
6681 tree ftype;
6683 alpha_dimode_u = lang_hooks.types.type_for_mode (DImode, 1);
6684 alpha_v8qi_u = build_vector_type (unsigned_intQI_type_node, 8);
6685 alpha_v8qi_s = build_vector_type (intQI_type_node, 8);
6686 alpha_v4hi_u = build_vector_type (unsigned_intHI_type_node, 4);
6687 alpha_v4hi_s = build_vector_type (intHI_type_node, 4);
6689 ftype = build_function_type_list (alpha_dimode_u, NULL_TREE);
6690 alpha_add_builtins (zero_arg_builtins, ARRAY_SIZE (zero_arg_builtins), ftype);
6692 ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u, NULL_TREE);
6693 alpha_add_builtins (one_arg_builtins, ARRAY_SIZE (one_arg_builtins), ftype);
6695 ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u,
6696 alpha_dimode_u, NULL_TREE);
6697 alpha_add_builtins (two_arg_builtins, ARRAY_SIZE (two_arg_builtins), ftype);
6699 if (TARGET_ABI_OPEN_VMS)
6701 ftype = build_function_type_list (ptr_type_node, ptr_type_node,
6702 NULL_TREE);
6703 alpha_builtin_function ("__builtin_establish_vms_condition_handler",
6704 ftype,
6705 ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
6708 ftype = build_function_type_list (ptr_type_node, void_type_node,
6709 NULL_TREE);
6710 alpha_builtin_function ("__builtin_revert_vms_condition_handler", ftype,
6711 ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER, 0);
6713 vms_patch_builtins ();
6717 /* Expand an expression EXP that calls a built-in function,
6718 with result going to TARGET if that's convenient
6719 (and in mode MODE if that's convenient).
6720 SUBTARGET may be used as the target for computing one of EXP's operands.
6721 IGNORE is nonzero if the value is to be ignored. */
6723 static rtx
6724 alpha_expand_builtin (tree exp, rtx target,
6725 rtx subtarget ATTRIBUTE_UNUSED,
6726 machine_mode mode ATTRIBUTE_UNUSED,
6727 int ignore ATTRIBUTE_UNUSED)
6729 #define MAX_ARGS 2
6731 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6732 unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
6733 tree arg;
6734 call_expr_arg_iterator iter;
6735 enum insn_code icode;
6736 rtx op[MAX_ARGS], pat;
6737 int arity;
6738 bool nonvoid;
6740 if (fcode >= ALPHA_BUILTIN_max)
6741 internal_error ("bad builtin fcode");
6742 icode = code_for_builtin[fcode];
6743 if (icode == 0)
6744 internal_error ("bad builtin fcode");
6746 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
6748 arity = 0;
6749 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
6751 const struct insn_operand_data *insn_op;
6753 if (arg == error_mark_node)
6754 return NULL_RTX;
6755 if (arity >= MAX_ARGS)
6756 return NULL_RTX;
6758 insn_op = &insn_data[icode].operand[arity + nonvoid];
6760 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
6762 if (!(*insn_op->predicate) (op[arity], insn_op->mode))
6763 op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
6764 arity++;
6767 if (nonvoid)
6769 machine_mode tmode = insn_data[icode].operand[0].mode;
6770 if (!target
6771 || GET_MODE (target) != tmode
6772 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
6773 target = gen_reg_rtx (tmode);
6776 switch (arity)
6778 case 0:
6779 pat = GEN_FCN (icode) (target);
6780 break;
6781 case 1:
6782 if (nonvoid)
6783 pat = GEN_FCN (icode) (target, op[0]);
6784 else
6785 pat = GEN_FCN (icode) (op[0]);
6786 break;
6787 case 2:
6788 pat = GEN_FCN (icode) (target, op[0], op[1]);
6789 break;
6790 default:
6791 gcc_unreachable ();
6793 if (!pat)
6794 return NULL_RTX;
6795 emit_insn (pat);
6797 if (nonvoid)
6798 return target;
6799 else
6800 return const0_rtx;
6803 /* Fold the builtin for the CMPBGE instruction. This is a vector comparison
6804 with an 8-bit output vector. OPINT contains the integer operands; bit N
6805 of OP_CONST is set if OPINT[N] is valid. */
6807 static tree
6808 alpha_fold_builtin_cmpbge (unsigned HOST_WIDE_INT opint[], long op_const)
6810 if (op_const == 3)
6812 int i, val;
6813 for (i = 0, val = 0; i < 8; ++i)
6815 unsigned HOST_WIDE_INT c0 = (opint[0] >> (i * 8)) & 0xff;
6816 unsigned HOST_WIDE_INT c1 = (opint[1] >> (i * 8)) & 0xff;
6817 if (c0 >= c1)
6818 val |= 1 << i;
6820 return build_int_cst (alpha_dimode_u, val);
6822 else if (op_const == 2 && opint[1] == 0)
6823 return build_int_cst (alpha_dimode_u, 0xff);
6824 return NULL;
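/* Example of the fold above: __builtin_alpha_cmpbge (0x100, 0x1)
   compares byte 0 as 0x00 >= 0x01 (false) while bytes 1 through 7
   all compare greater-or-equal, so the call folds to 0xfe.  */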
6827 /* Fold the builtin for the ZAPNOT instruction. This is essentially a
6828 specialized form of an AND operation. Other byte manipulation instructions
6829 are defined in terms of this instruction, so this is also used as a
6830 subroutine for other builtins.
6832 OP contains the tree operands; OPINT contains the extracted integer values.
6833 Bit N of OP_CONST is set if OPINT[N] is valid. OP may be null if only
6834 OPINT may be considered. */
6836 static tree
6837 alpha_fold_builtin_zapnot (tree *op, unsigned HOST_WIDE_INT opint[],
6838 long op_const)
6840 if (op_const & 2)
6842 unsigned HOST_WIDE_INT mask = 0;
6843 int i;
6845 for (i = 0; i < 8; ++i)
6846 if ((opint[1] >> i) & 1)
6847 mask |= (unsigned HOST_WIDE_INT)0xff << (i * 8);
6849 if (op_const & 1)
6850 return build_int_cst (alpha_dimode_u, opint[0] & mask);
6852 if (op)
6853 return fold_build2 (BIT_AND_EXPR, alpha_dimode_u, op[0],
6854 build_int_cst (alpha_dimode_u, mask));
6856 else if ((op_const & 1) && opint[0] == 0)
6857 return build_int_cst (alpha_dimode_u, 0);
6858 return NULL;
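/* Example folds: __builtin_alpha_zapnot (x, 0x0f) reduces to
   x & 0x00000000ffffffff (keep the low four bytes), and
   __builtin_alpha_zap (x, 0x0f), whose mask is complemented by the
   caller further below, reduces to x & 0xffffffff00000000.  */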
6861 /* Fold the builtins for the EXT family of instructions. */
6863 static tree
6864 alpha_fold_builtin_extxx (tree op[], unsigned HOST_WIDE_INT opint[],
6865 long op_const, unsigned HOST_WIDE_INT bytemask,
6866 bool is_high)
6868 long zap_const = 2;
6869 tree *zap_op = NULL;
6871 if (op_const & 2)
6873 unsigned HOST_WIDE_INT loc;
6875 loc = opint[1] & 7;
6876 loc *= BITS_PER_UNIT;
6878 if (loc != 0)
6880 if (op_const & 1)
6882 unsigned HOST_WIDE_INT temp = opint[0];
6883 if (is_high)
6884 temp <<= loc;
6885 else
6886 temp >>= loc;
6887 opint[0] = temp;
6888 zap_const = 3;
6891 else
6892 zap_op = op;
6895 opint[1] = bytemask;
6896 return alpha_fold_builtin_zapnot (zap_op, opint, zap_const);
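/* Example fold: __builtin_alpha_extwl (0x0123456789abcdef, 2) shifts
   the first operand right by 2 * 8 = 16 bits and then applies the
   0x03 bytemask via zapnot, folding to the constant 0x89ab.  */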
6899 /* Fold the builtins for the INS family of instructions. */
6901 static tree
6902 alpha_fold_builtin_insxx (tree op[], unsigned HOST_WIDE_INT opint[],
6903 long op_const, unsigned HOST_WIDE_INT bytemask,
6904 bool is_high)
6906 if ((op_const & 1) && opint[0] == 0)
6907 return build_int_cst (alpha_dimode_u, 0);
6909 if (op_const & 2)
6911 unsigned HOST_WIDE_INT temp, loc, byteloc;
6912 tree *zap_op = NULL;
6914 loc = opint[1] & 7;
6915 bytemask <<= loc;
6917 temp = opint[0];
6918 if (is_high)
6920 byteloc = (64 - (loc * 8)) & 0x3f;
6921 if (byteloc == 0)
6922 zap_op = op;
6923 else
6924 temp >>= byteloc;
6925 bytemask >>= 8;
6927 else
6929 byteloc = loc * 8;
6930 if (byteloc == 0)
6931 zap_op = op;
6932 else
6933 temp <<= byteloc;
6936 opint[0] = temp;
6937 opint[1] = bytemask;
6938 return alpha_fold_builtin_zapnot (zap_op, opint, op_const);
6941 return NULL;
6944 static tree
6945 alpha_fold_builtin_mskxx (tree op[], unsigned HOST_WIDE_INT opint[],
6946 long op_const, unsigned HOST_WIDE_INT bytemask,
6947 bool is_high)
6949 if (op_const & 2)
6951 unsigned HOST_WIDE_INT loc;
6953 loc = opint[1] & 7;
6954 bytemask <<= loc;
6956 if (is_high)
6957 bytemask >>= 8;
6959 opint[1] = bytemask ^ 0xff;
6962 return alpha_fold_builtin_zapnot (op, opint, op_const);
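/* Example fold: for __builtin_alpha_mskwl (x, 2) the 0x03 bytemask is
   shifted to 0x0c and complemented to 0xf3, so the call reduces to
   zapnot (x, 0xf3), i.e. x with the word at byte offset 2 cleared.  */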
6965 static tree
6966 alpha_fold_vector_minmax (enum tree_code code, tree op[], tree vtype)
6968 tree op0 = fold_convert (vtype, op[0]);
6969 tree op1 = fold_convert (vtype, op[1]);
6970 tree val = fold_build2 (code, vtype, op0, op1);
6971 return fold_build1 (VIEW_CONVERT_EXPR, alpha_dimode_u, val);
6974 static tree
6975 alpha_fold_builtin_perr (unsigned HOST_WIDE_INT opint[], long op_const)
6977 unsigned HOST_WIDE_INT temp = 0;
6978 int i;
6980 if (op_const != 3)
6981 return NULL;
6983 for (i = 0; i < 8; ++i)
6985 unsigned HOST_WIDE_INT a = (opint[0] >> (i * 8)) & 0xff;
6986 unsigned HOST_WIDE_INT b = (opint[1] >> (i * 8)) & 0xff;
6987 if (a >= b)
6988 temp += a - b;
6989 else
6990 temp += b - a;
6993 return build_int_cst (alpha_dimode_u, temp);
6996 static tree
6997 alpha_fold_builtin_pklb (unsigned HOST_WIDE_INT opint[], long op_const)
6999 unsigned HOST_WIDE_INT temp;
7001 if (op_const == 0)
7002 return NULL;
7004 temp = opint[0] & 0xff;
7005 temp |= (opint[0] >> 24) & 0xff00;
7007 return build_int_cst (alpha_dimode_u, temp);
7010 static tree
7011 alpha_fold_builtin_pkwb (unsigned HOST_WIDE_INT opint[], long op_const)
7013 unsigned HOST_WIDE_INT temp;
7015 if (op_const == 0)
7016 return NULL;
7018 temp = opint[0] & 0xff;
7019 temp |= (opint[0] >> 8) & 0xff00;
7020 temp |= (opint[0] >> 16) & 0xff0000;
7021 temp |= (opint[0] >> 24) & 0xff000000;
7023 return build_int_cst (alpha_dimode_u, temp);
7026 static tree
7027 alpha_fold_builtin_unpkbl (unsigned HOST_WIDE_INT opint[], long op_const)
7029 unsigned HOST_WIDE_INT temp;
7031 if (op_const == 0)
7032 return NULL;
7034 temp = opint[0] & 0xff;
7035 temp |= (opint[0] & 0xff00) << 24;
7037 return build_int_cst (alpha_dimode_u, temp);
7040 static tree
7041 alpha_fold_builtin_unpkbw (unsigned HOST_WIDE_INT opint[], long op_const)
7043 unsigned HOST_WIDE_INT temp;
7045 if (op_const == 0)
7046 return NULL;
7048 temp = opint[0] & 0xff;
7049 temp |= (opint[0] & 0x0000ff00) << 8;
7050 temp |= (opint[0] & 0x00ff0000) << 16;
7051 temp |= (opint[0] & 0xff000000) << 24;
7053 return build_int_cst (alpha_dimode_u, temp);
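/* Example constant fold: __builtin_alpha_unpkbw (0x44332211) spreads
   the four low bytes into the four words of the result, yielding
   0x0044003300220011; pkwb above performs the inverse packing.  */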
7056 static tree
7057 alpha_fold_builtin_cttz (unsigned HOST_WIDE_INT opint[], long op_const)
7059 unsigned HOST_WIDE_INT temp;
7061 if (op_const == 0)
7062 return NULL;
7064 if (opint[0] == 0)
7065 temp = 64;
7066 else
7067 temp = exact_log2 (opint[0] & -opint[0]);
7069 return build_int_cst (alpha_dimode_u, temp);
7072 static tree
7073 alpha_fold_builtin_ctlz (unsigned HOST_WIDE_INT opint[], long op_const)
7075 unsigned HOST_WIDE_INT temp;
7077 if (op_const == 0)
7078 return NULL;
7080 if (opint[0] == 0)
7081 temp = 64;
7082 else
7083 temp = 64 - floor_log2 (opint[0]) - 1;
7085 return build_int_cst (alpha_dimode_u, temp);
7088 static tree
7089 alpha_fold_builtin_ctpop (unsigned HOST_WIDE_INT opint[], long op_const)
7091 unsigned HOST_WIDE_INT temp, op;
7093 if (op_const == 0)
7094 return NULL;
7096 op = opint[0];
7097 temp = 0;
7098 while (op)
7099 temp++, op &= op - 1;
7101 return build_int_cst (alpha_dimode_u, temp);
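/* Example constant folds for the CIX helpers above: cttz (0x80)
   yields 7, ctlz (0x80) yields 56 and ctpop (0x80) yields 1; a zero
   operand folds to 64 for cttz and ctlz and to 0 for ctpop.  */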
7104 /* Fold one of our builtin functions. */
7106 static tree
7107 alpha_fold_builtin (tree fndecl, int n_args, tree *op,
7108 bool ignore ATTRIBUTE_UNUSED)
7110 unsigned HOST_WIDE_INT opint[MAX_ARGS];
7111 long op_const = 0;
7112 int i;
7114 if (n_args > MAX_ARGS)
7115 return NULL;
7117 for (i = 0; i < n_args; i++)
7119 tree arg = op[i];
7120 if (arg == error_mark_node)
7121 return NULL;
7123 opint[i] = 0;
7124 if (TREE_CODE (arg) == INTEGER_CST)
7126 op_const |= 1L << i;
7127 opint[i] = int_cst_value (arg);
7131 switch (DECL_MD_FUNCTION_CODE (fndecl))
7133 case ALPHA_BUILTIN_CMPBGE:
7134 return alpha_fold_builtin_cmpbge (opint, op_const);
7136 case ALPHA_BUILTIN_EXTBL:
7137 return alpha_fold_builtin_extxx (op, opint, op_const, 0x01, false);
7138 case ALPHA_BUILTIN_EXTWL:
7139 return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, false);
7140 case ALPHA_BUILTIN_EXTLL:
7141 return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, false);
7142 case ALPHA_BUILTIN_EXTQL:
7143 return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, false);
7144 case ALPHA_BUILTIN_EXTWH:
7145 return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, true);
7146 case ALPHA_BUILTIN_EXTLH:
7147 return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, true);
7148 case ALPHA_BUILTIN_EXTQH:
7149 return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, true);
7151 case ALPHA_BUILTIN_INSBL:
7152 return alpha_fold_builtin_insxx (op, opint, op_const, 0x01, false);
7153 case ALPHA_BUILTIN_INSWL:
7154 return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, false);
7155 case ALPHA_BUILTIN_INSLL:
7156 return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, false);
7157 case ALPHA_BUILTIN_INSQL:
7158 return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, false);
7159 case ALPHA_BUILTIN_INSWH:
7160 return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, true);
7161 case ALPHA_BUILTIN_INSLH:
7162 return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, true);
7163 case ALPHA_BUILTIN_INSQH:
7164 return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, true);
7166 case ALPHA_BUILTIN_MSKBL:
7167 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x01, false);
7168 case ALPHA_BUILTIN_MSKWL:
7169 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, false);
7170 case ALPHA_BUILTIN_MSKLL:
7171 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, false);
7172 case ALPHA_BUILTIN_MSKQL:
7173 return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, false);
7174 case ALPHA_BUILTIN_MSKWH:
7175 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, true);
7176 case ALPHA_BUILTIN_MSKLH:
7177 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, true);
7178 case ALPHA_BUILTIN_MSKQH:
7179 return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, true);
7181 case ALPHA_BUILTIN_ZAP:
7182 opint[1] ^= 0xff;
7183 /* FALLTHRU */
7184 case ALPHA_BUILTIN_ZAPNOT:
7185 return alpha_fold_builtin_zapnot (op, opint, op_const);
7187 case ALPHA_BUILTIN_MINUB8:
7188 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_u);
7189 case ALPHA_BUILTIN_MINSB8:
7190 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_s);
7191 case ALPHA_BUILTIN_MINUW4:
7192 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_u);
7193 case ALPHA_BUILTIN_MINSW4:
7194 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_s);
7195 case ALPHA_BUILTIN_MAXUB8:
7196 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_u);
7197 case ALPHA_BUILTIN_MAXSB8:
7198 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_s);
7199 case ALPHA_BUILTIN_MAXUW4:
7200 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_u);
7201 case ALPHA_BUILTIN_MAXSW4:
7202 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_s);
7204 case ALPHA_BUILTIN_PERR:
7205 return alpha_fold_builtin_perr (opint, op_const);
7206 case ALPHA_BUILTIN_PKLB:
7207 return alpha_fold_builtin_pklb (opint, op_const);
7208 case ALPHA_BUILTIN_PKWB:
7209 return alpha_fold_builtin_pkwb (opint, op_const);
7210 case ALPHA_BUILTIN_UNPKBL:
7211 return alpha_fold_builtin_unpkbl (opint, op_const);
7212 case ALPHA_BUILTIN_UNPKBW:
7213 return alpha_fold_builtin_unpkbw (opint, op_const);
7215 case ALPHA_BUILTIN_CTTZ:
7216 return alpha_fold_builtin_cttz (opint, op_const);
7217 case ALPHA_BUILTIN_CTLZ:
7218 return alpha_fold_builtin_ctlz (opint, op_const);
7219 case ALPHA_BUILTIN_CTPOP:
7220 return alpha_fold_builtin_ctpop (opint, op_const);
7222 case ALPHA_BUILTIN_AMASK:
7223 case ALPHA_BUILTIN_IMPLVER:
7224 case ALPHA_BUILTIN_RPCC:
7225 /* None of these are foldable at compile-time. */
7226 default:
7227 return NULL;
7231 bool
7232 alpha_gimple_fold_builtin (gimple_stmt_iterator *gsi)
7234 bool changed = false;
7235 gimple *stmt = gsi_stmt (*gsi);
7236 tree call = gimple_call_fn (stmt);
7237 gimple *new_stmt = NULL;
7239 if (call)
7241 tree fndecl = gimple_call_fndecl (stmt);
7243 if (fndecl)
7245 tree arg0, arg1;
7247 switch (DECL_MD_FUNCTION_CODE (fndecl))
7249 case ALPHA_BUILTIN_UMULH:
7250 arg0 = gimple_call_arg (stmt, 0);
7251 arg1 = gimple_call_arg (stmt, 1);
7253 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
7254 MULT_HIGHPART_EXPR, arg0, arg1);
7255 break;
7256 default:
7257 break;
7262 if (new_stmt)
7264 gsi_replace (gsi, new_stmt, true);
7265 changed = true;
7268 return changed;
7271 /* This page contains routines that are used to determine what the function
7272 prologue and epilogue code will do and write them out. */
7274 /* Compute the size of the save area in the stack. */
7276 /* These variables are used for communication between the following functions.
7277 They indicate various things about the current function being compiled
7278 that are used to tell what kind of prologue, epilogue and procedure
7279 descriptor to generate. */
7281 /* Nonzero if we need a stack procedure. */
7282 enum alpha_procedure_types {PT_NULL = 0, PT_REGISTER = 1, PT_STACK = 2};
7283 static enum alpha_procedure_types alpha_procedure_type;
7285 /* Register number (either FP or SP) that is used to unwind the frame. */
7286 static int vms_unwind_regno;
7288 /* Register number used to save FP. We need not have one for RA since
7289 we don't modify it for register procedures. This is only defined
7290 for register frame procedures. */
7291 static int vms_save_fp_regno;
7293 /* Register number used to reference objects off our PV. */
7294 static int vms_base_regno;
7296 /* Compute register masks for saved registers, register save area size,
7297 and total frame size. */
7298 static void
7299 alpha_compute_frame_layout (void)
7301 unsigned HOST_WIDE_INT sa_mask = 0;
7302 HOST_WIDE_INT frame_size;
7303 int sa_size;
7305 /* When outputting a thunk, we don't have valid register life info,
7306 but assemble_start_function wants to output .frame and .mask
7307 directives. */
7308 if (!cfun->is_thunk)
7310 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7311 sa_mask |= HOST_WIDE_INT_1U << HARD_FRAME_POINTER_REGNUM;
7313 /* One for every register we have to save. */
7314 for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
7315 if (! call_used_or_fixed_reg_p (i)
7316 && df_regs_ever_live_p (i) && i != REG_RA)
7317 sa_mask |= HOST_WIDE_INT_1U << i;
7319 /* We need to restore these for the handler. */
7320 if (crtl->calls_eh_return)
7322 for (unsigned i = 0; ; ++i)
7324 unsigned regno = EH_RETURN_DATA_REGNO (i);
7325 if (regno == INVALID_REGNUM)
7326 break;
7327 sa_mask |= HOST_WIDE_INT_1U << regno;
7331 /* If any register spilled, then spill the return address also. */
7332 /* ??? This is required by the Digital stack unwind specification
7333 and isn't needed if we're doing Dwarf2 unwinding. */
7334 if (sa_mask || alpha_ra_ever_killed ())
7335 sa_mask |= HOST_WIDE_INT_1U << REG_RA;
7338 sa_size = popcount_hwi(sa_mask);
7339 frame_size = get_frame_size ();
7341 if (TARGET_ABI_OPEN_VMS)
7343 /* Start with a stack procedure if we make any calls (REG_RA used), or
7344 need a frame pointer, with a register procedure if we otherwise need
7345 at least a slot, and with a null procedure in other cases. */
7346 if ((sa_mask >> REG_RA) & 1 || frame_pointer_needed)
7347 alpha_procedure_type = PT_STACK;
7348 else if (frame_size != 0)
7349 alpha_procedure_type = PT_REGISTER;
7350 else
7351 alpha_procedure_type = PT_NULL;
7353 /* Don't reserve space for saving FP & RA yet. Do that later after we've
7354 made the final decision on stack procedure vs register procedure. */
7355 if (alpha_procedure_type == PT_STACK)
7356 sa_size -= 2;
7358 /* Decide whether to refer to objects off our PV via FP or PV.
7359 If we need FP for something else or if we receive a nonlocal
7360 goto (which expects PV to contain the value), we must use PV.
7361 Otherwise, start by assuming we can use FP. */
7363 vms_base_regno
7364 = (frame_pointer_needed
7365 || cfun->has_nonlocal_label
7366 || alpha_procedure_type == PT_STACK
7367 || crtl->outgoing_args_size)
7368 ? REG_PV : HARD_FRAME_POINTER_REGNUM;
7370 /* If we want to copy PV into FP, we need to find some register
7371 in which to save FP. */
7372 vms_save_fp_regno = -1;
7373 if (vms_base_regno == HARD_FRAME_POINTER_REGNUM)
7374 for (unsigned i = 0; i < 32; i++)
7375 if (! fixed_regs[i] && call_used_or_fixed_reg_p (i)
7376 && ! df_regs_ever_live_p (i))
7378 vms_save_fp_regno = i;
7379 break;
7382 /* A VMS condition handler requires a stack procedure in our
7383 implementation (not required by the calling standard). */
7384 if ((vms_save_fp_regno == -1 && alpha_procedure_type == PT_REGISTER)
7385 || cfun->machine->uses_condition_handler)
7386 vms_base_regno = REG_PV, alpha_procedure_type = PT_STACK;
7387 else if (alpha_procedure_type == PT_NULL)
7388 vms_base_regno = REG_PV;
7390 /* Stack unwinding should be done via FP unless we use it for PV. */
7391 vms_unwind_regno = (vms_base_regno == REG_PV
7392 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
7394 /* If this is a stack procedure, allow space for saving FP, RA and
7395 a condition handler slot if needed. */
7396 if (alpha_procedure_type == PT_STACK)
7397 sa_size += 2 + cfun->machine->uses_condition_handler;
7399 else
7401 /* Our size must be even (multiple of 16 bytes). */
7402 if (sa_size & 1)
7403 sa_size++;
7405 sa_size *= 8;
7407 if (TARGET_ABI_OPEN_VMS)
7408 frame_size = ALPHA_ROUND (sa_size
7409 + (alpha_procedure_type == PT_STACK ? 8 : 0)
7410 + frame_size
7411 + crtl->args.pretend_args_size);
7412 else
7413 frame_size = (ALPHA_ROUND (crtl->outgoing_args_size)
7414 + sa_size
7415 + ALPHA_ROUND (frame_size + crtl->args.pretend_args_size));
7417 cfun->machine->sa_mask = sa_mask;
7418 cfun->machine->sa_size = sa_size;
7419 cfun->machine->frame_size = frame_size;
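/* Rough OSF example of the computation above: a function that must
   save $9 and $26 ends up with two bits set in sa_mask, so sa_size
   becomes 2, stays even, and the save area takes 16 bytes;
   frame_size is then ALPHA_ROUND (outgoing args) + 16
   + ALPHA_ROUND (local frame + pretend args).  */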
7422 #undef TARGET_COMPUTE_FRAME_LAYOUT
7423 #define TARGET_COMPUTE_FRAME_LAYOUT alpha_compute_frame_layout
7425 /* Return 1 if this function can directly return via $26. */
7427 bool
7428 direct_return (void)
7430 return (TARGET_ABI_OSF
7431 && reload_completed
7432 && cfun->machine->frame_size == 0);
7435 /* Define the offset between two registers, one to be eliminated,
7436 and the other its replacement, at the start of a routine. */
7438 HOST_WIDE_INT
7439 alpha_initial_elimination_offset (unsigned int from,
7440 unsigned int to ATTRIBUTE_UNUSED)
7442 HOST_WIDE_INT ret;
7444 ret = cfun->machine->sa_size;
7445 ret += ALPHA_ROUND (crtl->outgoing_args_size);
7447 switch (from)
7449 case FRAME_POINTER_REGNUM:
7450 break;
7452 case ARG_POINTER_REGNUM:
7453 ret += (ALPHA_ROUND (get_frame_size ()
7454 + crtl->args.pretend_args_size)
7455 - crtl->args.pretend_args_size);
7456 break;
7458 default:
7459 gcc_unreachable ();
7462 return ret;
7465 #if TARGET_ABI_OPEN_VMS
7467 /* Worker function for TARGET_CAN_ELIMINATE. */
7469 static bool
7470 alpha_vms_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
7472 switch (alpha_procedure_type)
7474 case PT_NULL:
7475 /* NULL procedures have no frame of their own and we only
7476 know how to resolve from the current stack pointer. */
7477 return to == STACK_POINTER_REGNUM;
7479 case PT_REGISTER:
7480 case PT_STACK:
7481 /* We always eliminate except to the stack pointer if there is no
7482 usable frame pointer at hand. */
7483 return (to != STACK_POINTER_REGNUM
7484 || vms_unwind_regno != HARD_FRAME_POINTER_REGNUM);
7487 gcc_unreachable ();
7490 /* FROM is to be eliminated for TO. Return the offset so that TO+offset
7491 designates the same location as FROM. */
7493 HOST_WIDE_INT
7494 alpha_vms_initial_elimination_offset (unsigned int from, unsigned int to)
7496 /* The only possible attempts we ever expect are ARG or FRAME_PTR to
7497 HARD_FRAME or STACK_PTR. We need the alpha_procedure_type to decide
7498 on the proper computations and will need the register save area size
7499 in most cases. */
7501 HOST_WIDE_INT sa_size = cfun->machine->sa_size;
7503 /* PT_NULL procedures have no frame of their own and we only allow
7504 elimination to the stack pointer. This is the argument pointer and we
7505 resolve the soft frame pointer to that as well. */
7507 if (alpha_procedure_type == PT_NULL)
7508 return 0;
7510 /* For a PT_STACK procedure the frame layout looks as follows
7512 -----> decreasing addresses
7514 < size rounded up to 16 | likewise >
7515 --------------#------------------------------+++--------------+++-------#
7516 incoming args # pretended args | "frame" | regs sa | PV | outgoing args #
7517 --------------#---------------------------------------------------------#
7518 ^ ^ ^ ^
7519 ARG_PTR FRAME_PTR HARD_FRAME_PTR STACK_PTR
7522 PT_REGISTER procedures are similar in that they may have a frame of their
7523 own. They have no regs-sa/pv/outgoing-args area.
7525 We first compute offset to HARD_FRAME_PTR, then add what we need to get
7526 to STACK_PTR if need be. */
7529 HOST_WIDE_INT offset;
7530 HOST_WIDE_INT pv_save_size = alpha_procedure_type == PT_STACK ? 8 : 0;
7532 switch (from)
7534 case FRAME_POINTER_REGNUM:
7535 offset = ALPHA_ROUND (sa_size + pv_save_size);
7536 break;
7537 case ARG_POINTER_REGNUM:
7538 offset = (ALPHA_ROUND (sa_size + pv_save_size
7539 + get_frame_size ()
7540 + crtl->args.pretend_args_size)
7541 - crtl->args.pretend_args_size);
7542 break;
7543 default:
7544 gcc_unreachable ();
7547 if (to == STACK_POINTER_REGNUM)
7548 offset += ALPHA_ROUND (crtl->outgoing_args_size);
7550 return offset;
7554 #define COMMON_OBJECT "common_object"
7556 static tree
7557 common_object_handler (tree *node, tree name ATTRIBUTE_UNUSED,
7558 tree args ATTRIBUTE_UNUSED, int flags ATTRIBUTE_UNUSED,
7559 bool *no_add_attrs ATTRIBUTE_UNUSED)
7561 tree decl = *node;
7562 gcc_assert (DECL_P (decl));
7564 DECL_COMMON (decl) = 1;
7565 return NULL_TREE;
7568 TARGET_GNU_ATTRIBUTES (vms_attribute_table,
7570 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
7571 affects_type_identity, handler, exclude } */
7572 { COMMON_OBJECT, 0, 1, true, false, false, false, common_object_handler,
7573 NULL }
7576 void
7577 vms_output_aligned_decl_common(FILE *file, tree decl, const char *name,
7578 unsigned HOST_WIDE_INT size,
7579 unsigned int align)
7581 tree attr = DECL_ATTRIBUTES (decl);
7582 fprintf (file, "%s", COMMON_ASM_OP);
7583 assemble_name (file, name);
7584 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED, size);
7585 /* ??? Unlike on OSF/1, the alignment factor is not in log units. */
7586 fprintf (file, ",%u", align / BITS_PER_UNIT);
7587 if (attr)
7589 attr = lookup_attribute (COMMON_OBJECT, attr);
7590 if (attr)
7591 fprintf (file, ",%s",
7592 IDENTIFIER_POINTER (TREE_VALUE (TREE_VALUE (attr))));
7594 fputc ('\n', file);
7597 #undef COMMON_OBJECT
7599 #endif
7601 bool
7602 alpha_find_lo_sum_using_gp (rtx insn)
7604 subrtx_iterator::array_type array;
7605 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
7607 const_rtx x = *iter;
7608 if (GET_CODE (x) == LO_SUM && XEXP (x, 0) == pic_offset_table_rtx)
7609 return true;
7611 return false;
7614 static int
7615 alpha_does_function_need_gp (void)
7617 rtx_insn *insn;
7619 /* The GP being variable is an OSF abi thing. */
7620 if (! TARGET_ABI_OSF)
7621 return 0;
7623 /* We need the gp to load the address of __mcount. */
7624 if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
7625 return 1;
7627 /* The code emitted by alpha_output_mi_thunk_osf uses the gp. */
7628 if (cfun->is_thunk)
7629 return 1;
7631 /* The nonlocal receiver pattern assumes that the gp is valid for
7632 the nested function. Reasonable because it's almost always set
7633 correctly already. For the cases where that's wrong, make sure
7634 the nested function loads its gp on entry. */
7635 if (crtl->has_nonlocal_goto)
7636 return 1;
7638 /* If we need a GP (we have a LDSYM insn or a CALL_INSN), load it first.
7639 Even if we are a static function, we still need to do this in case
7640 our address is taken and passed to something like qsort. */
7642 push_topmost_sequence ();
7643 insn = get_insns ();
7644 pop_topmost_sequence ();
7646 for (; insn; insn = NEXT_INSN (insn))
7647 if (NONDEBUG_INSN_P (insn)
7648 && GET_CODE (PATTERN (insn)) != USE
7649 && GET_CODE (PATTERN (insn)) != CLOBBER
7650 && get_attr_usegp (insn))
7651 return 1;
7653 return 0;
7656 /* Helper function for alpha_store_data_bypass_p, handle just a single SET
7657 IN_SET. */
7659 static bool
7660 alpha_store_data_bypass_p_1 (rtx_insn *out_insn, rtx in_set)
7662 if (!MEM_P (SET_DEST (in_set)))
7663 return false;
7665 rtx out_set = single_set (out_insn);
7666 if (out_set)
7667 return !reg_mentioned_p (SET_DEST (out_set), SET_DEST (in_set));
7669 rtx out_pat = PATTERN (out_insn);
7670 if (GET_CODE (out_pat) != PARALLEL)
7671 return false;
7673 for (int i = 0; i < XVECLEN (out_pat, 0); i++)
7675 rtx out_exp = XVECEXP (out_pat, 0, i);
7677 if (GET_CODE (out_exp) == CLOBBER || GET_CODE (out_exp) == USE
7678 || GET_CODE (out_exp) == TRAP_IF)
7679 continue;
7681 gcc_assert (GET_CODE (out_exp) == SET);
7683 if (reg_mentioned_p (SET_DEST (out_exp), SET_DEST (in_set)))
7684 return false;
7687 return true;
7690 /* True if the dependency between OUT_INSN and IN_INSN is on the store
7691 data not the address operand(s) of the store. IN_INSN and OUT_INSN
7692 must be either a single_set or a PARALLEL with SETs inside.
7694 This alpha-specific version of store_data_bypass_p ignores TRAP_IF
7695 that would result in assertion failure (and internal compiler error)
7696 in the generic store_data_bypass_p function. */
7699 alpha_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
7701 rtx in_set = single_set (in_insn);
7702 if (in_set)
7703 return alpha_store_data_bypass_p_1 (out_insn, in_set);
7705 rtx in_pat = PATTERN (in_insn);
7706 if (GET_CODE (in_pat) != PARALLEL)
7707 return false;
7709 for (int i = 0; i < XVECLEN (in_pat, 0); i++)
7711 rtx in_exp = XVECEXP (in_pat, 0, i);
7713 if (GET_CODE (in_exp) == CLOBBER || GET_CODE (in_exp) == USE
7714 || GET_CODE (in_exp) == TRAP_IF)
7715 continue;
7717 gcc_assert (GET_CODE (in_exp) == SET);
7719 if (!alpha_store_data_bypass_p_1 (out_insn, in_exp))
7720 return false;
7723 return true;
7726 /* Helper function to set RTX_FRAME_RELATED_P on instructions, including
7727 sequences. */
7729 static rtx_insn *
7730 set_frame_related_p (void)
7732 rtx_insn *seq = get_insns ();
7733 rtx_insn *insn;
7735 end_sequence ();
7737 if (!seq)
7738 return NULL;
7740 if (INSN_P (seq))
7742 insn = seq;
7743 while (insn != NULL_RTX)
7745 RTX_FRAME_RELATED_P (insn) = 1;
7746 insn = NEXT_INSN (insn);
7748 seq = emit_insn (seq);
7750 else
7752 seq = emit_insn (seq);
7753 RTX_FRAME_RELATED_P (seq) = 1;
7755 return seq;
7758 #define FRP(exp) (start_sequence (), exp, set_frame_related_p ())
7760 /* Generates a store with the proper unwind info attached. VALUE is
7761 stored at BASE_REG+BASE_OFS. If FRAME_BIAS is nonzero, then BASE_REG
7762 contains SP+FRAME_BIAS, and that is the unwind info that should be
7763 generated. If FRAME_REG != VALUE, then VALUE is being stored on
7764 behalf of FRAME_REG, and FRAME_REG should be present in the unwind. */
7766 static void
7767 emit_frame_store_1 (rtx value, rtx base_reg, HOST_WIDE_INT frame_bias,
7768 HOST_WIDE_INT base_ofs, rtx frame_reg)
7770 rtx addr, mem;
7771 rtx_insn *insn;
7773 addr = plus_constant (Pmode, base_reg, base_ofs);
7774 mem = gen_frame_mem (DImode, addr);
7776 insn = emit_move_insn (mem, value);
7777 RTX_FRAME_RELATED_P (insn) = 1;
7779 if (frame_bias || value != frame_reg)
7781 if (frame_bias)
7783 addr = plus_constant (Pmode, stack_pointer_rtx,
7784 frame_bias + base_ofs);
7785 mem = gen_rtx_MEM (DImode, addr);
7788 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
7789 gen_rtx_SET (mem, frame_reg));
7793 static void
7794 emit_frame_store (unsigned int regno, rtx base_reg,
7795 HOST_WIDE_INT frame_bias, HOST_WIDE_INT base_ofs)
7797 rtx reg = gen_rtx_REG (DImode, regno);
7798 emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg);
7801 /* Write function prologue. */
7803 /* On VMS we have two kinds of functions:
7805 - stack frame (PROC_STACK)
7806 these are 'normal' functions with local variables which
7807 call other functions
7808 - register frame (PROC_REGISTER)
7809 these keep all data in registers and need no stack
7811 We must pass this to the assembler so it can generate the
7812 proper pdsc (procedure descriptor).
7813 This is done with the '.pdesc' command.
7815 On non-VMS targets, we don't really differentiate between the two, as
7816 we can simply allocate stack without saving registers. */
7818 void
7819 alpha_expand_prologue (void)
7821 /* Registers to save. */
7822 unsigned HOST_WIDE_INT sa_mask = cfun->machine->sa_mask;
7823 /* Stack space needed for pushing registers clobbered by us. */
7824 HOST_WIDE_INT sa_size = cfun->machine->sa_size;
7825 /* Complete stack size needed. */
7826 HOST_WIDE_INT frame_size = cfun->machine->frame_size;
7827 /* Probed stack size; it additionally includes the size of
7828 the "reserve region" if any. */
7829 HOST_WIDE_INT probed_size, sa_bias;
7830 /* Offset from base reg to register save area. */
7831 HOST_WIDE_INT reg_offset;
7832 rtx sa_reg;
7834 if (flag_stack_usage_info)
7835 current_function_static_stack_size = frame_size;
7837 if (TARGET_ABI_OPEN_VMS)
7838 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
7839 else
7840 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
7842 /* Emit an insn to reload GP, if needed. */
7843 if (TARGET_ABI_OSF)
7845 alpha_function_needs_gp = alpha_does_function_need_gp ();
7846 if (alpha_function_needs_gp)
7847 emit_insn (gen_prologue_ldgp ());
7850 /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert
7851 the call to mcount ourselves, rather than having the linker do it
7852 magically in response to -pg. Since _mcount has special linkage,
7853 don't represent the call as a call. */
7854 if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
7855 emit_insn (gen_prologue_mcount ());
7857 /* Adjust the stack by the frame size. If the frame size is > 4096
7858 bytes, we need to be sure we probe somewhere in the first and last
7859 4096 bytes (we can probably get away without the latter test) and
7860 every 8192 bytes in between. If the frame size is > 32768, we
7861 do this in a loop. Otherwise, we generate the explicit probe
7862 instructions.
7864 Note that we are only allowed to adjust sp once in the prologue. */
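/* Worked example: with frame_size = 20000 and no stack checking, the
   code below emits probes at sp-4096 and sp-12288, a final probe at
   sp-20000 when no registers are being saved, and then a single sp
   adjustment of -20000.  */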
7866 probed_size = frame_size;
7867 if (flag_stack_check || flag_stack_clash_protection)
7868 probed_size += get_stack_check_protect ();
7870 if (probed_size <= 32768)
7872 if (probed_size > 4096)
7874 int probed;
7876 for (probed = 4096; probed < probed_size; probed += 8192)
7877 emit_insn (gen_stack_probe_internal (GEN_INT (-probed)));
7879 /* We only have to do this probe if we aren't saving registers or
7880 if we are probing beyond the frame because of -fstack-check. */
7881 if ((sa_size == 0 && probed_size > probed - 4096)
7882 || flag_stack_check || flag_stack_clash_protection)
7883 emit_insn (gen_stack_probe_internal (GEN_INT (-probed_size)));
7886 if (frame_size != 0)
7887 FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
7888 GEN_INT (-frame_size))));
7890 else
7892 /* Here we generate code to set R22 to SP + 4096 and set R23 to the
7893 number of 8192 byte blocks to probe. We then probe each block
7894 in the loop and then set SP to the proper location. If the
7895 amount remaining is > 4096, we have to do one more probe if we
7896 are not saving any registers or if we are probing beyond the
7897 frame because of -fstack-check. */
7899 HOST_WIDE_INT blocks = (probed_size + 4096) / 8192;
7900 HOST_WIDE_INT leftover = probed_size + 4096 - blocks * 8192;
7901 rtx ptr = gen_rtx_REG (DImode, 22);
7902 rtx count = gen_rtx_REG (DImode, 23);
7903 rtx seq;
7905 emit_move_insn (count, GEN_INT (blocks));
7906 emit_insn (gen_adddi3 (ptr, stack_pointer_rtx, GEN_INT (4096)));
7908 /* Because of the difficulty in emitting a new basic block this
7909 late in the compilation, generate the loop as a single insn. */
7910 emit_insn (gen_prologue_stack_probe_loop (count, ptr));
7912 if ((leftover > 4096 && sa_size == 0)
7913 || flag_stack_check || flag_stack_clash_protection)
7915 rtx last = gen_rtx_MEM (DImode,
7916 plus_constant (Pmode, ptr, -leftover));
7917 MEM_VOLATILE_P (last) = 1;
7918 emit_move_insn (last, const0_rtx);
7921 if (flag_stack_check || flag_stack_clash_protection)
7923 /* If -fstack-check is specified we have to load the entire
7924 constant into a register and subtract from the sp in one go,
7925 because the probed stack size is not equal to the frame size. */
7926 HOST_WIDE_INT lo, hi;
7927 lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
7928 hi = frame_size - lo;
7930 emit_move_insn (ptr, GEN_INT (hi));
7931 emit_insn (gen_adddi3 (ptr, ptr, GEN_INT (lo)));
7932 seq = emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx,
7933 ptr));
7935 else
7937 seq = emit_insn (gen_adddi3 (stack_pointer_rtx, ptr,
7938 GEN_INT (-leftover)));
7941 /* This alternative is special, because the DWARF code cannot
7942 possibly intuit through the loop above. So we invent this
7943 note it looks at instead. */
7944 RTX_FRAME_RELATED_P (seq) = 1;
7945 add_reg_note (seq, REG_FRAME_RELATED_EXPR,
7946 gen_rtx_SET (stack_pointer_rtx,
7947 plus_constant (Pmode, stack_pointer_rtx,
7948 -frame_size)));
7951 /* Cope with very large offsets to the register save area. */
7952 sa_bias = 0;
7953 sa_reg = stack_pointer_rtx;
7954 if (reg_offset + sa_size > 0x8000)
7956 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
7957 rtx sa_bias_rtx;
7959 if (low + sa_size <= 0x8000)
7960 sa_bias = reg_offset - low, reg_offset = low;
7961 else
7962 sa_bias = reg_offset, reg_offset = 0;
7964 sa_reg = gen_rtx_REG (DImode, 24);
7965 sa_bias_rtx = GEN_INT (sa_bias);
7967 if (add_operand (sa_bias_rtx, DImode))
7968 emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_bias_rtx));
7969 else
7971 emit_move_insn (sa_reg, sa_bias_rtx);
7972 emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_reg));
7976 /* Save regs in stack order. Beginning with VMS PV. */
7977 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7978 emit_frame_store (REG_PV, stack_pointer_rtx, 0, 0);
7980 /* Save register RA next, followed by any other registers
7981 that need to be saved. */
7982 for (unsigned i = REG_RA; sa_mask != 0; i = ctz_hwi(sa_mask))
7984 emit_frame_store (i, sa_reg, sa_bias, reg_offset);
7985 reg_offset += 8;
7986 sa_mask &= ~(HOST_WIDE_INT_1U << i);
7989 if (TARGET_ABI_OPEN_VMS)
7991 /* Register frame procedures save the fp. */
7992 if (alpha_procedure_type == PT_REGISTER)
7994 rtx_insn *insn =
7995 emit_move_insn (gen_rtx_REG (DImode, vms_save_fp_regno),
7996 hard_frame_pointer_rtx);
7997 add_reg_note (insn, REG_CFA_REGISTER, NULL);
7998 RTX_FRAME_RELATED_P (insn) = 1;
8001 if (alpha_procedure_type != PT_NULL && vms_base_regno != REG_PV)
8002 emit_insn (gen_force_movdi (gen_rtx_REG (DImode, vms_base_regno),
8003 gen_rtx_REG (DImode, REG_PV)));
8005 if (alpha_procedure_type != PT_NULL
8006 && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM)
8007 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
8009 /* If we have to allocate space for outgoing args, do it now. */
8010 if (crtl->outgoing_args_size != 0)
8012 rtx_insn *seq
8013 = emit_move_insn (stack_pointer_rtx,
8014 plus_constant
8015 (Pmode, hard_frame_pointer_rtx,
8016 - (ALPHA_ROUND
8017 (crtl->outgoing_args_size))));
8019 /* Only set FRAME_RELATED_P on the stack adjustment we just emitted
8020 if ! frame_pointer_needed. Setting the bit will change the CFA
8021 computation rule to use sp again, which would be wrong if we had
8022 frame_pointer_needed, as this means sp might move unpredictably
8023 later on.
8025 Also, note that
8026 frame_pointer_needed
8027 => vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
8029 crtl->outgoing_args_size != 0
8030 => alpha_procedure_type != PT_NULL,
8032 so when we are not setting the bit here, we are guaranteed to
8033 have emitted an FRP frame pointer update just before. */
8034 RTX_FRAME_RELATED_P (seq) = ! frame_pointer_needed;
8037 else
8039 /* If we need a frame pointer, set it from the stack pointer. */
8040 if (frame_pointer_needed)
8042 if (TARGET_CAN_FAULT_IN_PROLOGUE)
8043 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
8044 else
8045 /* This must always be the last instruction in the
8046 prologue, thus we emit a special move + clobber. */
8047 FRP (emit_insn (gen_init_fp (hard_frame_pointer_rtx,
8048 stack_pointer_rtx, sa_reg)));
8052 /* The ABIs for VMS and OSF/1 say that while we can schedule insns into
8053 the prologue, for exception handling reasons, we cannot do this for
8054 any insn that might fault. We could prevent this for mems with a
8055 (clobber:BLK (scratch)), but this doesn't work for fp insns. So we
8056 have to prevent all such scheduling with a blockage.
8058 Linux, on the other hand, never bothered to implement OSF/1's
8059 exception handling, and so doesn't care about such things. Anyone
8060 planning to use dwarf2 frame-unwind info can also omit the blockage. */
8062 if (! TARGET_CAN_FAULT_IN_PROLOGUE)
8063 emit_insn (gen_blockage ());
8066 /* Count the number of .file directives, so that .loc is up to date. */
8067 int num_source_filenames = 0;
8069 /* Output the textual info surrounding the prologue. */
8071 void
8072 alpha_start_function (FILE *file, const char *fnname, tree decl)
8074 unsigned long imask, fmask;
8075 /* Complete stack size needed. */
8076 HOST_WIDE_INT frame_size = cfun->machine->frame_size;
8077 /* The maximum debuggable frame size. */
8078 const HOST_WIDE_INT max_frame_size = HOST_WIDE_INT_1 << 31;
8079 /* Offset from base reg to register save area. */
8080 HOST_WIDE_INT reg_offset;
8081 char *entry_label = (char *) alloca (strlen (fnname) + 6);
8082 char *tramp_label = (char *) alloca (strlen (fnname) + 6);
8083 int i;
8085 #if TARGET_ABI_OPEN_VMS
8086 vms_start_function (fnname);
8087 #endif
8089 alpha_fnname = fnname;
8091 if (TARGET_ABI_OPEN_VMS)
8092 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
8093 else
8094 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
8096 imask = cfun->machine->sa_mask & 0xffffffffu;
8097 fmask = cfun->machine->sa_mask >> 32;
8099 /* Issue function start and label. */
8100 if (TARGET_ABI_OPEN_VMS || !flag_inhibit_size_directive)
8102 fputs ("\t.ent ", file);
8103 assemble_name (file, fnname);
8104 putc ('\n', file);
8106 /* If the function needs GP, we'll write the "..ng" label there.
8107 Otherwise, do it here. */
8108 if (TARGET_ABI_OSF
8109 && ! alpha_function_needs_gp
8110 && ! cfun->is_thunk)
8112 putc ('$', file);
8113 assemble_name (file, fnname);
8114 fputs ("..ng:\n", file);
8117 /* Nested functions on VMS that are potentially called via trampoline
8118 get a special transfer entry point that loads the called functions
8119 procedure descriptor and static chain. */
8120 if (TARGET_ABI_OPEN_VMS
8121 && !TREE_PUBLIC (decl)
8122 && DECL_CONTEXT (decl)
8123 && !TYPE_P (DECL_CONTEXT (decl))
8124 && TREE_CODE (DECL_CONTEXT (decl)) != TRANSLATION_UNIT_DECL)
8126 strcpy (tramp_label, fnname);
8127 strcat (tramp_label, "..tr");
8128 ASM_OUTPUT_LABEL (file, tramp_label);
8129 fprintf (file, "\tldq $1,24($27)\n");
8130 fprintf (file, "\tldq $27,16($27)\n");
8133 strcpy (entry_label, fnname);
8134 if (TARGET_ABI_OPEN_VMS)
8135 strcat (entry_label, "..en");
8137 ASM_OUTPUT_FUNCTION_LABEL (file, entry_label, decl);
8138 inside_function = TRUE;
8140 if (TARGET_ABI_OPEN_VMS)
8141 fprintf (file, "\t.base $%d\n", vms_base_regno);
8143 if (TARGET_ABI_OSF
8144 && TARGET_IEEE_CONFORMANT
8145 && !flag_inhibit_size_directive)
8147 /* Set flags in procedure descriptor to request IEEE-conformant
8148 math-library routines. The value we set it to is PDSC_EXC_IEEE
8149 (/usr/include/pdsc.h). */
8150 fputs ("\t.eflag 48\n", file);
8153 /* Set up offsets to alpha virtual arg/local debugging pointer. */
8154 alpha_auto_offset = -frame_size + crtl->args.pretend_args_size;
8155 alpha_arg_offset = -frame_size + 48;
8157 /* Describe our frame. If the frame size is larger than an integer,
8158 print it as zero to avoid an assembler error. We won't be
8159 properly describing such a frame, but that's the best we can do. */
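/* For a hypothetical OSF function with a 96-byte frame, a frame pointer
   and no pretend args, the directive emitted below would look like
   ".frame $15,96,$26,0"; without a frame pointer the base register would
   be $30 (the stack pointer) instead.  The numbers are illustrative only. */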
8160 if (TARGET_ABI_OPEN_VMS)
8161 fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,"
8162 HOST_WIDE_INT_PRINT_DEC "\n",
8163 vms_unwind_regno,
8164 frame_size >= max_frame_size ? 0 : frame_size,
8165 reg_offset);
8166 else if (!flag_inhibit_size_directive)
8167 fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,%d\n",
8168 (frame_pointer_needed
8169 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM),
8170 frame_size >= max_frame_size ? 0 : frame_size,
8171 crtl->args.pretend_args_size);
8173 /* Describe which registers were spilled. */
8174 if (TARGET_ABI_OPEN_VMS)
8176 if (imask)
8177 /* ??? Does VMS care if mask contains ra? The old code didn't
8178 set it, so I don't here. */
8179 fprintf (file, "\t.mask 0x%lx,0\n", imask & ~(1UL << REG_RA));
8180 if (fmask)
8181 fprintf (file, "\t.fmask 0x%lx,0\n", fmask);
8182 if (alpha_procedure_type == PT_REGISTER)
8183 fprintf (file, "\t.fp_save $%d\n", vms_save_fp_regno);
8185 else if (!flag_inhibit_size_directive)
8187 if (imask)
8189 fprintf (file, "\t.mask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", imask,
8190 frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
8192 for (i = 0; i < 32; ++i)
8193 if (imask & (1UL << i))
8194 reg_offset += 8;
8197 if (fmask)
8198 fprintf (file, "\t.fmask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", fmask,
8199 frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
8202 #if TARGET_ABI_OPEN_VMS
8203 /* If a user condition handler has been installed at some point, emit
8204 the procedure descriptor bits to point the Condition Handling Facility
8205 at the indirection wrapper, and state the fp offset at which the user
8206 handler may be found. */
8207 if (cfun->machine->uses_condition_handler)
8209 fprintf (file, "\t.handler __gcc_shell_handler\n");
8210 fprintf (file, "\t.handler_data %d\n", VMS_COND_HANDLER_FP_OFFSET);
8213 #ifdef TARGET_VMS_CRASH_DEBUG
8214 /* Support of minimal traceback info. */
8215 switch_to_section (readonly_data_section);
8216 fprintf (file, "\t.align 3\n");
8217 assemble_name (file, fnname); fputs ("..na:\n", file);
8218 fputs ("\t.ascii \"", file);
8219 assemble_name (file, fnname);
8220 fputs ("\\0\"\n", file);
8221 switch_to_section (text_section);
8222 #endif
8223 #endif /* TARGET_ABI_OPEN_VMS */
8226 /* Emit the .prologue note at the scheduled end of the prologue. */
8228 static void
8229 alpha_output_function_end_prologue (FILE *file)
8231 if (TARGET_ABI_OPEN_VMS)
8232 fputs ("\t.prologue\n", file);
8233 else if (!flag_inhibit_size_directive)
8234 fprintf (file, "\t.prologue %d\n",
8235 alpha_function_needs_gp || cfun->is_thunk);
8238 /* Write function epilogue. */
8240 void
8241 alpha_expand_epilogue (void)
8243 /* Registers to save. */
8244 unsigned HOST_WIDE_INT sa_mask = cfun->machine->sa_mask;
8245 /* Stack space needed for pushing registers clobbered by us. */
8246 HOST_WIDE_INT sa_size = cfun->machine->sa_size;
8247 /* Complete stack size needed. */
8248 HOST_WIDE_INT frame_size = cfun->machine->frame_size;
8249 /* Offset from base reg to register save area. */
8250 HOST_WIDE_INT reg_offset;
8251 int fp_is_frame_pointer, fp_offset;
8252 rtx sa_reg, sa_reg_exp = NULL;
8253 rtx sp_adj1, sp_adj2, mem, reg, insn;
8254 rtx eh_ofs;
8255 rtx cfa_restores = NULL_RTX;
8257 if (TARGET_ABI_OPEN_VMS)
8259 if (alpha_procedure_type == PT_STACK)
8260 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
8261 else
8262 reg_offset = 0;
8264 else
8265 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
8267 fp_is_frame_pointer
8268 = (TARGET_ABI_OPEN_VMS
8269 ? alpha_procedure_type == PT_STACK
8270 : frame_pointer_needed);
8271 fp_offset = 0;
8272 sa_reg = stack_pointer_rtx;
8274 if (crtl->calls_eh_return)
8275 eh_ofs = EH_RETURN_STACKADJ_RTX;
8276 else
8277 eh_ofs = NULL_RTX;
8279 if (sa_size)
8281 /* If we have a frame pointer, restore SP from it. */
8282 if (TARGET_ABI_OPEN_VMS
8283 ? vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
8284 : frame_pointer_needed)
8285 emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
8287 /* Cope with very large offsets to the register save area. */
8288 if (reg_offset + sa_size > 0x8000)
8290 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
8291 HOST_WIDE_INT bias;
8293 if (low + sa_size <= 0x8000)
8294 bias = reg_offset - low, reg_offset = low;
8295 else
8296 bias = reg_offset, reg_offset = 0;
8298 sa_reg = gen_rtx_REG (DImode, 22);
8299 sa_reg_exp = plus_constant (Pmode, stack_pointer_rtx, bias);
8301 emit_move_insn (sa_reg, sa_reg_exp);
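      /* As a worked example (the values are illustrative): if reg_offset were
	 0x9010 and sa_size 0x40, then low = ((0x9010 & 0xffff) ^ 0x8000) - 0x8000
	 = -0x6ff0, and since low + sa_size fits in 16 bits we use
	 bias = 0x9010 - (-0x6ff0) = 0x10000.  Register $22 then holds
	 sp + 0x10000 and every save-area access below uses a small signed
	 16-bit displacement. */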
8304 /* Restore registers in order, excepting a true frame pointer. */
8305 for (unsigned i = REG_RA; sa_mask != 0; i = ctz_hwi(sa_mask))
8307 if (i == HARD_FRAME_POINTER_REGNUM && fp_is_frame_pointer)
8308 fp_offset = reg_offset;
8309 else
8311 mem = gen_frame_mem (DImode,
8312 plus_constant (Pmode, sa_reg,
8313 reg_offset));
8314 reg = gen_rtx_REG (DImode, i);
8315 emit_move_insn (reg, mem);
8316 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
8317 cfa_restores);
8319 reg_offset += 8;
8320 sa_mask &= ~(HOST_WIDE_INT_1U << i);
8324 if (frame_size || eh_ofs)
8326 sp_adj1 = stack_pointer_rtx;
8328 if (eh_ofs)
8330 sp_adj1 = gen_rtx_REG (DImode, 23);
8331 emit_move_insn (sp_adj1,
8332 gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs));
8335 /* If the stack size is large, begin computation into a temporary
8336 register so as not to interfere with a potential fp restore,
8337 which must be consecutive with an SP restore. */
8338 if (frame_size < 32768 && !cfun->calls_alloca)
8339 sp_adj2 = GEN_INT (frame_size);
8340 else if (frame_size < 0x40007fffL)
8342 int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
8344 sp_adj2 = plus_constant (Pmode, sp_adj1, frame_size - low);
8345 if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2))
8346 sp_adj1 = sa_reg;
8347 else
8349 sp_adj1 = gen_rtx_REG (DImode, 23);
8350 emit_move_insn (sp_adj1, sp_adj2);
8352 sp_adj2 = GEN_INT (low);
8354 else
8356 rtx tmp = gen_rtx_REG (DImode, 23);
8357 sp_adj2 = alpha_emit_set_const (tmp, DImode, frame_size, 3, false);
8358 if (!sp_adj2)
8360 		  /* We can't drop new things to memory this late, as far as we know,
8361 so build it up by pieces. */
8362 sp_adj2 = alpha_emit_set_long_const (tmp, frame_size);
8363 gcc_assert (sp_adj2);
8367 /* From now on, things must be in order. So emit blockages. */
8369 /* Restore the frame pointer. */
8370 if (fp_is_frame_pointer)
8372 emit_insn (gen_blockage ());
8373 mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg,
8374 fp_offset));
8375 emit_move_insn (hard_frame_pointer_rtx, mem);
8376 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
8377 hard_frame_pointer_rtx, cfa_restores);
8379 else if (TARGET_ABI_OPEN_VMS)
8381 emit_insn (gen_blockage ());
8382 emit_move_insn (hard_frame_pointer_rtx,
8383 gen_rtx_REG (DImode, vms_save_fp_regno));
8384 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
8385 hard_frame_pointer_rtx, cfa_restores);
8388 /* Restore the stack pointer. */
8389 emit_insn (gen_blockage ());
8390 if (sp_adj2 == const0_rtx)
8391 insn = emit_move_insn (stack_pointer_rtx, sp_adj1);
8392 else
8393 insn = emit_move_insn (stack_pointer_rtx,
8394 gen_rtx_PLUS (DImode, sp_adj1, sp_adj2));
8395 REG_NOTES (insn) = cfa_restores;
8396 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
8397 RTX_FRAME_RELATED_P (insn) = 1;
8399 else
8401 gcc_assert (cfa_restores == NULL);
8403 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_REGISTER)
8405 emit_insn (gen_blockage ());
8406 insn = emit_move_insn (hard_frame_pointer_rtx,
8407 gen_rtx_REG (DImode, vms_save_fp_regno));
8408 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
8409 RTX_FRAME_RELATED_P (insn) = 1;
8414 /* Output the rest of the textual info surrounding the epilogue. */
8416 void
8417 alpha_end_function (FILE *file, const char *fnname, tree decl ATTRIBUTE_UNUSED)
8419 rtx_insn *insn;
8421 /* We output a nop after noreturn calls at the very end of the function to
8422 ensure that the return address always remains in the caller's code range,
8423 as not doing so might confuse unwinding engines. */
8424 insn = get_last_insn ();
8425 if (!INSN_P (insn))
8426 insn = prev_active_insn (insn);
8427 if (insn && CALL_P (insn))
8428 output_asm_insn (get_insn_template (CODE_FOR_nop, NULL), NULL);
8430 #if TARGET_ABI_OPEN_VMS
8431 /* Write the linkage entries. */
8432 alpha_write_linkage (file, fnname);
8433 #endif
8435 /* End the function. */
8436 if (TARGET_ABI_OPEN_VMS
8437 || !flag_inhibit_size_directive)
8439 fputs ("\t.end ", file);
8440 assemble_name (file, fnname);
8441 putc ('\n', file);
8443 inside_function = FALSE;
8446 #if TARGET_ABI_OSF
8447 /* Emit a tail call to FUNCTION after adjusting THIS by DELTA.
8449 In order to avoid the hordes of differences between generated code
8450 with and without TARGET_EXPLICIT_RELOCS, and to avoid duplicating
8451 lots of code loading up large constants, generate rtl and emit it
8452 instead of going straight to text.
8454 Not sure why this idea hasn't been explored before... */
8456 static void
8457 alpha_output_mi_thunk_osf (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
8458 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8459 tree function)
8461 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
8462 HOST_WIDE_INT hi, lo;
8463 rtx this_rtx, funexp;
8464 rtx_insn *insn;
8466 /* We always require a valid GP. */
8467 emit_insn (gen_prologue_ldgp ());
8468 emit_note (NOTE_INSN_PROLOGUE_END);
8470 /* Find the "this" pointer. If the function returns a structure,
8471 the structure return pointer is in $16. */
8472 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
8473 this_rtx = gen_rtx_REG (Pmode, 17);
8474 else
8475 this_rtx = gen_rtx_REG (Pmode, 16);
8477 /* Add DELTA. When possible we use ldah+lda. Otherwise load the
8478 entire constant for the add. */
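  /* Illustrative example (not from a real vtable): for delta = 0x12345678
     the split below gives lo = 0x5678 and hi = 0x12340000, so hi + lo == delta
     and the two adds become an ldah/lda pair; a delta such as 0x18000 instead
     yields lo = -0x8000, hi = 0x20000.  Only when the split cannot reproduce
     delta do we fall back to materializing the full 64-bit constant. */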
8479 lo = ((delta & 0xffff) ^ 0x8000) - 0x8000;
8480 hi = (((delta - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8481 if (hi + lo == delta)
8483 if (hi)
8484 emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (hi)));
8485 if (lo)
8486 emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (lo)));
8488 else
8490 rtx tmp = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 0), delta);
8491 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
8494 /* Add a delta stored in the vtable at VCALL_OFFSET. */
8495 if (vcall_offset)
8497 rtx tmp, tmp2;
8499 tmp = gen_rtx_REG (Pmode, 0);
8500 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
8502 lo = ((vcall_offset & 0xffff) ^ 0x8000) - 0x8000;
8503 hi = (((vcall_offset - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8504 if (hi + lo == vcall_offset)
8506 if (hi)
8507 emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (hi)));
8509 else
8511 tmp2 = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 1),
8512 vcall_offset);
8513 emit_insn (gen_adddi3 (tmp, tmp, tmp2));
8514 lo = 0;
8516 if (lo)
8517 tmp2 = gen_rtx_PLUS (Pmode, tmp, GEN_INT (lo));
8518 else
8519 tmp2 = tmp;
8520 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp2));
8522 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
8525 /* Generate a tail call to the target function. */
8526 if (! TREE_USED (function))
8528 assemble_external (function);
8529 TREE_USED (function) = 1;
8531 funexp = XEXP (DECL_RTL (function), 0);
8532 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8533 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx));
8534 SIBLING_CALL_P (insn) = 1;
8536 /* Run just enough of rest_of_compilation to get the insns emitted.
8537 There's not really enough bulk here to make other passes such as
8538      instruction scheduling worthwhile. */
8539 insn = get_insns ();
8540 shorten_branches (insn);
8541 assemble_start_function (thunk_fndecl, fnname);
8542 final_start_function (insn, file, 1);
8543 final (insn, file, 1);
8544 final_end_function ();
8545 assemble_end_function (thunk_fndecl, fnname);
8547 #endif /* TARGET_ABI_OSF */
8549 /* Name of the file containing the current function. */
8551 static const char *current_function_file = "";
8553 /* Offsets to alpha virtual arg/local debugging pointers. */
8555 long alpha_arg_offset;
8556 long alpha_auto_offset;
8558 /* Emit a new filename to a stream. */
8560 void
8561 alpha_output_filename (FILE *stream, const char *name)
8563 static int first_time = TRUE;
8565 if (first_time)
8567 first_time = FALSE;
8568 ++num_source_filenames;
8569 current_function_file = name;
8570 fprintf (stream, "\t.file\t%d ", num_source_filenames);
8571 output_quoted_string (stream, name);
8572 fprintf (stream, "\n");
8575 else if (name != current_function_file
8576 && strcmp (name, current_function_file) != 0)
8578 ++num_source_filenames;
8579 current_function_file = name;
8580 fprintf (stream, "\t.file\t%d ", num_source_filenames);
8582 output_quoted_string (stream, name);
8583 fprintf (stream, "\n");
8587 /* Structure to show the current status of registers and memory. */
8589 struct shadow_summary
8591 struct {
8592 unsigned int i : 31; /* Mask of int regs */
8593 unsigned int fp : 31; /* Mask of fp regs */
8594 unsigned int mem : 1; /* mem == imem | fpmem */
8595 } used, defd;
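/* As an illustration of how the structure is filled in: summarizing the
   (hypothetical) insn (set (reg:DI 1) (plus:DI (reg:DI 2) (reg:DI 3)))
   sets bits 2 and 3 in used.i and bit 1 in defd.i, leaving the fp and mem
   fields untouched. */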
8598 /* Summarize the effects of expression X on the machine.  Update SUM, a pointer
8599 to the summary structure. SET is nonzero if the insn is setting the
8600 object, otherwise zero. */
8602 static void
8603 summarize_insn (rtx x, struct shadow_summary *sum, int set)
8605 const char *format_ptr;
8606 int i, j;
8608 if (x == 0)
8609 return;
8611 switch (GET_CODE (x))
8613 /* ??? Note that this case would be incorrect if the Alpha had a
8614 ZERO_EXTRACT in SET_DEST. */
8615 case SET:
8616 summarize_insn (SET_SRC (x), sum, 0);
8617 summarize_insn (SET_DEST (x), sum, 1);
8618 break;
8620 case CLOBBER:
8621 summarize_insn (XEXP (x, 0), sum, 1);
8622 break;
8624 case USE:
8625 summarize_insn (XEXP (x, 0), sum, 0);
8626 break;
8628 case ASM_OPERANDS:
8629 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
8630 summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0);
8631 break;
8633 case PARALLEL:
8634 for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
8635 summarize_insn (XVECEXP (x, 0, i), sum, 0);
8636 break;
8638 case SUBREG:
8639 summarize_insn (SUBREG_REG (x), sum, 0);
8640 break;
8642 case REG:
8644 int regno = REGNO (x);
8645 unsigned long mask = ((unsigned long) 1) << (regno % 32);
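	/* Register 31 ($31) and register 63 ($f31) are the hardwired zero
	   registers: reads yield zero and writes are discarded, so they
	   carry no dependence information and are skipped. */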
8647 if (regno == 31 || regno == 63)
8648 break;
8650 if (set)
8652 if (regno < 32)
8653 sum->defd.i |= mask;
8654 else
8655 sum->defd.fp |= mask;
8657 else
8659 if (regno < 32)
8660 sum->used.i |= mask;
8661 else
8662 sum->used.fp |= mask;
8665 break;
8667 case MEM:
8668 if (set)
8669 sum->defd.mem = 1;
8670 else
8671 sum->used.mem = 1;
8673 /* Find the regs used in memory address computation: */
8674 summarize_insn (XEXP (x, 0), sum, 0);
8675 break;
8677 case CONST_INT: case CONST_WIDE_INT: case CONST_DOUBLE:
8678 case SYMBOL_REF: case LABEL_REF: case CONST:
8679 case SCRATCH: case ASM_INPUT:
8680 break;
8682 /* Handle common unary and binary ops for efficiency. */
8683 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
8684 case MOD: case UDIV: case UMOD: case AND: case IOR:
8685 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
8686 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
8687 case NE: case EQ: case GE: case GT: case LE:
8688 case LT: case GEU: case GTU: case LEU: case LTU:
8689 summarize_insn (XEXP (x, 0), sum, 0);
8690 summarize_insn (XEXP (x, 1), sum, 0);
8691 break;
8693 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
8694 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
8695 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
8696 case SQRT: case FFS:
8697 summarize_insn (XEXP (x, 0), sum, 0);
8698 break;
8700 default:
8701 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
8702 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8703 switch (format_ptr[i])
8705 case 'e':
8706 summarize_insn (XEXP (x, i), sum, 0);
8707 break;
8709 case 'E':
8710 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8711 summarize_insn (XVECEXP (x, i, j), sum, 0);
8712 break;
8714 case 'i':
8715 case 'L':
8716 break;
8718 default:
8719 gcc_unreachable ();
8724 /* Ensure a sufficient number of `trapb' insns are in the code when
8725 the user requests code with a trap precision of functions or
8726 instructions.
8728 In naive mode, when the user requests a trap-precision of
8729 "instruction", a trapb is needed after every instruction that may
8730 generate a trap. This ensures that the code is resumption safe but
8731 it is also slow.
8733 When optimizations are turned on, we delay issuing a trapb as long
8734 as possible. In this context, a trap shadow is the sequence of
8735 instructions that starts with a (potentially) trap generating
8736 instruction and extends to the next trapb or call_pal instruction
8737 (but GCC never generates call_pal by itself). We can delay (and
8738 therefore sometimes omit) a trapb subject to the following
8739 conditions:
8741 (a) On entry to the trap shadow, if any Alpha register or memory
8742 location contains a value that is used as an operand value by some
8743 instruction in the trap shadow (live on entry), then no instruction
8744 in the trap shadow may modify the register or memory location.
8746 (b) Within the trap shadow, the computation of the base register
8747 for a memory load or store instruction may not involve using the
8748 result of an instruction that might generate an UNPREDICTABLE
8749 result.
8751 (c) Within the trap shadow, no register may be used more than once
8752 as a destination register. (This is to make life easier for the
8753 trap-handler.)
8755 (d) The trap shadow may not include any branch instructions. */
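/* A small, purely illustrative shadow under -mtrap-precision=i with
   optimization enabled (register numbers and qualifiers are only examples):

	addt/su $f1,$f2,$f10	# may trap; the shadow opens here
	addq    $3,$4,$5	# ok: fresh destination, no operand live on
				# entry to the shadow is overwritten
	mult/su $f1,$f2,$f10	# reuses $f10 as a destination, violating (c),
				# so a trapb must be emitted before it.  */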
8757 static void
8758 alpha_handle_trap_shadows (void)
8760 struct shadow_summary shadow;
8761 int trap_pending, exception_nesting;
8762 rtx_insn *i, *n;
8764 trap_pending = 0;
8765 exception_nesting = 0;
8766 shadow.used.i = 0;
8767 shadow.used.fp = 0;
8768 shadow.used.mem = 0;
8769 shadow.defd = shadow.used;
8771 for (i = get_insns (); i ; i = NEXT_INSN (i))
8773 if (NOTE_P (i))
8775 switch (NOTE_KIND (i))
8777 case NOTE_INSN_EH_REGION_BEG:
8778 exception_nesting++;
8779 if (trap_pending)
8780 goto close_shadow;
8781 break;
8783 case NOTE_INSN_EH_REGION_END:
8784 exception_nesting--;
8785 if (trap_pending)
8786 goto close_shadow;
8787 break;
8789 case NOTE_INSN_EPILOGUE_BEG:
8790 if (trap_pending && alpha_tp >= ALPHA_TP_FUNC)
8791 goto close_shadow;
8792 break;
8795 else if (trap_pending)
8797 if (alpha_tp == ALPHA_TP_FUNC)
8799 if (JUMP_P (i)
8800 && GET_CODE (PATTERN (i)) == RETURN)
8801 goto close_shadow;
8803 else if (alpha_tp == ALPHA_TP_INSN)
8805 if (optimize > 0)
8807 struct shadow_summary sum;
8809 sum.used.i = 0;
8810 sum.used.fp = 0;
8811 sum.used.mem = 0;
8812 sum.defd = sum.used;
8814 switch (GET_CODE (i))
8816 case INSN:
8817 /* Annoyingly, get_attr_trap will die on these. */
8818 if (GET_CODE (PATTERN (i)) == USE
8819 || GET_CODE (PATTERN (i)) == CLOBBER)
8820 break;
8822 summarize_insn (PATTERN (i), &sum, 0);
8824 if ((sum.defd.i & shadow.defd.i)
8825 || (sum.defd.fp & shadow.defd.fp))
8827 /* (c) would be violated */
8828 goto close_shadow;
8831 /* Combine shadow with summary of current insn: */
8832 shadow.used.i |= sum.used.i;
8833 shadow.used.fp |= sum.used.fp;
8834 shadow.used.mem |= sum.used.mem;
8835 shadow.defd.i |= sum.defd.i;
8836 shadow.defd.fp |= sum.defd.fp;
8837 shadow.defd.mem |= sum.defd.mem;
8839 if ((sum.defd.i & shadow.used.i)
8840 || (sum.defd.fp & shadow.used.fp)
8841 || (sum.defd.mem & shadow.used.mem))
8843 /* (a) would be violated (also takes care of (b)) */
8844 gcc_assert (get_attr_trap (i) != TRAP_YES
8845 || (!(sum.defd.i & sum.used.i)
8846 && !(sum.defd.fp & sum.used.fp)));
8848 goto close_shadow;
8850 break;
8852 case BARRIER:
8853 /* __builtin_unreachable can expand to no code at all,
8854 leaving (barrier) RTXes in the instruction stream. */
8855 goto close_shadow_notrapb;
8857 case JUMP_INSN:
8858 case CALL_INSN:
8859 case CODE_LABEL:
8860 goto close_shadow;
8862 case DEBUG_INSN:
8863 break;
8865 default:
8866 gcc_unreachable ();
8869 else
8871 close_shadow:
8872 n = emit_insn_before (gen_trapb (), i);
8873 PUT_MODE (n, TImode);
8874 PUT_MODE (i, TImode);
8875 close_shadow_notrapb:
8876 trap_pending = 0;
8877 shadow.used.i = 0;
8878 shadow.used.fp = 0;
8879 shadow.used.mem = 0;
8880 shadow.defd = shadow.used;
8885 if ((exception_nesting > 0 || alpha_tp >= ALPHA_TP_FUNC)
8886 && NONJUMP_INSN_P (i)
8887 && GET_CODE (PATTERN (i)) != USE
8888 && GET_CODE (PATTERN (i)) != CLOBBER
8889 && get_attr_trap (i) == TRAP_YES)
8891 if (optimize && !trap_pending)
8892 summarize_insn (PATTERN (i), &shadow, 0);
8893 trap_pending = 1;
8898 /* Alpha can only issue instruction groups simultaneously if they are
8899 suitably aligned. This is very processor-specific. */
8900 /* There are a number of entries in alphaev4_insn_pipe and alphaev5_insn_pipe
8901 that are marked "fake". These instructions do not exist on that target,
8902 but it is possible to see these insns with deranged combinations of
8903 command-line options, such as "-mtune=ev4 -mmax". Instead of aborting,
8904 choose a result at random. */
8906 enum alphaev4_pipe {
8907 EV4_STOP = 0,
8908 EV4_IB0 = 1,
8909 EV4_IB1 = 2,
8910 EV4_IBX = 4
8913 enum alphaev5_pipe {
8914 EV5_STOP = 0,
8915 EV5_NONE = 1,
8916 EV5_E01 = 2,
8917 EV5_E0 = 4,
8918 EV5_E1 = 8,
8919 EV5_FAM = 16,
8920 EV5_FA = 32,
8921 EV5_FM = 64
8924 static enum alphaev4_pipe
8925 alphaev4_insn_pipe (rtx_insn *insn)
8927 if (recog_memoized (insn) < 0)
8928 return EV4_STOP;
8929 if (get_attr_length (insn) != 4)
8930 return EV4_STOP;
8932 switch (get_attr_type (insn))
8934 case TYPE_ILD:
8935 case TYPE_LDSYM:
8936 case TYPE_FLD:
8937 case TYPE_LD_L:
8938 return EV4_IBX;
8940 case TYPE_IADD:
8941 case TYPE_ILOG:
8942 case TYPE_ICMOV:
8943 case TYPE_ICMP:
8944 case TYPE_FST:
8945 case TYPE_SHIFT:
8946 case TYPE_IMUL:
8947 case TYPE_FBR:
8948 case TYPE_MVI: /* fake */
8949 return EV4_IB0;
8951 case TYPE_IST:
8952 case TYPE_MISC:
8953 case TYPE_IBR:
8954 case TYPE_JSR:
8955 case TYPE_CALLPAL:
8956 case TYPE_FCPYS:
8957 case TYPE_FCMOV:
8958 case TYPE_FADD:
8959 case TYPE_FDIV:
8960 case TYPE_FMUL:
8961 case TYPE_ST_C:
8962 case TYPE_MB:
8963 case TYPE_FSQRT: /* fake */
8964 case TYPE_FTOI: /* fake */
8965 case TYPE_ITOF: /* fake */
8966 return EV4_IB1;
8968 default:
8969 gcc_unreachable ();
8973 static enum alphaev5_pipe
8974 alphaev5_insn_pipe (rtx_insn *insn)
8976 if (recog_memoized (insn) < 0)
8977 return EV5_STOP;
8978 if (get_attr_length (insn) != 4)
8979 return EV5_STOP;
8981 switch (get_attr_type (insn))
8983 case TYPE_ILD:
8984 case TYPE_FLD:
8985 case TYPE_LDSYM:
8986 case TYPE_IADD:
8987 case TYPE_ILOG:
8988 case TYPE_ICMOV:
8989 case TYPE_ICMP:
8990 return EV5_E01;
8992 case TYPE_IST:
8993 case TYPE_FST:
8994 case TYPE_SHIFT:
8995 case TYPE_IMUL:
8996 case TYPE_MISC:
8997 case TYPE_MVI:
8998 case TYPE_LD_L:
8999 case TYPE_ST_C:
9000 case TYPE_MB:
9001 case TYPE_FTOI: /* fake */
9002 case TYPE_ITOF: /* fake */
9003 return EV5_E0;
9005 case TYPE_IBR:
9006 case TYPE_JSR:
9007 case TYPE_CALLPAL:
9008 return EV5_E1;
9010 case TYPE_FCPYS:
9011 return EV5_FAM;
9013 case TYPE_FBR:
9014 case TYPE_FCMOV:
9015 case TYPE_FADD:
9016 case TYPE_FDIV:
9017 case TYPE_FSQRT: /* fake */
9018 return EV5_FA;
9020 case TYPE_FMUL:
9021 return EV5_FM;
9023 default:
9024 gcc_unreachable ();
9028 /* IN_USE is a mask of the slots currently filled within the insn group.
9029 The mask bits come from alphaev4_pipe above. If EV4_IBX is set, then
9030 the insn in EV4_IB0 can be swapped by the hardware into EV4_IB1.
9032 LEN is, of course, the length of the group in bytes. */
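/* For example (hypothetical insns): starting from an integer load (EV4_IBX)
   followed by an add (EV4_IB0), the loop below records
   in_use = EV4_IB0|EV4_IBX after the load, then lets the add claim IB0 while
   the load is assumed to be swapped into IB1, giving a two-insn, 8-byte
   group; a subsequent store (EV4_IB1) would then start a new group. */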
9034 static rtx_insn *
9035 alphaev4_next_group (rtx_insn *insn, int *pin_use, int *plen)
9037 int len, in_use;
9039 len = in_use = 0;
9041 if (! INSN_P (insn)
9042 || GET_CODE (PATTERN (insn)) == CLOBBER
9043 || GET_CODE (PATTERN (insn)) == USE)
9044 goto next_and_done;
9046 while (1)
9048 enum alphaev4_pipe pipe;
9050 pipe = alphaev4_insn_pipe (insn);
9051 switch (pipe)
9053 case EV4_STOP:
9054 /* Force complex instructions to start new groups. */
9055 if (in_use)
9056 goto done;
9058 /* If this is a completely unrecognized insn, it's an asm.
9059 We don't know how long it is, so record length as -1 to
9060 signal a needed realignment. */
9061 if (recog_memoized (insn) < 0)
9062 len = -1;
9063 else
9064 len = get_attr_length (insn);
9065 goto next_and_done;
9067 case EV4_IBX:
9068 if (in_use & EV4_IB0)
9070 if (in_use & EV4_IB1)
9071 goto done;
9072 in_use |= EV4_IB1;
9074 else
9075 in_use |= EV4_IB0 | EV4_IBX;
9076 break;
9078 case EV4_IB0:
9079 if (in_use & EV4_IB0)
9081 if (!(in_use & EV4_IBX) || (in_use & EV4_IB1))
9082 goto done;
9083 in_use |= EV4_IB1;
9085 in_use |= EV4_IB0;
9086 break;
9088 case EV4_IB1:
9089 if (in_use & EV4_IB1)
9090 goto done;
9091 in_use |= EV4_IB1;
9092 break;
9094 default:
9095 gcc_unreachable ();
9097 len += 4;
9099 /* Haifa doesn't do well scheduling branches. */
9100 if (JUMP_P (insn))
9101 goto next_and_done;
9103 next:
9104 insn = next_nonnote_insn (insn);
9106 if (!insn || ! INSN_P (insn))
9107 goto done;
9109 /* Let Haifa tell us where it thinks insn group boundaries are. */
9110 if (GET_MODE (insn) == TImode)
9111 goto done;
9113 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
9114 goto next;
9117 next_and_done:
9118 insn = next_nonnote_insn (insn);
9120 done:
9121 *plen = len;
9122 *pin_use = in_use;
9123 return insn;
9126 /* IN_USE is a mask of the slots currently filled within the insn group.
9127 The mask bits come from alphaev5_pipe above. If EV5_E01 is set, then
9128 the insn in EV5_E0 can be swapped by the hardware into EV5_E1.
9130 LEN is, of course, the length of the group in bytes. */
9132 static rtx_insn *
9133 alphaev5_next_group (rtx_insn *insn, int *pin_use, int *plen)
9135 int len, in_use;
9137 len = in_use = 0;
9139 if (! INSN_P (insn)
9140 || GET_CODE (PATTERN (insn)) == CLOBBER
9141 || GET_CODE (PATTERN (insn)) == USE)
9142 goto next_and_done;
9144 while (1)
9146 enum alphaev5_pipe pipe;
9148 pipe = alphaev5_insn_pipe (insn);
9149 switch (pipe)
9151 case EV5_STOP:
9152 /* Force complex instructions to start new groups. */
9153 if (in_use)
9154 goto done;
9156 /* If this is a completely unrecognized insn, it's an asm.
9157 We don't know how long it is, so record length as -1 to
9158 signal a needed realignment. */
9159 if (recog_memoized (insn) < 0)
9160 len = -1;
9161 else
9162 len = get_attr_length (insn);
9163 goto next_and_done;
9165 	      /* ??? We would like to assert that most of the cases below can never
9166 		 happen, as they would indicate an error either in Haifa or in
9167 		 the scheduling description.  Unfortunately, Haifa never
9168 schedules the last instruction of the BB, so we don't have
9169 an accurate TI bit to go off. */
9170 case EV5_E01:
9171 if (in_use & EV5_E0)
9173 if (in_use & EV5_E1)
9174 goto done;
9175 in_use |= EV5_E1;
9177 else
9178 in_use |= EV5_E0 | EV5_E01;
9179 break;
9181 case EV5_E0:
9182 if (in_use & EV5_E0)
9184 if (!(in_use & EV5_E01) || (in_use & EV5_E1))
9185 goto done;
9186 in_use |= EV5_E1;
9188 in_use |= EV5_E0;
9189 break;
9191 case EV5_E1:
9192 if (in_use & EV5_E1)
9193 goto done;
9194 in_use |= EV5_E1;
9195 break;
9197 case EV5_FAM:
9198 if (in_use & EV5_FA)
9200 if (in_use & EV5_FM)
9201 goto done;
9202 in_use |= EV5_FM;
9204 else
9205 in_use |= EV5_FA | EV5_FAM;
9206 break;
9208 case EV5_FA:
9209 if (in_use & EV5_FA)
9210 goto done;
9211 in_use |= EV5_FA;
9212 break;
9214 case EV5_FM:
9215 if (in_use & EV5_FM)
9216 goto done;
9217 in_use |= EV5_FM;
9218 break;
9220 case EV5_NONE:
9221 break;
9223 default:
9224 gcc_unreachable ();
9226 len += 4;
9228 /* Haifa doesn't do well scheduling branches. */
9229 /* ??? If this is predicted not-taken, slotting continues, except
9230 that no more IBR, FBR, or JSR insns may be slotted. */
9231 if (JUMP_P (insn))
9232 goto next_and_done;
9234 next:
9235 insn = next_nonnote_insn (insn);
9237 if (!insn || ! INSN_P (insn))
9238 goto done;
9240 /* Let Haifa tell us where it thinks insn group boundaries are. */
9241 if (GET_MODE (insn) == TImode)
9242 goto done;
9244 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
9245 goto next;
9248 next_and_done:
9249 insn = next_nonnote_insn (insn);
9251 done:
9252 *plen = len;
9253 *pin_use = in_use;
9254 return insn;
9257 static rtx
9258 alphaev4_next_nop (int *pin_use)
9260 int in_use = *pin_use;
9261 rtx nop;
9263 if (!(in_use & EV4_IB0))
9265 in_use |= EV4_IB0;
9266 nop = gen_nop ();
9268 else if ((in_use & (EV4_IBX|EV4_IB1)) == EV4_IBX)
9270 in_use |= EV4_IB1;
9271 nop = gen_nop ();
9273 else if (TARGET_FP && !(in_use & EV4_IB1))
9275 in_use |= EV4_IB1;
9276 nop = gen_fnop ();
9278 else
9279 nop = gen_unop ();
9281 *pin_use = in_use;
9282 return nop;
9285 static rtx
9286 alphaev5_next_nop (int *pin_use)
9288 int in_use = *pin_use;
9289 rtx nop;
9291 if (!(in_use & EV5_E1))
9293 in_use |= EV5_E1;
9294 nop = gen_nop ();
9296 else if (TARGET_FP && !(in_use & EV5_FA))
9298 in_use |= EV5_FA;
9299 nop = gen_fnop ();
9301 else if (TARGET_FP && !(in_use & EV5_FM))
9303 in_use |= EV5_FM;
9304 nop = gen_fnop ();
9306 else
9307 nop = gen_unop ();
9309 *pin_use = in_use;
9310 return nop;
9313 /* The instruction group alignment main loop. */
9315 static void
9316 alpha_align_insns_1 (unsigned int max_align,
9317 rtx_insn *(*next_group) (rtx_insn *, int *, int *),
9318 rtx (*next_nop) (int *))
9320 /* ALIGN is the known alignment for the insn group. */
9321 unsigned int align;
9322 /* OFS is the offset of the current insn in the insn group. */
9323 int ofs;
9324 int prev_in_use, in_use, len, ldgp;
9325 rtx_insn *i, *next;
9327   /* Let shorten_branches take care of assigning alignments to code labels. */
9328 shorten_branches (get_insns ());
9330 unsigned int option_alignment = align_functions.levels[0].get_value ();
9331 if (option_alignment < 4)
9332 align = 4;
9333 else if ((unsigned int) option_alignment < max_align)
9334 align = option_alignment;
9335 else
9336 align = max_align;
9338 ofs = prev_in_use = 0;
9339 i = get_insns ();
9340 if (NOTE_P (i))
9341 i = next_nonnote_insn (i);
9343 ldgp = alpha_function_needs_gp ? 8 : 0;
9345 while (i)
9347 next = (*next_group) (i, &in_use, &len);
9349 /* When we see a label, resync alignment etc. */
9350 if (LABEL_P (i))
9352 unsigned int new_align
9353 = label_to_alignment (i).levels[0].get_value ();
9355 if (new_align >= align)
9357 align = new_align < max_align ? new_align : max_align;
9358 ofs = 0;
9361 else if (ofs & (new_align-1))
9362 ofs = (ofs | (new_align-1)) + 1;
9363 gcc_assert (!len);
9366       /* Handle complex instructions specially. */
9367 else if (in_use == 0)
9369 /* Asms will have length < 0. This is a signal that we have
9370 lost alignment knowledge. Assume, however, that the asm
9371 will not mis-align instructions. */
9372 if (len < 0)
9374 ofs = 0;
9375 align = 4;
9376 len = 0;
9380 /* If the known alignment is smaller than the recognized insn group,
9381 realign the output. */
9382 else if ((int) align < len)
9384 unsigned int new_log_align = len > 8 ? 4 : 3;
9385 rtx_insn *prev, *where;
9387 where = prev = prev_nonnote_insn (i);
9388 if (!where || !LABEL_P (where))
9389 where = i;
9391 /* Can't realign between a call and its gp reload. */
9392 if (! (TARGET_EXPLICIT_RELOCS
9393 && prev && CALL_P (prev)))
9395 emit_insn_before (gen_realign (GEN_INT (new_log_align)), where);
9396 align = 1 << new_log_align;
9397 ofs = 0;
9401 /* We may not insert padding inside the initial ldgp sequence. */
9402 else if (ldgp > 0)
9403 ldgp -= len;
9405 /* If the group won't fit in the same INT16 as the previous,
9406 we need to add padding to keep the group together. Rather
9407 than simply leaving the insn filling to the assembler, we
9408 can make use of the knowledge of what sorts of instructions
9409 were issued in the previous group to make sure that all of
9410 the added nops are really free. */
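      /* Illustrative numbers: with a 16-byte alignment (EV5), if the current
	 offset is 12 and the next group is 8 bytes long, the group would
	 straddle the boundary, so (16 - 12) / 4 == 1 nop is emitted into a
	 slot the previous group left free and OFS resets to 0. */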
9411 else if (ofs + len > (int) align)
9413 int nop_count = (align - ofs) / 4;
9414 rtx_insn *where;
9416 /* Insert nops before labels, branches, and calls to truly merge
9417 the execution of the nops with the previous instruction group. */
9418 where = prev_nonnote_insn (i);
9419 if (where)
9421 if (LABEL_P (where))
9423 rtx_insn *where2 = prev_nonnote_insn (where);
9424 if (where2 && JUMP_P (where2))
9425 where = where2;
9427 else if (NONJUMP_INSN_P (where))
9428 where = i;
9430 else
9431 where = i;
9434 emit_insn_before ((*next_nop)(&prev_in_use), where);
9435 while (--nop_count);
9436 ofs = 0;
9439 ofs = (ofs + len) & (align - 1);
9440 prev_in_use = in_use;
9441 i = next;
9445 static void
9446 alpha_align_insns (void)
9448 if (alpha_tune == PROCESSOR_EV4)
9449 alpha_align_insns_1 (8, alphaev4_next_group, alphaev4_next_nop);
9450 else if (alpha_tune == PROCESSOR_EV5)
9451 alpha_align_insns_1 (16, alphaev5_next_group, alphaev5_next_nop);
9452 else
9453 gcc_unreachable ();
9456 /* Insert an unop between a sibcall or noreturn function call and the GP load. */
9458 static void
9459 alpha_pad_function_end (void)
9461 rtx_insn *insn, *next;
9463 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9465 if (!CALL_P (insn)
9466 || !(SIBLING_CALL_P (insn)
9467 || find_reg_note (insn, REG_NORETURN, NULL_RTX)))
9468 continue;
9470 next = next_active_insn (insn);
9471 if (next)
9473 rtx pat = PATTERN (next);
9475 if (GET_CODE (pat) == SET
9476 && GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
9477 && XINT (SET_SRC (pat), 1) == UNSPECV_LDGP1)
9478 emit_insn_after (gen_unop (), insn);
9483 /* Machine dependent reorg pass. */
9485 static void
9486 alpha_reorg (void)
9488 /* Workaround for a linker error that triggers when an exception
9489    handler immediately follows a sibcall or a noreturn function.
9491 In the sibcall case:
9493 The instruction stream from an object file:
9495 1d8: 00 00 fb 6b jmp (t12)
9496 1dc: 00 00 ba 27 ldah gp,0(ra)
9497 1e0: 00 00 bd 23 lda gp,0(gp)
9498 1e4: 00 00 7d a7 ldq t12,0(gp)
9499 1e8: 00 40 5b 6b jsr ra,(t12),1ec <__funcZ+0x1ec>
9501 was converted in the final link pass to:
9503 12003aa88: 67 fa ff c3 br 120039428 <...>
9504 12003aa8c: 00 00 fe 2f unop
9505 12003aa90: 00 00 fe 2f unop
9506 12003aa94: 48 83 7d a7 ldq t12,-31928(gp)
9507 12003aa98: 00 40 5b 6b jsr ra,(t12),12003aa9c <__func+0x1ec>
9509 And in the noreturn case:
9511 The instruction stream from an object file:
9513 54: 00 40 5b 6b jsr ra,(t12),58 <__func+0x58>
9514 58: 00 00 ba 27 ldah gp,0(ra)
9515 5c: 00 00 bd 23 lda gp,0(gp)
9516 60: 00 00 7d a7 ldq t12,0(gp)
9517 64: 00 40 5b 6b jsr ra,(t12),68 <__func+0x68>
9519 was converted in the final link pass to:
9521 fdb24: a0 03 40 d3 bsr ra,fe9a8 <_called_func+0x8>
9522 fdb28: 00 00 fe 2f unop
9523 fdb2c: 00 00 fe 2f unop
9524 fdb30: 30 82 7d a7 ldq t12,-32208(gp)
9525 fdb34: 00 40 5b 6b jsr ra,(t12),fdb38 <__func+0x68>
9527 GP load instructions were wrongly cleared by the linker relaxation
9528 pass. This workaround prevents removal of GP loads by inserting
9529 an unop instruction between a sibcall or noreturn function call and
9530 exception handler prologue. */
9532 if (current_function_has_exception_handlers ())
9533 alpha_pad_function_end ();
9535   /* The CALL_PAL that implements the trap insn updates the program counter
9536      to point after the insn.  In case the trap is the last insn in the
9537      function, emit a NOP to guarantee that the PC remains inside the
9538      function boundaries.  This workaround is needed to get reliable backtraces. */
9540 rtx_insn *insn = prev_active_insn (get_last_insn ());
9542 if (insn && NONJUMP_INSN_P (insn))
9544 rtx pat = PATTERN (insn);
9545 if (GET_CODE (pat) == PARALLEL)
9547 rtx vec = XVECEXP (pat, 0, 0);
9548 if (GET_CODE (vec) == TRAP_IF
9549 && XEXP (vec, 0) == const1_rtx)
9550 emit_insn_after (gen_unop (), insn);
9555 static void
9556 alpha_file_start (void)
9558 default_file_start ();
9560 fputs ("\t.set noreorder\n", asm_out_file);
9561 fputs ("\t.set volatile\n", asm_out_file);
9562 if (TARGET_ABI_OSF)
9563 fputs ("\t.set noat\n", asm_out_file);
9564 if (TARGET_EXPLICIT_RELOCS)
9565 fputs ("\t.set nomacro\n", asm_out_file);
9566 if (TARGET_SUPPORT_ARCH | TARGET_BWX | TARGET_MAX | TARGET_FIX | TARGET_CIX)
9568 const char *arch;
9570 if (alpha_cpu == PROCESSOR_EV6 || TARGET_FIX || TARGET_CIX)
9571 arch = "ev6";
9572 else if (TARGET_MAX)
9573 arch = "pca56";
9574 else if (TARGET_BWX)
9575 arch = "ev56";
9576 else if (alpha_cpu == PROCESSOR_EV5)
9577 arch = "ev5";
9578 else
9579 arch = "ev4";
9581 fprintf (asm_out_file, "\t.arch %s\n", arch);
9585 /* Since we don't have a .dynbss section, we should not allow global
9586 relocations in the .rodata section. */
9588 static int
9589 alpha_elf_reloc_rw_mask (void)
9591 return flag_pic ? 3 : 2;
9594 /* Return a section for X. The only special thing we do here is to
9595 honor small data. */
9597 static section *
9598 alpha_elf_select_rtx_section (machine_mode mode, rtx x,
9599 unsigned HOST_WIDE_INT align)
9601 if (TARGET_SMALL_DATA && GET_MODE_SIZE (mode) <= g_switch_value)
9602 /* ??? Consider using mergeable sdata sections. */
9603 return sdata_section;
9604 else
9605 return default_elf_select_rtx_section (mode, x, align);
9608 static unsigned int
9609 alpha_elf_section_type_flags (tree decl, const char *name, int reloc)
9611 unsigned int flags = 0;
9613 if (strcmp (name, ".sdata") == 0
9614 || startswith (name, ".sdata.")
9615 || startswith (name, ".gnu.linkonce.s.")
9616 || strcmp (name, ".sbss") == 0
9617 || startswith (name, ".sbss.")
9618 || startswith (name, ".gnu.linkonce.sb."))
9619 flags = SECTION_SMALL;
9621 flags |= default_section_type_flags (decl, name, reloc);
9622 return flags;
9625 /* Structure to collect function names for final output in link section. */
9626 /* Note that items marked with GTY can't be ifdef'ed out. */
9628 enum reloc_kind
9630 KIND_LINKAGE,
9631 KIND_CODEADDR
9634 struct GTY(()) alpha_links
9636 rtx func;
9637 rtx linkage;
9638 enum reloc_kind rkind;
9641 #if TARGET_ABI_OPEN_VMS
9643 /* Return the VMS argument type corresponding to MODE. */
9645 enum avms_arg_type
9646 alpha_arg_type (machine_mode mode)
9648 switch (mode)
9650 case E_SFmode:
9651 return TARGET_FLOAT_VAX ? FF : FS;
9652 case E_DFmode:
9653 return TARGET_FLOAT_VAX ? FD : FT;
9654 default:
9655 return I64;
9659 /* Return an rtx for an integer representing the VMS Argument Information
9660 register value. */
9663 alpha_arg_info_reg_val (CUMULATIVE_ARGS cum)
9665 unsigned HOST_WIDE_INT regval = cum.num_args;
9666 int i;
9668 for (i = 0; i < 6; i++)
9669 regval |= ((int) cum.atypes[i]) << (i * 3 + 8);
9671 return GEN_INT (regval);
9675 /* Return a SYMBOL_REF representing the reference to the .linkage entry
9676 of function FUNC built for calls made from CFUNDECL. LFLAG is 1 if
9677 this is the reference to the linkage pointer value, 0 if this is the
9678    reference to the function entry value.  RFLAG is 1 if this is a reduced
9679 reference (code address only), 0 if this is a full reference. */
9682 alpha_use_linkage (rtx func, bool lflag, bool rflag)
9684 struct alpha_links *al = NULL;
9685 const char *name = XSTR (func, 0);
9687 if (cfun->machine->links)
9689 /* Is this name already defined? */
9690 alpha_links **slot = cfun->machine->links->get (name);
9691 if (slot)
9692 al = *slot;
9694 else
9695 cfun->machine->links
9696 = hash_map<nofree_string_hash, alpha_links *>::create_ggc (64);
9698 if (al == NULL)
9700 size_t buf_len;
9701 char *linksym;
9702 tree id;
9704 if (name[0] == '*')
9705 name++;
9707 /* Follow transparent alias, as this is used for CRTL translations. */
9708 id = maybe_get_identifier (name);
9709 if (id)
9711 while (IDENTIFIER_TRANSPARENT_ALIAS (id))
9712 id = TREE_CHAIN (id);
9713 name = IDENTIFIER_POINTER (id);
9716 buf_len = strlen (name) + 8 + 9;
9717 linksym = (char *) alloca (buf_len);
9718 snprintf (linksym, buf_len, "$%d..%s..lk", cfun->funcdef_no, name);
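      /* E.g. a function whose funcdef_no is 2 referencing "foo" gets the
	 local linkage symbol "$2..foo..lk". */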
9720 al = ggc_alloc<alpha_links> ();
9721 al->func = func;
9722 al->linkage = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (linksym));
9724 cfun->machine->links->put (ggc_strdup (name), al);
9727 al->rkind = rflag ? KIND_CODEADDR : KIND_LINKAGE;
9729 if (lflag)
9730 return gen_rtx_MEM (Pmode, plus_constant (Pmode, al->linkage, 8));
9731 else
9732 return al->linkage;
9735 static int
9736 alpha_write_one_linkage (const char *name, alpha_links *link, FILE *stream)
9738 ASM_OUTPUT_INTERNAL_LABEL (stream, XSTR (link->linkage, 0));
9739 if (link->rkind == KIND_CODEADDR)
9741 /* External and used, request code address. */
9742 fprintf (stream, "\t.code_address ");
9744 else
9746 if (!SYMBOL_REF_EXTERNAL_P (link->func)
9747 && SYMBOL_REF_LOCAL_P (link->func))
9749 /* Locally defined, build linkage pair. */
9750 fprintf (stream, "\t.quad %s..en\n", name);
9751 fprintf (stream, "\t.quad ");
9753 else
9755 /* External, request linkage pair. */
9756 fprintf (stream, "\t.linkage ");
9759 assemble_name (stream, name);
9760 fputs ("\n", stream);
9762 return 0;
9765 static void
9766 alpha_write_linkage (FILE *stream, const char *funname)
9768 fprintf (stream, "\t.link\n");
9769 fprintf (stream, "\t.align 3\n");
9770 in_section = NULL;
9772 #ifdef TARGET_VMS_CRASH_DEBUG
9773 fputs ("\t.name ", stream);
9774 assemble_name (stream, funname);
9775 fputs ("..na\n", stream);
9776 #endif
9778 ASM_OUTPUT_LABEL (stream, funname);
9779 fprintf (stream, "\t.pdesc ");
9780 assemble_name (stream, funname);
9781 fprintf (stream, "..en,%s\n",
9782 alpha_procedure_type == PT_STACK ? "stack"
9783 : alpha_procedure_type == PT_REGISTER ? "reg" : "null");
9785 if (cfun->machine->links)
9787 hash_map<nofree_string_hash, alpha_links *>::iterator iter
9788 = cfun->machine->links->begin ();
9789 for (; iter != cfun->machine->links->end (); ++iter)
9790 alpha_write_one_linkage ((*iter).first, (*iter).second, stream);
9794 /* Switch to an arbitrary section NAME with attributes as specified
9795 by FLAGS. ALIGN specifies any known alignment requirements for
9796 the section; 0 if the default should be used. */
9798 static void
9799 vms_asm_named_section (const char *name, unsigned int flags,
9800 tree decl ATTRIBUTE_UNUSED)
9802 fputc ('\n', asm_out_file);
9803 fprintf (asm_out_file, ".section\t%s", name);
9805 if (flags & SECTION_DEBUG)
9806 fprintf (asm_out_file, ",NOWRT");
9808 fputc ('\n', asm_out_file);
9811 /* Record an element in the table of global constructors. SYMBOL is
9812 a SYMBOL_REF of the function to be called; PRIORITY is a number
9813 between 0 and MAX_INIT_PRIORITY.
9815 Differs from default_ctors_section_asm_out_constructor in that the
9816 width of the .ctors entry is always 64 bits, rather than the 32 bits
9817 used by a normal pointer. */
9819 static void
9820 vms_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9822 switch_to_section (ctors_section);
9823 assemble_align (BITS_PER_WORD);
9824 assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9827 static void
9828 vms_asm_out_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9830 switch_to_section (dtors_section);
9831 assemble_align (BITS_PER_WORD);
9832 assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9834 #else
9836 alpha_use_linkage (rtx func ATTRIBUTE_UNUSED,
9837 bool lflag ATTRIBUTE_UNUSED,
9838 bool rflag ATTRIBUTE_UNUSED)
9840 return NULL_RTX;
9843 #endif /* TARGET_ABI_OPEN_VMS */
9845 static void
9846 alpha_init_libfuncs (void)
9848 if (TARGET_ABI_OPEN_VMS)
9850 /* Use the VMS runtime library functions for division and
9851 remainder. */
9852 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
9853 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
9854 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
9855 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
9856 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
9857 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
9858 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
9859 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
9860 #ifdef MEM_LIBFUNCS_INIT
9861 MEM_LIBFUNCS_INIT;
9862 #endif
9866 /* On the Alpha, we use this to disable the floating-point registers
9867 when they don't exist. */
9869 static void
9870 alpha_conditional_register_usage (void)
9872 int i;
9873 if (! TARGET_FPREGS)
9874 for (i = 32; i < 63; i++)
9875 fixed_regs[i] = call_used_regs[i] = 1;
9878 /* Canonicalize a comparison from one we don't have to one we do have. */
9880 static void
9881 alpha_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
9882 bool op0_preserve_value)
9884 if (!op0_preserve_value
9885 && (*code == GE || *code == GT || *code == GEU || *code == GTU)
9886 && (REG_P (*op1) || *op1 == const0_rtx))
9888 std::swap (*op0, *op1);
9889 *code = (int)swap_condition ((enum rtx_code)*code);
9892 if ((*code == LT || *code == LTU)
9893 && CONST_INT_P (*op1) && INTVAL (*op1) == 256)
9895 *code = *code == LT ? LE : LEU;
9896 *op1 = GEN_INT (255);
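      /* The rewrite above helps because Alpha operate-format compares accept
	 an 8-bit unsigned literal: 255 fits directly in a cmple/cmpule,
	 whereas 256 would have to be loaded into a register first. */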
9900 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
9902 static void
9903 alpha_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
9905 const unsigned HOST_WIDE_INT SWCR_STATUS_MASK = (0x3fUL << 17);
9907 tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
9908 tree new_fenv_var, reload_fenv, restore_fnenv;
9909 tree update_call, atomic_feraiseexcept, hold_fnclex;
9911 /* Assume OSF/1 compatible interfaces. */
9912 if (!TARGET_ABI_OSF)
9913 return;
9915 /* Generate the equivalent of :
9916 unsigned long fenv_var;
9917 fenv_var = __ieee_get_fp_control ();
9919 unsigned long masked_fenv;
9920 masked_fenv = fenv_var & mask;
9922 __ieee_set_fp_control (masked_fenv); */
9924 fenv_var = create_tmp_var_raw (long_unsigned_type_node);
9925 get_fpscr
9926 = build_fn_decl ("__ieee_get_fp_control",
9927 build_function_type_list (long_unsigned_type_node, NULL));
9928 set_fpscr
9929 = build_fn_decl ("__ieee_set_fp_control",
9930 build_function_type_list (void_type_node, NULL));
9931 mask = build_int_cst (long_unsigned_type_node, ~SWCR_STATUS_MASK);
9932 ld_fenv = build4 (TARGET_EXPR, long_unsigned_type_node, fenv_var,
9933 build_call_expr (get_fpscr, 0), NULL_TREE, NULL_TREE);
9934 masked_fenv = build2 (BIT_AND_EXPR, long_unsigned_type_node, fenv_var, mask);
9935 hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
9936 *hold = build2 (COMPOUND_EXPR, void_type_node,
9937 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
9938 hold_fnclex);
9940 /* Store the value of masked_fenv to clear the exceptions:
9941 __ieee_set_fp_control (masked_fenv); */
9943 *clear = build_call_expr (set_fpscr, 1, masked_fenv);
9945 /* Generate the equivalent of :
9946 unsigned long new_fenv_var;
9947 new_fenv_var = __ieee_get_fp_control ();
9949 __ieee_set_fp_control (fenv_var);
9951 __atomic_feraiseexcept (new_fenv_var); */
9953 new_fenv_var = create_tmp_var_raw (long_unsigned_type_node);
9954 reload_fenv = build4 (TARGET_EXPR, long_unsigned_type_node, new_fenv_var,
9955 build_call_expr (get_fpscr, 0), NULL_TREE, NULL_TREE);
9956 restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
9957 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
9958 update_call
9959 = build_call_expr (atomic_feraiseexcept, 1,
9960 fold_convert (integer_type_node, new_fenv_var));
9961 *update = build2 (COMPOUND_EXPR, void_type_node,
9962 build2 (COMPOUND_EXPR, void_type_node,
9963 reload_fenv, restore_fnenv), update_call);
9966 /* Implement TARGET_HARD_REGNO_MODE_OK. On Alpha, the integer registers
9967 can hold any mode. The floating-point registers can hold 64-bit
9968 integers as well, but not smaller values. */
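/* For example, DImode, DFmode or SCmode values may live in $f8 (regno 40),
   while SImode or HImode values may not; any of these modes is fine in an
   integer register. */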
9970 static bool
9971 alpha_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
9973 if (IN_RANGE (regno, 32, 62))
9974 return (mode == SFmode
9975 || mode == DFmode
9976 || mode == DImode
9977 || mode == SCmode
9978 || mode == DCmode);
9979 return true;
9982 /* Implement TARGET_MODES_TIEABLE_P. This asymmetric test is true when
9983 MODE1 could be put in an FP register but MODE2 could not. */
9985 static bool
9986 alpha_modes_tieable_p (machine_mode mode1, machine_mode mode2)
9988 return (alpha_hard_regno_mode_ok (32, mode1)
9989 ? alpha_hard_regno_mode_ok (32, mode2)
9990 : true);
9993 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
9995 static bool
9996 alpha_can_change_mode_class (machine_mode from, machine_mode to,
9997 reg_class_t rclass)
9999 return (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
10000 || !reg_classes_intersect_p (FLOAT_REGS, rclass));
10003 /* Implement TARGET_C_MODE_FOR_FLOATING_TYPE.  Return TFmode or DFmode
10004    for TI_LONG_DOUBLE_TYPE, which is the long double type; go with the
10005    default for the others. */
10007 static machine_mode
10008 alpha_c_mode_for_floating_type (enum tree_index ti)
10010 if (ti == TI_LONG_DOUBLE_TYPE)
10011 return TARGET_LONG_DOUBLE_128 ? TFmode : DFmode;
10012 return default_mode_for_floating_type (ti);
10015 /* Initialize the GCC target structure. */
10016 #if TARGET_ABI_OPEN_VMS
10017 # undef TARGET_ATTRIBUTE_TABLE
10018 # define TARGET_ATTRIBUTE_TABLE vms_attribute_table
10019 # undef TARGET_CAN_ELIMINATE
10020 # define TARGET_CAN_ELIMINATE alpha_vms_can_eliminate
10021 #endif
10023 #undef TARGET_IN_SMALL_DATA_P
10024 #define TARGET_IN_SMALL_DATA_P alpha_in_small_data_p
10026 #undef TARGET_ASM_ALIGNED_HI_OP
10027 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
10028 #undef TARGET_ASM_ALIGNED_DI_OP
10029 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
10031 /* Default unaligned ops are provided for ELF systems. To get unaligned
10032 data for non-ELF systems, we have to turn off auto alignment. */
10033 #if TARGET_ABI_OPEN_VMS
10034 #undef TARGET_ASM_UNALIGNED_HI_OP
10035 #define TARGET_ASM_UNALIGNED_HI_OP "\t.align 0\n\t.word\t"
10036 #undef TARGET_ASM_UNALIGNED_SI_OP
10037 #define TARGET_ASM_UNALIGNED_SI_OP "\t.align 0\n\t.long\t"
10038 #undef TARGET_ASM_UNALIGNED_DI_OP
10039 #define TARGET_ASM_UNALIGNED_DI_OP "\t.align 0\n\t.quad\t"
10040 #endif
10042 #undef TARGET_ASM_RELOC_RW_MASK
10043 #define TARGET_ASM_RELOC_RW_MASK alpha_elf_reloc_rw_mask
10044 #undef TARGET_ASM_SELECT_RTX_SECTION
10045 #define TARGET_ASM_SELECT_RTX_SECTION alpha_elf_select_rtx_section
10046 #undef TARGET_SECTION_TYPE_FLAGS
10047 #define TARGET_SECTION_TYPE_FLAGS alpha_elf_section_type_flags
10049 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
10050 #define TARGET_ASM_FUNCTION_END_PROLOGUE alpha_output_function_end_prologue
10052 #undef TARGET_INIT_LIBFUNCS
10053 #define TARGET_INIT_LIBFUNCS alpha_init_libfuncs
10055 #undef TARGET_LEGITIMIZE_ADDRESS
10056 #define TARGET_LEGITIMIZE_ADDRESS alpha_legitimize_address
10057 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
10058 #define TARGET_MODE_DEPENDENT_ADDRESS_P alpha_mode_dependent_address_p
10060 #undef TARGET_ASM_FILE_START
10061 #define TARGET_ASM_FILE_START alpha_file_start
10063 #undef TARGET_SCHED_ADJUST_COST
10064 #define TARGET_SCHED_ADJUST_COST alpha_adjust_cost
10065 #undef TARGET_SCHED_ISSUE_RATE
10066 #define TARGET_SCHED_ISSUE_RATE alpha_issue_rate
10067 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
10068 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
10069 alpha_multipass_dfa_lookahead
10071 #undef TARGET_HAVE_TLS
10072 #define TARGET_HAVE_TLS HAVE_AS_TLS
10074 #undef TARGET_BUILTIN_DECL
10075 #define TARGET_BUILTIN_DECL alpha_builtin_decl
10076 #undef TARGET_INIT_BUILTINS
10077 #define TARGET_INIT_BUILTINS alpha_init_builtins
10078 #undef TARGET_EXPAND_BUILTIN
10079 #define TARGET_EXPAND_BUILTIN alpha_expand_builtin
10080 #undef TARGET_FOLD_BUILTIN
10081 #define TARGET_FOLD_BUILTIN alpha_fold_builtin
10082 #undef TARGET_GIMPLE_FOLD_BUILTIN
10083 #define TARGET_GIMPLE_FOLD_BUILTIN alpha_gimple_fold_builtin
10085 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
10086 #define TARGET_FUNCTION_OK_FOR_SIBCALL alpha_function_ok_for_sibcall
10087 #undef TARGET_CANNOT_COPY_INSN_P
10088 #define TARGET_CANNOT_COPY_INSN_P alpha_cannot_copy_insn_p
10089 #undef TARGET_LEGITIMATE_CONSTANT_P
10090 #define TARGET_LEGITIMATE_CONSTANT_P alpha_legitimate_constant_p
10091 #undef TARGET_CANNOT_FORCE_CONST_MEM
10092 #define TARGET_CANNOT_FORCE_CONST_MEM alpha_cannot_force_const_mem
10094 #if TARGET_ABI_OSF
10095 #undef TARGET_ASM_OUTPUT_MI_THUNK
10096 #define TARGET_ASM_OUTPUT_MI_THUNK alpha_output_mi_thunk_osf
10097 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
10098 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
10099 #undef TARGET_STDARG_OPTIMIZE_HOOK
10100 #define TARGET_STDARG_OPTIMIZE_HOOK alpha_stdarg_optimize_hook
10101 #endif
10103 #undef TARGET_PRINT_OPERAND
10104 #define TARGET_PRINT_OPERAND alpha_print_operand
10105 #undef TARGET_PRINT_OPERAND_ADDRESS
10106 #define TARGET_PRINT_OPERAND_ADDRESS alpha_print_operand_address
10107 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
10108 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P alpha_print_operand_punct_valid_p
10110 /* Use 16-bit anchors. */
10111 #undef TARGET_MIN_ANCHOR_OFFSET
10112 #define TARGET_MIN_ANCHOR_OFFSET -0x7fff - 1
10113 #undef TARGET_MAX_ANCHOR_OFFSET
10114 #define TARGET_MAX_ANCHOR_OFFSET 0x7fff
10115 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
10116 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
10118 #undef TARGET_REGISTER_MOVE_COST
10119 #define TARGET_REGISTER_MOVE_COST alpha_register_move_cost
10120 #undef TARGET_MEMORY_MOVE_COST
10121 #define TARGET_MEMORY_MOVE_COST alpha_memory_move_cost
10122 #undef TARGET_RTX_COSTS
10123 #define TARGET_RTX_COSTS alpha_rtx_costs
10124 #undef TARGET_ADDRESS_COST
10125 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
10127 #undef TARGET_MACHINE_DEPENDENT_REORG
10128 #define TARGET_MACHINE_DEPENDENT_REORG alpha_reorg
10130 #undef TARGET_PROMOTE_FUNCTION_MODE
10131 #define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
10132 #undef TARGET_PROMOTE_PROTOTYPES
10133 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_false
10135 #undef TARGET_FUNCTION_VALUE
10136 #define TARGET_FUNCTION_VALUE alpha_function_value
10137 #undef TARGET_LIBCALL_VALUE
10138 #define TARGET_LIBCALL_VALUE alpha_libcall_value
10139 #undef TARGET_FUNCTION_VALUE_REGNO_P
10140 #define TARGET_FUNCTION_VALUE_REGNO_P alpha_function_value_regno_p
10141 #undef TARGET_RETURN_IN_MEMORY
10142 #define TARGET_RETURN_IN_MEMORY alpha_return_in_memory
10143 #undef TARGET_PASS_BY_REFERENCE
10144 #define TARGET_PASS_BY_REFERENCE alpha_pass_by_reference
10145 #undef TARGET_SETUP_INCOMING_VARARGS
10146 #define TARGET_SETUP_INCOMING_VARARGS alpha_setup_incoming_varargs
10147 #undef TARGET_STRICT_ARGUMENT_NAMING
10148 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
10149 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
10150 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
10151 #undef TARGET_SPLIT_COMPLEX_ARG
10152 #define TARGET_SPLIT_COMPLEX_ARG alpha_split_complex_arg
10153 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
10154 #define TARGET_GIMPLIFY_VA_ARG_EXPR alpha_gimplify_va_arg
10155 #undef TARGET_ARG_PARTIAL_BYTES
10156 #define TARGET_ARG_PARTIAL_BYTES alpha_arg_partial_bytes
10157 #undef TARGET_FUNCTION_ARG
10158 #define TARGET_FUNCTION_ARG alpha_function_arg
10159 #undef TARGET_FUNCTION_ARG_ADVANCE
10160 #define TARGET_FUNCTION_ARG_ADVANCE alpha_function_arg_advance
10161 #undef TARGET_TRAMPOLINE_INIT
10162 #define TARGET_TRAMPOLINE_INIT alpha_trampoline_init
10164 #undef TARGET_INSTANTIATE_DECLS
10165 #define TARGET_INSTANTIATE_DECLS alpha_instantiate_decls
10167 #undef TARGET_SECONDARY_RELOAD
10168 #define TARGET_SECONDARY_RELOAD alpha_secondary_reload
10169 #undef TARGET_SECONDARY_MEMORY_NEEDED
10170 #define TARGET_SECONDARY_MEMORY_NEEDED alpha_secondary_memory_needed
10171 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
10172 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE alpha_secondary_memory_needed_mode
10174 #undef TARGET_SCALAR_MODE_SUPPORTED_P
10175 #define TARGET_SCALAR_MODE_SUPPORTED_P alpha_scalar_mode_supported_p
10176 #undef TARGET_VECTOR_MODE_SUPPORTED_P
10177 #define TARGET_VECTOR_MODE_SUPPORTED_P alpha_vector_mode_supported_p
10179 #undef TARGET_BUILD_BUILTIN_VA_LIST
10180 #define TARGET_BUILD_BUILTIN_VA_LIST alpha_build_builtin_va_list
10182 #undef TARGET_EXPAND_BUILTIN_VA_START
10183 #define TARGET_EXPAND_BUILTIN_VA_START alpha_va_start
10185 #undef TARGET_OPTION_OVERRIDE
10186 #define TARGET_OPTION_OVERRIDE alpha_option_override
10188 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
10189 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
10190 alpha_override_options_after_change
10192 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
10193 #undef TARGET_MANGLE_TYPE
10194 #define TARGET_MANGLE_TYPE alpha_mangle_type
10195 #endif
10197 #undef TARGET_LRA_P
10198 #define TARGET_LRA_P hook_bool_void_false
10200 #undef TARGET_LEGITIMATE_ADDRESS_P
10201 #define TARGET_LEGITIMATE_ADDRESS_P alpha_legitimate_address_p
10203 #undef TARGET_CONDITIONAL_REGISTER_USAGE
10204 #define TARGET_CONDITIONAL_REGISTER_USAGE alpha_conditional_register_usage
10206 #undef TARGET_CANONICALIZE_COMPARISON
10207 #define TARGET_CANONICALIZE_COMPARISON alpha_canonicalize_comparison
10209 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
10210 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV alpha_atomic_assign_expand_fenv
10212 #undef TARGET_HARD_REGNO_MODE_OK
10213 #define TARGET_HARD_REGNO_MODE_OK alpha_hard_regno_mode_ok
10215 #undef TARGET_MODES_TIEABLE_P
10216 #define TARGET_MODES_TIEABLE_P alpha_modes_tieable_p
10218 #undef TARGET_CAN_CHANGE_MODE_CLASS
10219 #define TARGET_CAN_CHANGE_MODE_CLASS alpha_can_change_mode_class
10221 #undef TARGET_C_MODE_FOR_FLOATING_TYPE
10222 #define TARGET_C_MODE_FOR_FLOATING_TYPE alpha_c_mode_for_floating_type
10224 struct gcc_target targetm = TARGET_INITIALIZER;
10227 #include "gt-alpha.h"