/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2025 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#ifndef GCC_AARCH64_PROTOS_H
#define GCC_AARCH64_PROTOS_H

#include "input.h"
#include "config/arm/aarch-common.h"

/* SYMBOL_SMALL_ABSOLUTE: Generate symbol accesses through
   high and lo relocs that calculate the base address using a PC
   relative reloc.
   So to get the address of foo, we generate

   adrp x0, foo
   add  x0, x0, :lo12:foo

   To load or store something to foo, we could use the corresponding
   load/store variants that generate

   ldr x0, [x0, :lo12:foo]
   str x1, [x0, :lo12:foo]

   This corresponds to the small code model of the compiler.

   SYMBOL_SMALL_GOT_4G: Similar to the one above, but this
   gives us the GOT entry of the symbol being referred to:
   calculating the GOT entry for foo is done using the
   following sequence of instructions.  The ADRP instruction
   gets us to the page containing the GOT entry of the symbol
   and the got_lo12 gets us the actual offset in it; together,
   the base and offset can address a 4G-sized GOT table.

   adrp x0, :got:foo
   ldr  x0, [x0, :gotoff_lo12:foo]

   This corresponds to the small PIC model of the compiler.

   SYMBOL_SMALL_GOT_28K: Similar to SYMBOL_SMALL_GOT_4G, but used for symbols
   restricted to a 28K GOT table size.

   ldr reg, [gp, #:gotpage_lo15:sym]

   This corresponds to the -fpic model for the small memory model of the
   compiler.

   SYMBOL_SMALL_TLSGD
   SYMBOL_SMALL_TLSDESC
   SYMBOL_SMALL_TLSIE
   SYMBOL_TINY_TLSIE
   SYMBOL_TLSLE12
   SYMBOL_TLSLE24
   SYMBOL_TLSLE32
   SYMBOL_TLSLE48
   Each of these represents a thread-local symbol, and corresponds to the
   thread-local storage relocation operator for the symbol being referred to.

   SYMBOL_TINY_ABSOLUTE

   Generate symbol accesses as a PC relative address using a single
   instruction.  To compute the address of symbol foo, we generate:

   ADR x0, foo

   SYMBOL_TINY_GOT

   Generate symbol accesses via the GOT using a single PC relative
   instruction.  To compute the address of symbol foo, we generate:

   ldr t0, :got:foo

   The value of foo can subsequently be read using:

   ldrb t0, [t0]

   SYMBOL_FORCE_TO_MEM: Global variables are addressed using the
   constant pool.  All variable addresses are spilled into constant
   pools.  The constant pools themselves are addressed using PC
   relative accesses.  This only works for the large code model.  */

enum aarch64_symbol_type
{
  SYMBOL_SMALL_ABSOLUTE,
  SYMBOL_SMALL_GOT_28K,
  SYMBOL_SMALL_GOT_4G,
  SYMBOL_SMALL_TLSGD,
  SYMBOL_SMALL_TLSDESC,
  SYMBOL_SMALL_TLSIE,
  SYMBOL_TINY_ABSOLUTE,
  SYMBOL_TINY_GOT,
  SYMBOL_TINY_TLSIE,
  SYMBOL_TLSLE12,
  SYMBOL_TLSLE24,
  SYMBOL_TLSLE32,
  SYMBOL_TLSLE48,
  SYMBOL_FORCE_TO_MEM
};

/* Classifies the type of an address query.

   ADDR_QUERY_M
      Query what is valid for an "m" constraint and a memory_operand
      (the rules are the same for both).

   ADDR_QUERY_LDP_STP
      Query what is valid for a load/store pair.

   ADDR_QUERY_LDP_STP_N
      Query what is valid for a load/store pair, but narrow the incoming mode
      for address checking.  This is used for the store_pair_lanes patterns.

   ADDR_QUERY_ANY
      Query what is valid for at least one memory constraint, which may
      allow things that "m" doesn't.  For example, the SVE LDR and STR
      addressing modes allow a wider range of immediate offsets than "m"
      does.  */
enum aarch64_addr_query_type {
  ADDR_QUERY_M,
  ADDR_QUERY_LDP_STP,
  ADDR_QUERY_LDP_STP_N,
  ADDR_QUERY_ANY
};
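
/* Illustrative only (a hypothetical sketch, not code from this file): these
   values are typically passed as the optional last argument of
   aarch64_legitimate_address_p or aarch64_classify_address, both declared
   later in this header, e.g.

     if (aarch64_legitimate_address_p (DImode, XEXP (mem, 0), false,
				       ADDR_QUERY_LDP_STP))
       ...

   to ask whether an address is valid for a load/store pair rather than
   only for an "m" constraint.  The surrounding code and the variable "mem"
   are assumptions made for the example.  */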

/* Enumerates values that can be arbitrarily mixed into a calculation
   in order to make the result of the calculation unique to its use case.

   AARCH64_SALT_SSP_SET
   AARCH64_SALT_SSP_TEST
      Used when calculating the address of the stack protection canary value.
      There is a separate value for setting and testing the canary, meaning
      that these two operations produce unique addresses: they are different
      from each other, and from all other address calculations.

      The main purpose of this is to prevent the SET address being spilled
      to the stack and reloaded for the TEST, since that would give an
      attacker the opportunity to change the address of the expected
      canary value.  */
enum aarch64_salt_type {
  AARCH64_SALT_SSP_SET,
  AARCH64_SALT_SSP_TEST
};
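
/* Illustrative only (a hypothetical sketch, not code from this file): the
   salt value is passed to aarch64_stack_protect_canary_mem, declared later
   in this header, so that the SET and TEST addresses are computed by
   distinct, non-interchangeable expressions, e.g.

     rtx set_mem = aarch64_stack_protect_canary_mem (DImode, guard_rtx,
						     AARCH64_SALT_SSP_SET);
     rtx test_mem = aarch64_stack_protect_canary_mem (DImode, guard_rtx,
						      AARCH64_SALT_SSP_TEST);

   The mode and "guard_rtx" arguments are placeholders; only the salt
   argument is the point of the example.  */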

/* A set of tuning parameters contains references to size and time
   cost models and vectors for address cost calculations, register
   move costs and memory move costs.  */

/* Scaled addressing modes can vary cost depending on the mode of the
   value to be loaded/stored.  QImode values cannot use scaled
   addressing modes.  */

struct scale_addr_mode_cost
{
  const int hi;
  const int si;
  const int di;
  const int ti;
};

/* Additional cost for addresses.  */
struct cpu_addrcost_table
{
  const struct scale_addr_mode_cost addr_scale_costs;
  const int pre_modify;
  const int post_modify;
  const int post_modify_ld3_st3;
  const int post_modify_ld4_st4;
  const int register_offset;
  const int register_sextend;
  const int register_zextend;
  const int imm_offset;
};
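
/* Illustrative only (a hypothetical sketch; the numbers are invented):
   tuning code elsewhere in the backend provides one such table per CPU,
   filled in field order, roughly like

     static const struct cpu_addrcost_table example_addrcost_table =
     {
       { 0, 0, 0, 1 },	// addr_scale_costs: hi, si, di, ti
       0,		// pre_modify
       0,		// post_modify
       0,		// post_modify_ld3_st3
       0,		// post_modify_ld4_st4
       1,		// register_offset
       1,		// register_sextend
       1,		// register_zextend
       0		// imm_offset
     };

   and points tune_params::addr_cost at it.  */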

/* Additional costs for register copies.  Cost is for one register.  */
struct cpu_regmove_cost
{
  const int GP2GP;
  const int GP2FP;
  const int FP2GP;
  const int FP2FP;
};

struct simd_vec_cost
{
  /* Cost of any integer vector operation, excluding the ones handled
     specially below.  */
  const int int_stmt_cost;

  /* Cost of any fp vector operation, excluding the ones handled
     specially below.  */
  const int fp_stmt_cost;

  /* Per-vector cost of permuting vectors after an LD2, LD3 or LD4,
     as well as the per-vector cost of permuting vectors before
     an ST2, ST3 or ST4.  */
  const int ld2_st2_permute_cost;
  const int ld3_st3_permute_cost;
  const int ld4_st4_permute_cost;

  /* Cost of a permute operation.  */
  const int permute_cost;

  /* Cost of reductions for various vector types: iN is for N-bit
     integer elements and fN is for N-bit floating-point elements.
     We need to single out the element type because it affects the
     depth of the reduction.  */
  const int reduc_i8_cost;
  const int reduc_i16_cost;
  const int reduc_i32_cost;
  const int reduc_i64_cost;
  const int reduc_f16_cost;
  const int reduc_f32_cost;
  const int reduc_f64_cost;

  /* Additional cost of storing a single vector element, on top of the
     normal cost of a scalar store.  */
  const int store_elt_extra_cost;

  /* Cost of a vector-to-scalar operation.  */
  const int vec_to_scalar_cost;

  /* Cost of a scalar-to-vector operation.  */
  const int scalar_to_vec_cost;

  /* Cost of an aligned vector load.  */
  const int align_load_cost;

  /* Cost of an unaligned vector load.  */
  const int unalign_load_cost;

  /* Cost of an unaligned vector store.  */
  const int unalign_store_cost;

  /* Cost of a vector store.  */
  const int store_cost;
};

typedef struct simd_vec_cost advsimd_vec_cost;

/* SVE-specific extensions to the information provided by simd_vec_cost.  */
struct sve_vec_cost : simd_vec_cost
{
  CONSTEXPR sve_vec_cost (const simd_vec_cost &base,
			  unsigned int clast_cost,
			  unsigned int fadda_f16_cost,
			  unsigned int fadda_f32_cost,
			  unsigned int fadda_f64_cost,
			  unsigned int gather_load_x32_cost,
			  unsigned int gather_load_x64_cost,
			  unsigned int gather_load_x32_init_cost,
			  unsigned int gather_load_x64_init_cost,
			  unsigned int scatter_store_elt_cost)
    : simd_vec_cost (base),
      clast_cost (clast_cost),
      fadda_f16_cost (fadda_f16_cost),
      fadda_f32_cost (fadda_f32_cost),
      fadda_f64_cost (fadda_f64_cost),
      gather_load_x32_cost (gather_load_x32_cost),
      gather_load_x64_cost (gather_load_x64_cost),
      gather_load_x32_init_cost (gather_load_x32_init_cost),
      gather_load_x64_init_cost (gather_load_x64_init_cost),
      scatter_store_elt_cost (scatter_store_elt_cost)
  {}

  /* The cost of a vector-to-scalar CLASTA or CLASTB instruction,
     with the scalar being stored in FP registers.  This cost is
     assumed to be a cycle latency.  */
  const int clast_cost;

  /* The costs of FADDA for the three data types that it supports.
     These costs are assumed to be cycle latencies.  */
  const int fadda_f16_cost;
  const int fadda_f32_cost;
  const int fadda_f64_cost;

  /* The cost of a gather load instruction.  The x32 value is for loads
     of 32-bit elements and the x64 value is for loads of 64-bit elements.  */
  const int gather_load_x32_cost;
  const int gather_load_x64_cost;

  /* Additional loop initialization cost of using a gather load instruction.
     The x32 value is for loads of 32-bit elements and the x64 value is for
     loads of 64-bit elements.  */
  const int gather_load_x32_init_cost;
  const int gather_load_x64_init_cost;

  /* The per-element cost of a scatter store.  */
  const int scatter_store_elt_cost;
};

/* Base information about how the CPU issues code, containing
   information that is relevant to scalar, Advanced SIMD and SVE
   operations.

   The structure uses the general term "operation" to refer to
   whichever subdivision of an instruction makes sense for the CPU.
   These operations would typically be micro operations or macro
   operations.

   Note that this structure and the ones derived from it are only
   as general as they need to be for the CPUs that currently use them.
   They will probably need to be extended or refined as more CPUs are
   added.  */
struct aarch64_base_vec_issue_info
{
  /* How many loads and stores can be issued per cycle.  */
  const unsigned int loads_stores_per_cycle;

  /* How many stores can be issued per cycle.  */
  const unsigned int stores_per_cycle;

  /* How many integer or FP/SIMD operations can be issued per cycle.

     Currently we don't try to distinguish the two.  For vector code,
     we only really track FP/SIMD operations during vector costing;
     we don't for example try to cost arithmetic operations like
     address calculations, which are only decided later during ivopts.

     For scalar code, we effectively assume that code operates entirely
     on integers or entirely on floating-point values.  Again, we don't
     try to take address calculations into account.

     This is not very precise, but it's only meant to be a heuristic.
     We could certainly try to do better in future if there's an example
     of something that would benefit.  */
  const unsigned int general_ops_per_cycle;

  /* How many FP/SIMD operations to count for a floating-point or
     vector load operation.

     When constructing an Advanced SIMD vector from elements that have
     been loaded from memory, these values apply to each individual load.
     When using an SVE gather load, the values apply to each element of
     the gather.  */
  const unsigned int fp_simd_load_general_ops;

  /* How many FP/SIMD operations to count for a floating-point or
     vector store operation.

     When storing individual elements of an Advanced SIMD vector out to
     memory, these values apply to each individual store.  When using an
     SVE scatter store, these values apply to each element of the scatter.  */
  const unsigned int fp_simd_store_general_ops;
};

using aarch64_scalar_vec_issue_info = aarch64_base_vec_issue_info;

/* Base information about the issue stage for vector operations.
   This structure contains information that is relevant to both
   Advanced SIMD and SVE.  */
struct aarch64_simd_vec_issue_info : aarch64_base_vec_issue_info
{
  CONSTEXPR aarch64_simd_vec_issue_info (aarch64_base_vec_issue_info base,
					 unsigned int ld2_st2_general_ops,
					 unsigned int ld3_st3_general_ops,
					 unsigned int ld4_st4_general_ops)
    : aarch64_base_vec_issue_info (base),
      ld2_st2_general_ops (ld2_st2_general_ops),
      ld3_st3_general_ops (ld3_st3_general_ops),
      ld4_st4_general_ops (ld4_st4_general_ops)
  {}

  /* How many FP/SIMD operations to count for each vector loaded or
     stored by an LD[234] or ST[234] operation, in addition to the
     base costs given in the parent class.  For example, the full
     number of operations for an LD3 would be:

       load ops:    3
       general ops: 3 * (fp_simd_load_general_ops + ld3_st3_general_ops).  */
  const unsigned int ld2_st2_general_ops;
  const unsigned int ld3_st3_general_ops;
  const unsigned int ld4_st4_general_ops;
};
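
/* Worked example for the formula above (the numbers are hypothetical):
   with fp_simd_load_general_ops == 1 and ld3_st3_general_ops == 2, an LD3
   that writes three vectors is counted as 3 load operations plus
   3 * (1 + 2) == 9 general operations.  */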

using aarch64_advsimd_vec_issue_info = aarch64_simd_vec_issue_info;

/* Information about the issue stage for SVE.  The main thing this adds
   is a concept of "predicate operations".  */
struct aarch64_sve_vec_issue_info : aarch64_simd_vec_issue_info
{
  CONSTEXPR aarch64_sve_vec_issue_info
    (aarch64_simd_vec_issue_info base,
     unsigned int pred_ops_per_cycle,
     unsigned int while_pred_ops,
     unsigned int int_cmp_pred_ops,
     unsigned int fp_cmp_pred_ops,
     unsigned int gather_scatter_pair_general_ops,
     unsigned int gather_scatter_pair_pred_ops)
    : aarch64_simd_vec_issue_info (base),
      pred_ops_per_cycle (pred_ops_per_cycle),
      while_pred_ops (while_pred_ops),
      int_cmp_pred_ops (int_cmp_pred_ops),
      fp_cmp_pred_ops (fp_cmp_pred_ops),
      gather_scatter_pair_general_ops (gather_scatter_pair_general_ops),
      gather_scatter_pair_pred_ops (gather_scatter_pair_pred_ops)
  {}

  /* How many predicate operations can be issued per cycle.  */
  const unsigned int pred_ops_per_cycle;

  /* How many predicate operations are generated by a WHILExx
     instruction.  */
  const unsigned int while_pred_ops;

  /* How many predicate operations are generated by an integer
     comparison instruction.  */
  const unsigned int int_cmp_pred_ops;

  /* How many predicate operations are generated by a floating-point
     comparison instruction.  */
  const unsigned int fp_cmp_pred_ops;

  /* How many general and predicate operations are generated by each pair
     of elements in a gather load or scatter store.  These values apply
     on top of the per-element counts recorded in fp_simd_load_general_ops
     and fp_simd_store_general_ops.

     The reason for using pairs is that this is the largest possible
     granule size for 128-bit SVE, which can load and store 2 64-bit
     elements or 4 32-bit elements.  */
  const unsigned int gather_scatter_pair_general_ops;
  const unsigned int gather_scatter_pair_pred_ops;
};

/* Information related to instruction issue for a particular CPU.  */
struct aarch64_vec_issue_info
{
  const aarch64_base_vec_issue_info *const scalar;
  const aarch64_simd_vec_issue_info *const advsimd;
  const aarch64_sve_vec_issue_info *const sve;
};

/* Cost for vector insn classes.  */
struct cpu_vector_cost
{
  /* Cost of any integer scalar operation, excluding load and store.  */
  const int scalar_int_stmt_cost;

  /* Cost of any fp scalar operation, excluding load and store.  */
  const int scalar_fp_stmt_cost;

  /* Cost of a scalar load.  */
  const int scalar_load_cost;

  /* Cost of a scalar store.  */
  const int scalar_store_cost;

  /* Cost of a taken branch.  */
  const int cond_taken_branch_cost;

  /* Cost of a not-taken branch.  */
  const int cond_not_taken_branch_cost;

  /* Costs of Advanced SIMD operations.  */
  const advsimd_vec_cost *advsimd;

  /* Costs of SVE operations, or null if SVE is not implemented.  */
  const sve_vec_cost *sve;

  /* Issue information, or null if none is provided.  */
  const aarch64_vec_issue_info *const issue_info;
};

/* Branch costs.  */
struct cpu_branch_cost
{
  const int predictable;    /* Predictable branch or optimizing for size.  */
  const int unpredictable;  /* Unpredictable branch or optimizing for speed.  */
};

/* Control approximate alternatives to certain FP operators.  */
#define AARCH64_APPROX_MODE(MODE) \
  ((MIN_MODE_FLOAT <= (MODE) && (MODE) <= MAX_MODE_FLOAT) \
   ? ((uint64_t) 1 << ((MODE) - MIN_MODE_FLOAT)) \
   : (MIN_MODE_VECTOR_FLOAT <= (MODE) && (MODE) <= MAX_MODE_VECTOR_FLOAT) \
     ? ((uint64_t) 1 << ((MODE) - MIN_MODE_VECTOR_FLOAT \
			 + MAX_MODE_FLOAT - MIN_MODE_FLOAT + 1)) \
     : (0))
#define AARCH64_APPROX_NONE ((uint64_t) 0)
#define AARCH64_APPROX_ALL (~(uint64_t) 0)

/* Allowed modes for approximations.  */
struct cpu_approx_modes
{
  const uint64_t division;	/* Division.  */
  const uint64_t sqrt;		/* Square root.  */
  const uint64_t recip_sqrt;	/* Reciprocal square root.  */
};
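
/* Illustrative only (a hypothetical sketch; the mode choices are invented):
   a tuning table can enable the approximate sequences per mode by OR-ing
   AARCH64_APPROX_MODE bits, or use the catch-all masks, e.g.

     static const cpu_approx_modes example_approx_modes =
     {
       AARCH64_APPROX_NONE,			// division
       AARCH64_APPROX_ALL,			// sqrt
       AARCH64_APPROX_MODE (V2SFmode)		// recip_sqrt
       | AARCH64_APPROX_MODE (V4SFmode)
     };

   tune_params::approx_modes then points at such a table.  */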

/* Cache prefetch settings for prefetch-loop-arrays.  */
struct cpu_prefetch_tune
{
  const int num_slots;
  const int l1_cache_size;
  const int l1_cache_line_size;
  const int l2_cache_size;
  /* Whether software prefetch hints should be issued for non-constant
     strides.  */
  const bool prefetch_dynamic_strides;
  /* The minimum constant stride at or above which we should use prefetch
     hints.  */
  const int minimum_stride;
  const int default_opt_level;
};

/* Model the costs for loads/stores for the register allocators so that
   they can do more accurate spill heuristics.  */
struct cpu_memmov_cost
{
  int load_int;
  int store_int;
  int load_fp;
  int store_fp;
  int load_pred;
  int store_pred;
};

struct tune_params
{
  const struct cpu_cost_table *insn_extra_cost;
  const struct cpu_addrcost_table *addr_cost;
  const struct cpu_regmove_cost *regmove_cost;
  const struct cpu_vector_cost *vec_costs;
  const struct cpu_branch_cost *branch_costs;
  const struct cpu_approx_modes *approx_modes;
  /* A bitmask of the possible SVE register widths in bits,
     or SVE_NOT_IMPLEMENTED if not applicable.  Only used for tuning
     decisions, does not disable VLA vectorization.  */
  unsigned int sve_width;
  /* Structure used by reload to cost spills.  */
  struct cpu_memmov_cost memmov_cost;
  int issue_rate;
  unsigned int fusible_ops;
  const char *function_align;
  const char *jump_align;
  const char *loop_align;
  int int_reassoc_width;
  int fp_reassoc_width;
  int fma_reassoc_width;
  int vec_reassoc_width;
  int min_div_recip_mul_sf;
  int min_div_recip_mul_df;
  /* Value for aarch64_case_values_threshold; or 0 for the default.  */
  unsigned int max_case_values;
  /* An enum specifying how to take into account CPU autoprefetch capabilities
     during instruction scheduling:
     - AUTOPREFETCHER_OFF: Do not take autoprefetch capabilities into account.
     - AUTOPREFETCHER_WEAK: Attempt to sort sequences of loads/stores in order
       of offsets, but allow the pipeline hazard recognizer to alter that
       order to maximize multi-issue opportunities.
     - AUTOPREFETCHER_STRONG: Attempt to sort sequences of loads/stores in
       order of offsets and prefer this even if it restricts multi-issue
       opportunities.  */
  enum aarch64_autoprefetch_model
  {
    AUTOPREFETCHER_OFF,
    AUTOPREFETCHER_WEAK,
    AUTOPREFETCHER_STRONG
  } autoprefetcher_model;

  unsigned int extra_tuning_flags;

  /* Place the prefetch struct pointer at the end so that omitted elements
     in a tune_params initializer (e.g. from erroneous merges) show up as
     type checking errors.  */
  const struct cpu_prefetch_tune *prefetch;

  /* Models for the aarch64_ldp_stp_policy.  */
  enum aarch64_ldp_stp_policy ldp_policy_model, stp_policy_model;
};

/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC
       A constant symbolic address, in the pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};

/* Address information.  */
struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  poly_int64 const_offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};
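
/* Illustrative only (a hypothetical sketch, not code from this file):
   callers fill this structure in via aarch64_classify_address, declared
   near the end of this header, e.g.

     struct aarch64_address_info addr;
     if (aarch64_classify_address (&addr, XEXP (mem, 0), GET_MODE (mem),
				   false, ADDR_QUERY_LDP_STP)
	 && addr.type == ADDRESS_REG_IMM)
       ... use addr.base and addr.const_offset ...

   The variable "mem" and the chosen query type are assumptions made for
   the example.  */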

#define AARCH64_FUSION_PAIR(x, name) \
  AARCH64_FUSE_##name##_index,
/* Supported fusion operations.  */
enum aarch64_fusion_pairs_index
{
#include "aarch64-fusion-pairs.def"
  AARCH64_FUSE_index_END
};

#define AARCH64_FUSION_PAIR(x, name) \
  AARCH64_FUSE_##name = (1u << AARCH64_FUSE_##name##_index),
/* Supported fusion operations.  */
enum aarch64_fusion_pairs
{
  AARCH64_FUSE_NOTHING = 0,
#include "aarch64-fusion-pairs.def"
  AARCH64_FUSE_ALL = (1u << AARCH64_FUSE_index_END) - 1
};
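
/* Illustrative only: each AARCH64_FUSION_PAIR entry in
   aarch64-fusion-pairs.def expands once in each of the two enums above.
   Assuming a hypothetical entry

     AARCH64_FUSION_PAIR ("mov+movk", MOV_MOVK)

   the first expansion yields the index AARCH64_FUSE_MOV_MOVK_index and the
   second yields the bitmask AARCH64_FUSE_MOV_MOVK
   == (1u << AARCH64_FUSE_MOV_MOVK_index), which a CPU's fusible_ops field
   can include and aarch64_fusion_enabled_p (declared below) can test.  */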

#define AARCH64_EXTRA_TUNING_OPTION(x, name) \
  AARCH64_EXTRA_TUNE_##name##_index,
/* Supported tuning flags indexes.  */
enum aarch64_extra_tuning_flags_index
{
#include "aarch64-tuning-flags.def"
  AARCH64_EXTRA_TUNE_index_END
};

#define AARCH64_EXTRA_TUNING_OPTION(x, name) \
  AARCH64_EXTRA_TUNE_##name = (1u << AARCH64_EXTRA_TUNE_##name##_index),
/* Supported tuning flags.  */
enum aarch64_extra_tuning_flags
{
  AARCH64_EXTRA_TUNE_NONE = 0,
#include "aarch64-tuning-flags.def"
  AARCH64_EXTRA_TUNE_ALL = (1u << AARCH64_EXTRA_TUNE_index_END) - 1
};

extern struct tune_params aarch64_tune_params;

/* The available SVE predicate patterns, known in the ACLE as "svpattern".  */
#define AARCH64_FOR_SVPATTERN(T) \
  T (POW2, pow2, 0) \
  T (VL1, vl1, 1) \
  T (VL2, vl2, 2) \
  T (VL3, vl3, 3) \
  T (VL4, vl4, 4) \
  T (VL5, vl5, 5) \
  T (VL6, vl6, 6) \
  T (VL7, vl7, 7) \
  T (VL8, vl8, 8) \
  T (VL16, vl16, 9) \
  T (VL32, vl32, 10) \
  T (VL64, vl64, 11) \
  T (VL128, vl128, 12) \
  T (VL256, vl256, 13) \
  T (MUL4, mul4, 29) \
  T (MUL3, mul3, 30) \
  T (ALL, all, 31)

/* The available SVE prefetch operations, known in the ACLE as "svprfop".  */
#define AARCH64_FOR_SVPRFOP(T) \
  T (PLDL1KEEP, pldl1keep, 0) \
  T (PLDL1STRM, pldl1strm, 1) \
  T (PLDL2KEEP, pldl2keep, 2) \
  T (PLDL2STRM, pldl2strm, 3) \
  T (PLDL3KEEP, pldl3keep, 4) \
  T (PLDL3STRM, pldl3strm, 5) \
  T (PSTL1KEEP, pstl1keep, 8) \
  T (PSTL1STRM, pstl1strm, 9) \
  T (PSTL2KEEP, pstl2keep, 10) \
  T (PSTL2STRM, pstl2strm, 11) \
  T (PSTL3KEEP, pstl3keep, 12) \
  T (PSTL3STRM, pstl3strm, 13)

#define AARCH64_SVENUM(UPPER, LOWER, VALUE) AARCH64_SV_##UPPER = VALUE,
enum aarch64_svpattern {
  AARCH64_FOR_SVPATTERN (AARCH64_SVENUM)
  AARCH64_NUM_SVPATTERNS
};

enum aarch64_svprfop {
  AARCH64_FOR_SVPRFOP (AARCH64_SVENUM)
  AARCH64_NUM_SVPRFOPS
};
#undef AARCH64_SVENUM

/* It's convenient to divide the built-in function codes into groups,
   rather than having everything in a single enum.  This type enumerates
   those groups.  */
enum aarch64_builtin_class
{
  AARCH64_BUILTIN_GENERAL,
  AARCH64_BUILTIN_SVE
};

/* Built-in function codes are structured so that the low
   AARCH64_BUILTIN_SHIFT bits contain the aarch64_builtin_class
   and the upper bits contain a group-specific subcode.  */
const unsigned int AARCH64_BUILTIN_SHIFT = 1;

/* Mask that selects the aarch64_builtin_class part of a function code.  */
const unsigned int AARCH64_BUILTIN_CLASS = (1 << AARCH64_BUILTIN_SHIFT) - 1;
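
/* Illustrative only (a hypothetical sketch of the encoding described
   above): a combined function code is built as

     unsigned int code = (subcode << AARCH64_BUILTIN_SHIFT) | klass;

   where klass is AARCH64_BUILTIN_GENERAL or AARCH64_BUILTIN_SVE, and is
   decomposed again with

     aarch64_builtin_class klass
       = (aarch64_builtin_class) (code & AARCH64_BUILTIN_CLASS);
     unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT;  */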

/* RAII class for enabling enough features to define built-in types
   and implement the arm_neon.h pragma.  */
class aarch64_simd_switcher
{
public:
  aarch64_simd_switcher (aarch64_feature_flags extra_flags = 0);
  ~aarch64_simd_switcher ();

private:
  aarch64_feature_flags m_old_asm_isa_flags;
  bool m_old_general_regs_only;
};
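
/* Usage sketch (hypothetical, for illustration only): as an RAII helper,
   the switcher is simply constructed for the duration of the code that
   needs the extra features, e.g.

     {
       aarch64_simd_switcher simd;
       // define Advanced SIMD built-in types here
     }	// previous ISA flags and general-regs-only state are restored

   An aarch64_feature_flags value can be passed to the constructor to
   enable further features on top of the default.  */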

/* Represents the ISA requirements of an intrinsic function, or of some
   other similar operation.  It stores separate feature flags for
   non-streaming mode and for streaming mode; both requirements must
   be met in streaming-compatible mode.  */
struct aarch64_required_extensions
{
  /* Return a requirement that includes FLAGS on top of any existing
     requirements.  */
  inline CONSTEXPR aarch64_required_extensions
  and_also (aarch64_feature_flags flags)
  {
    return { sm_off ? sm_off | flags : 0,
	     sm_on ? sm_on | flags : 0 };
  }

  /* Return a requirement that is as restrictive as possible while still being
     no more restrictive than THIS and no more restrictive than OTHER.  */
  inline CONSTEXPR aarch64_required_extensions
  common_denominator (const aarch64_required_extensions &other)
  {
    return { sm_off && other.sm_off
	     ? sm_off & other.sm_off
	     : sm_off | other.sm_off,
	     sm_on && other.sm_on
	     ? sm_on & other.sm_on
	     : sm_on | other.sm_on };
  }

  /* Require non-streaming mode and the features in FLAGS.  */
  static inline CONSTEXPR aarch64_required_extensions
  nonstreaming_only (aarch64_feature_flags flags)
  {
    return { AARCH64_FL_SM_OFF | flags, 0 };
  }

  /* Likewise, and also require SVE.  */
  static inline CONSTEXPR aarch64_required_extensions
  nonstreaming_sve (aarch64_feature_flags flags)
  {
    return nonstreaming_only (AARCH64_FL_SVE | flags);
  }

  /* Allow both streaming and non-streaming mode, requiring the features
     in FLAGS for both cases.  */
  static inline CONSTEXPR aarch64_required_extensions
  streaming_compatible (aarch64_feature_flags flags)
  {
    return { AARCH64_FL_SM_OFF | flags, AARCH64_FL_SM_ON | flags };
  }

  /* Likewise, and also require SVE for non-streaming mode.  */
  static inline CONSTEXPR aarch64_required_extensions
  ssve (aarch64_feature_flags flags)
  {
    return streaming_compatible (AARCH64_FL_SVE | flags, flags);
  }

  /* Allow both streaming and non-streaming mode, requiring the features
     in SM_OFF for non-streaming mode and the features in SM_ON for
     streaming mode.  */
  static inline CONSTEXPR aarch64_required_extensions
  streaming_compatible (aarch64_feature_flags sm_off,
			aarch64_feature_flags sm_on)
  {
    return { AARCH64_FL_SM_OFF | sm_off, AARCH64_FL_SM_ON | sm_on };
  }

  /* Likewise, and also require SVE for non-streaming mode.  */
  static inline CONSTEXPR aarch64_required_extensions
  sve_and_sme (aarch64_feature_flags sm_off, aarch64_feature_flags sm_on)
  {
    return streaming_compatible (AARCH64_FL_SVE | sm_off, sm_on);
  }

  /* Require streaming mode and the features in FLAGS.  */
  static inline CONSTEXPR aarch64_required_extensions
  streaming_only (aarch64_feature_flags flags)
  {
    return { 0, AARCH64_FL_SM_ON | flags };
  }

  /* The ISA requirements in non-streaming mode, or 0 if the operation
     is only allowed in streaming mode.  When this field is nonzero,
     it always includes AARCH64_FL_SM_OFF.  */
  aarch64_feature_flags sm_off;

  /* The ISA requirements in streaming mode, or 0 if the operation is only
     allowed in non-streaming mode.  When this field is nonzero,
     it always includes AARCH64_FL_SM_ON.

     This field should not normally include AARCH64_FL_SME, since we
     would not be in streaming mode if SME wasn't supported.  Excluding
     AARCH64_FL_SME makes it easier to handle streaming-compatible rules
     since (for example) svadd_x should be available in streaming-compatible
     functions even without +sme.  */
  aarch64_feature_flags sm_on;
};
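
/* Usage sketch (hypothetical, for illustration only): an intrinsic
   definition might record its requirements as

     aarch64_required_extensions required
       = aarch64_required_extensions::ssve (0);

   meaning "needs SVE in non-streaming mode, nothing extra in streaming
   mode"; the requirements are later checked against the current target
   with aarch64_check_required_extensions, declared further down this
   file.  */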

void aarch64_post_cfi_startproc (void);
poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned);
int aarch64_get_condition_code (rtx);
bool aarch64_address_valid_for_prefetch_p (rtx, bool);
bool aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode);
unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in);
unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode);
int aarch64_branch_cost (bool, bool);
enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx);
bool aarch64_advsimd_struct_mode_p (machine_mode mode);
opt_machine_mode aarch64_v64_mode (scalar_mode);
opt_machine_mode aarch64_v128_mode (scalar_mode);
opt_machine_mode aarch64_full_sve_mode (scalar_mode);
bool aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode);
bool aarch64_valid_fp_move (rtx, rtx, machine_mode);
bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
bool aarch64_const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT,
					    HOST_WIDE_INT);
bool aarch64_const_vec_rnd_cst_p (rtx, rtx);
bool aarch64_rnd_imm_p (rtx);
bool aarch64_constant_address_p (rtx);
bool aarch64_emit_approx_div (rtx, rtx, rtx);
bool aarch64_emit_approx_sqrt (rtx, rtx, bool);
bool aarch64_emit_opt_vec_rotate (rtx, rtx, rtx);
tree aarch64_vector_load_decl (tree);
rtx aarch64_gen_callee_cookie (aarch64_isa_mode, arm_pcs, bool);
void aarch64_expand_call (rtx, rtx, rtx, bool);
bool aarch64_expand_cpymem_mops (rtx *, bool);
bool aarch64_expand_cpymem (rtx *, bool);
bool aarch64_expand_setmem (rtx *);
bool aarch64_float_const_zero_rtx_p (rtx);
bool aarch64_float_const_rtx_p (rtx);
bool aarch64_const_zero_rtx_p (rtx);
bool aarch64_function_arg_regno_p (unsigned);
bool aarch64_fusion_enabled_p (enum aarch64_fusion_pairs);
bool aarch64_gen_cpymemqi (rtx *);
bool aarch64_is_extend_from_extract (scalar_int_mode, rtx, rtx);
bool aarch64_is_long_call_p (rtx);
bool aarch64_is_noplt_call_p (rtx);
bool aarch64_label_mentioned_p (rtx);
void aarch64_declare_function_name (FILE *, const char*, tree);
void aarch64_asm_output_alias (FILE *, const tree, const tree);
void aarch64_asm_output_external (FILE *, tree, const char*);
bool aarch64_legitimate_pic_operand_p (rtx);
bool aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode, rtx, rtx);
bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned HOST_WIDE_INT,
					unsigned HOST_WIDE_INT,
					unsigned HOST_WIDE_INT);
rtx aarch64_sve_reinterpret (machine_mode, rtx);
bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
bool aarch64_move_imm (unsigned HOST_WIDE_INT, machine_mode);
machine_mode aarch64_sve_int_mode (machine_mode);
opt_machine_mode aarch64_sve_pred_mode (unsigned int);
machine_mode aarch64_sve_pred_mode (machine_mode);
opt_machine_mode aarch64_advsimd_vector_array_mode (machine_mode,
						    unsigned HOST_WIDE_INT);
opt_machine_mode aarch64_sve_data_mode (scalar_mode, poly_uint64);
bool aarch64_sve_mode_p (machine_mode);
HOST_WIDE_INT aarch64_fold_sve_cnt_pat (aarch64_svpattern, unsigned int);
bool aarch64_sve_cnt_immediate_p (rtx);
bool aarch64_sve_scalar_inc_dec_immediate_p (rtx);
bool aarch64_sve_rdvl_immediate_p (rtx);
bool aarch64_sve_addvl_addpl_immediate_p (rtx);
bool aarch64_sve_vector_inc_dec_immediate_p (rtx);
int aarch64_add_offset_temporaries (rtx);
void aarch64_split_add_offset (scalar_int_mode, rtx, rtx, rtx, rtx, rtx);
bool aarch64_rdsvl_immediate_p (const_rtx);
rtx aarch64_sme_vq_immediate (machine_mode mode, HOST_WIDE_INT,
			      aarch64_isa_mode);
char *aarch64_output_rdsvl (const_rtx);
bool aarch64_addsvl_addspl_immediate_p (const_rtx);
char *aarch64_output_addsvl_addspl (rtx);
bool aarch64_mov_operand_p (rtx, machine_mode);
rtx aarch64_reverse_mask (machine_mode, unsigned int);
bool aarch64_offset_7bit_signed_scaled_p (machine_mode, poly_int64);
bool aarch64_offset_9bit_signed_unscaled_p (machine_mode, poly_int64);
char *aarch64_output_sve_prefetch (const char *, rtx, const char *);
char *aarch64_output_sve_cnt_immediate (const char *, const char *, rtx);
char *aarch64_output_sve_cnt_pat_immediate (const char *, const char *, rtx *);
char *aarch64_output_sve_scalar_inc_dec (rtx);
char *aarch64_output_sve_rdvl (rtx);
char *aarch64_output_sve_addvl_addpl (rtx);
char *aarch64_output_sve_vector_inc_dec (const char *, rtx);
char *aarch64_output_scalar_simd_mov_immediate (rtx, scalar_int_mode);
char *aarch64_output_simd_mov_imm (rtx, unsigned);
char *aarch64_output_simd_orr_imm (rtx, unsigned);
char *aarch64_output_simd_and_imm (rtx, unsigned);
char *aarch64_output_simd_xor_imm (rtx, unsigned);

char *aarch64_output_sve_mov_immediate (rtx);
char *aarch64_output_sve_ptrues (rtx);
bool aarch64_pad_reg_upward (machine_mode, const_tree, bool);
bool aarch64_regno_ok_for_base_p (int, bool);
bool aarch64_regno_ok_for_index_p (int, bool);
bool aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *fail);
bool aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
					    bool high);
bool aarch64_parallel_select_half_p (machine_mode, rtx);
bool aarch64_pars_overlap_p (rtx, rtx);
bool aarch64_simd_scalar_immediate_valid_for_move (rtx, scalar_int_mode);
bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
bool aarch64_sve_ptrue_svpattern_p (rtx, struct simd_immediate_info *);
bool aarch64_simd_valid_and_imm (rtx);
bool aarch64_simd_valid_mov_imm (rtx);
bool aarch64_simd_valid_orr_imm (rtx);
bool aarch64_simd_valid_xor_imm (rtx);
bool aarch64_valid_sysreg_name_p (const char *);
const char *aarch64_retrieve_sysreg (const char *, bool, bool);
rtx aarch64_check_zero_based_sve_index_immediate (rtx);
bool aarch64_maybe_generate_simd_constant (rtx, rtx, machine_mode);
bool aarch64_simd_special_constant_p (rtx, machine_mode);
bool aarch64_sve_index_immediate_p (rtx);
bool aarch64_sve_arith_immediate_p (machine_mode, rtx, bool);
bool aarch64_sve_sqadd_sqsub_immediate_p (machine_mode, rtx, bool);
bool aarch64_sve_bitmask_immediate_p (rtx);
bool aarch64_sve_dup_immediate_p (rtx);
bool aarch64_sve_cmp_immediate_p (rtx, bool);
bool aarch64_sve_float_arith_immediate_p (rtx, bool);
bool aarch64_sve_float_mul_immediate_p (rtx);
bool aarch64_split_dimode_const_store (rtx, rtx);
bool aarch64_symbolic_address_p (rtx);
bool aarch64_uimm12_shift (unsigned HOST_WIDE_INT);
int aarch64_movk_shift (const wide_int_ref &, const wide_int_ref &);
bool aarch64_is_mov_xn_imm (unsigned HOST_WIDE_INT);
bool aarch64_use_return_insn_p (void);
const char *aarch64_output_casesi (rtx *);
const char *aarch64_output_load_tp (rtx);
const char *aarch64_output_sme_zero_za (rtx);

arm_pcs aarch64_tlsdesc_abi_id ();
enum aarch64_symbol_type aarch64_classify_symbol (rtx, HOST_WIDE_INT);
enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx);
enum reg_class aarch64_regno_regclass (unsigned);
int aarch64_asm_preferred_eh_data_format (int, int);
int aarch64_fpconst_pow_of_2 (rtx);
int aarch64_fpconst_pow2_recip (rtx);
machine_mode aarch64_hard_regno_caller_save_mode (unsigned, unsigned,
						  machine_mode);
int aarch64_uxt_size (int, HOST_WIDE_INT);
int aarch64_vec_fpconst_pow_of_2 (rtx);
rtx aarch64_mask_from_zextract_ops (rtx, rtx);
rtx aarch64_return_addr_rtx (void);
rtx aarch64_return_addr (int, rtx);
rtx aarch64_simd_gen_const_vector_dup (machine_mode, HOST_WIDE_INT);
rtx aarch64_gen_shareable_zero (machine_mode);
bool aarch64_split_simd_shift_p (rtx_insn *);
bool aarch64_simd_mem_operand_p (rtx);
bool aarch64_sve_ld1r_operand_p (rtx);
bool aarch64_sve_ld1rq_operand_p (rtx);
bool aarch64_sve_ld1ro_operand_p (rtx, scalar_mode);
bool aarch64_sve_ldff1_operand_p (rtx);
bool aarch64_sve_ldnf1_operand_p (rtx);
bool aarch64_sve_ldr_operand_p (rtx);
bool aarch64_sve_prefetch_operand_p (rtx, machine_mode);
bool aarch64_sve_struct_memory_operand_p (rtx);
bool aarch64_sme_ldr_vnum_offset_p (rtx, rtx);
rtx aarch64_simd_vect_par_cnst_half (machine_mode, int, bool);
rtx aarch64_gen_stepped_int_parallel (unsigned int, int, int);
bool aarch64_stepped_int_parallel_p (rtx, int);
bool aarch64_strided_registers_p (rtx *, unsigned int, unsigned int);
rtx aarch64_tls_get_addr (void);
unsigned aarch64_debugger_regno (unsigned);
unsigned aarch64_trampoline_size (void);
void aarch64_asm_output_labelref (FILE *, const char *);
void aarch64_cpu_cpp_builtins (cpp_reader *);
const char * aarch64_gen_far_branch (rtx *, int, const char *, const char *);
const char * aarch64_output_probe_stack_range (rtx, rtx);
const char * aarch64_output_probe_sve_stack_clash (rtx, rtx, rtx, rtx);
void aarch64_err_no_fpadvsimd (machine_mode);
void aarch64_expand_epilogue (rtx_call_insn *);
rtx aarch64_ptrue_all (unsigned int);
opt_machine_mode aarch64_ptrue_all_mode (rtx);
rtx aarch64_convert_sve_data_to_pred (rtx, machine_mode, rtx);
rtx aarch64_expand_sve_dupq (rtx, machine_mode, rtx);
void aarch64_expand_mov_immediate (rtx, rtx);
rtx aarch64_stack_protect_canary_mem (machine_mode, rtx, aarch64_salt_type);
rtx aarch64_ptrue_reg (machine_mode);
rtx aarch64_ptrue_reg (machine_mode, unsigned int);
rtx aarch64_ptrue_reg (machine_mode, machine_mode);
rtx aarch64_pfalse_reg (machine_mode);
bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *);
void aarch64_emit_sve_pred_move (rtx, rtx, rtx);
void aarch64_expand_sve_mem_move (rtx, rtx, machine_mode);
bool aarch64_maybe_expand_sve_subreg_move (rtx, rtx);
rtx aarch64_replace_reg_mode (rtx, machine_mode);
void aarch64_split_sve_subreg_move (rtx, rtx, rtx);
void aarch64_expand_prologue (void);
void aarch64_expand_vector_init (rtx, rtx);
void aarch64_sve_expand_vector_init_subvector (rtx, rtx);
void aarch64_sve_expand_vector_init (rtx, rtx);
void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx,
				   const_tree, unsigned, bool = false);
void aarch64_init_expanders (void);
rtx_call_insn *aarch64_emit_call_insn (rtx);
void aarch64_register_pragmas (void);
void aarch64_relayout_simd_types (void);
void aarch64_reset_previous_fndecl (void);
bool aarch64_return_address_signing_enabled (void);
void aarch64_save_restore_target_globals (tree);
void aarch64_addti_scratch_regs (rtx, rtx, rtx *,
				 rtx *, rtx *,
				 rtx *, rtx *,
				 rtx *);
void aarch64_subvti_scratch_regs (rtx, rtx, rtx *,
				  rtx *, rtx *,
				  rtx *, rtx *, rtx *);
void aarch64_expand_subvti (rtx, rtx, rtx,
			    rtx, rtx, rtx, rtx, bool);

/* Initialize builtins for SIMD intrinsics.  */
void init_aarch64_simd_builtins (void);

void aarch64_simd_emit_reg_reg_move (rtx *, machine_mode, unsigned int);

/* Expand builtins for SIMD intrinsics.  */
rtx aarch64_simd_expand_builtin (int, tree, rtx);

void aarch64_simd_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT, const_tree);
rtx aarch64_endian_lane_rtx (machine_mode, unsigned int);

void aarch64_split_move (rtx, rtx, machine_mode);
void aarch64_split_128bit_move (rtx, rtx);

bool aarch64_split_128bit_move_p (rtx, rtx);

bool aarch64_mov128_immediate (rtx);

void aarch64_split_simd_move (rtx, rtx);

/* Check for a legitimate floating point constant for FMOV.  */
bool aarch64_float_const_representable_p (rtx);

extern int aarch64_epilogue_uses (int);

#if defined (RTX_CODE)
void aarch64_gen_unlikely_cbranch (enum rtx_code, machine_mode cc_mode,
				   rtx label_ref);
bool aarch64_legitimate_address_p (machine_mode, rtx, bool,
				   aarch64_addr_query_type = ADDR_QUERY_M);
machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx);
rtx aarch64_gen_compare_reg (RTX_CODE, rtx, rtx);
bool aarch64_maxmin_plus_const (rtx_code, rtx *, bool);
rtx aarch64_load_tp (rtx);

void aarch64_expand_compare_and_swap (rtx op[]);
void aarch64_split_compare_and_swap (rtx op[]);

void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx);

bool aarch64_gen_adjusted_ldpstp (rtx *, bool, machine_mode, RTX_CODE);
void aarch64_finish_ldpstp_peephole (rtx *, bool,
				     enum rtx_code = (enum rtx_code)0);

void aarch64_expand_sve_vec_cmp_int (rtx, rtx_code, rtx, rtx);
bool aarch64_expand_sve_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);

bool aarch64_prepare_sve_int_fma (rtx *, rtx_code);
bool aarch64_prepare_sve_cond_int_fma (rtx *, rtx_code);
#endif /* RTX_CODE */

bool aarch64_process_target_attr (tree);
void aarch64_override_options_internal (struct gcc_options *);

const char *aarch64_general_mangle_builtin_type (const_tree);
void aarch64_general_init_builtins (void);
tree aarch64_general_fold_builtin (unsigned int, tree, unsigned int, tree *);
gimple *aarch64_general_gimple_fold_builtin (unsigned int, gcall *,
					     gimple_stmt_iterator *);
rtx aarch64_general_expand_builtin (unsigned int, tree, rtx, int);
tree aarch64_general_builtin_decl (unsigned, bool);
tree aarch64_general_builtin_rsqrt (unsigned int);
void handle_arm_acle_h (void);
void handle_arm_neon_h (void);

bool aarch64_check_required_extensions (location_t, tree,
					aarch64_required_extensions);
bool aarch64_general_check_builtin_call (location_t, vec<location_t>,
					 unsigned int, tree, unsigned int,
					 tree *);

namespace aarch64 {
  void report_non_ice (location_t, tree, unsigned int);
  void report_out_of_range (location_t, tree, unsigned int, HOST_WIDE_INT,
			    HOST_WIDE_INT, HOST_WIDE_INT);
  void report_neither_nor (location_t, tree, unsigned int, HOST_WIDE_INT,
			   HOST_WIDE_INT, HOST_WIDE_INT);
  void report_not_one_of (location_t, tree, unsigned int, HOST_WIDE_INT,
			  HOST_WIDE_INT, HOST_WIDE_INT, HOST_WIDE_INT,
			  HOST_WIDE_INT);
  void report_not_enum (location_t, tree, unsigned int, HOST_WIDE_INT, tree);
}

namespace aarch64_sve {
  void init_builtins ();
  void handle_arm_sve_h (bool);
  void handle_arm_sme_h (bool);
  void handle_arm_neon_sve_bridge_h (bool);
  tree builtin_decl (unsigned, bool);
  bool builtin_type_p (const_tree);
  bool builtin_type_p (const_tree, unsigned int *, unsigned int *);
  const char *mangle_builtin_type (const_tree);
  tree resolve_overloaded_builtin (location_t, unsigned int,
				   vec<tree, va_gc> *);
  bool check_builtin_call (location_t, vec<location_t>, unsigned int,
			   tree, unsigned int, tree *);
  gimple *gimple_fold_builtin (unsigned int, gimple_stmt_iterator *, gcall *);
  rtx expand_builtin (unsigned int, tree, rtx);
  tree handle_arm_sve_vector_bits_attribute (tree *, tree, tree, int, bool *);
#ifdef GCC_TARGET_H
  bool verify_type_context (location_t, type_context_kind, const_tree, bool);
#endif
  void add_sve_type_attribute (tree, unsigned int, unsigned int,
			       const char *, const char *);
}

extern void aarch64_split_combinev16qi (rtx operands[3]);
extern void aarch64_expand_vec_perm (rtx, rtx, rtx, rtx, unsigned int);
extern void aarch64_expand_sve_vec_perm (rtx, rtx, rtx, rtx);
extern bool aarch64_madd_needs_nop (rtx_insn *);
extern void aarch64_final_prescan_insn (rtx_insn *);
void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *);
int aarch64_ccmp_mode_to_code (machine_mode mode);

bool extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset);
bool aarch64_mergeable_load_pair_p (machine_mode, rtx, rtx);
bool aarch64_operands_ok_for_ldpstp (rtx *, bool);
bool aarch64_operands_adjust_ok_for_ldpstp (rtx *, bool, machine_mode);
bool aarch64_mem_ok_with_ldpstp_policy_model (rtx, bool, machine_mode);
bool aarch64_ldpstp_operand_mode_p (machine_mode);
rtx aarch64_gen_load_pair (rtx, rtx, rtx, enum rtx_code = (enum rtx_code)0);
rtx aarch64_gen_store_pair (rtx, rtx, rtx);

extern void aarch64_asm_output_pool_epilogue (FILE *, const char *,
					      tree, HOST_WIDE_INT);

extern bool aarch64_classify_address (struct aarch64_address_info *, rtx,
				      machine_mode, bool,
				      aarch64_addr_query_type = ADDR_QUERY_M);

void aarch64_set_asm_isa_flags (aarch64_feature_flags);

/* Defined in common/config/aarch64-common.cc.  */
void aarch64_set_asm_isa_flags (gcc_options *, aarch64_feature_flags);
bool aarch64_handle_option (struct gcc_options *, struct gcc_options *,
			    const struct cl_decoded_option *, location_t);
void aarch64_print_hint_for_extensions (const char *);
void aarch64_print_hint_for_arch (const char *);
void aarch64_print_hint_for_core (const char *);
enum aarch_parse_opt_result aarch64_parse_extension (const char *,
						     aarch64_feature_flags *,
						     std::string *);
enum aarch_parse_opt_result aarch64_parse_arch (const char *,
						aarch64_arch *,
						aarch64_feature_flags *,
						std::string *);
enum aarch_parse_opt_result aarch64_parse_cpu (const char *,
					       aarch64_cpu *,
					       aarch64_feature_flags *,
					       std::string *);
enum aarch_parse_opt_result aarch64_parse_tune (const char *, aarch64_cpu *);
bool aarch64_validate_march (const char *, aarch64_arch *,
			     aarch64_feature_flags *);
bool aarch64_validate_mcpu (const char *, aarch64_cpu *,
			    aarch64_feature_flags *);
bool aarch64_validate_mtune (const char *, aarch64_cpu *);
std::string aarch64_get_extension_string_for_isa_flags (aarch64_feature_flags,
							aarch64_feature_flags);
std::string aarch64_get_arch_string_for_assembler (aarch64_arch,
						   aarch64_feature_flags);

rtl_opt_pass *make_pass_aarch64_early_ra (gcc::context *);
rtl_opt_pass *make_pass_fma_steering (gcc::context *);
rtl_opt_pass *make_pass_track_speculation (gcc::context *);
rtl_opt_pass *make_pass_late_track_speculation (gcc::context *);
rtl_opt_pass *make_pass_insert_bti (gcc::context *ctxt);
rtl_opt_pass *make_pass_cc_fusion (gcc::context *ctxt);
rtl_opt_pass *make_pass_switch_pstate_sm (gcc::context *ctxt);
rtl_opt_pass *make_pass_ldp_fusion (gcc::context *);

poly_uint64 aarch64_regmode_natural_size (machine_mode);

bool aarch64_high_bits_all_ones_p (HOST_WIDE_INT);

struct atomic_ool_names
{
  const char *str[5][5];
};

rtx aarch64_atomic_ool_func (machine_mode mode, rtx model_rtx,
			     const atomic_ool_names *names);
extern const atomic_ool_names aarch64_ool_swp_names;
extern const atomic_ool_names aarch64_ool_ldadd_names;
extern const atomic_ool_names aarch64_ool_ldset_names;
extern const atomic_ool_names aarch64_ool_ldclr_names;
extern const atomic_ool_names aarch64_ool_ldeor_names;

tree aarch64_resolve_overloaded_builtin_general (location_t, tree, void *);

const char *aarch64_sls_barrier (int);
const char *aarch64_indirect_call_asm (rtx);
extern bool aarch64_harden_sls_retbr_p (void);
extern bool aarch64_harden_sls_blr_p (void);

extern void aarch64_output_patchable_area (unsigned int, bool);

extern void aarch64_adjust_reg_alloc_order ();

bool aarch64_optimize_mode_switching (aarch64_mode_entity);
void aarch64_restore_za (rtx);
void aarch64_expand_crc_using_pmull (scalar_mode, scalar_mode, rtx *);
void aarch64_expand_reversed_crc_using_pmull (scalar_mode, scalar_mode, rtx *);

extern bool aarch64_gcs_enabled ();

extern unsigned aarch64_data_alignment (const_tree exp, unsigned align);
extern unsigned aarch64_stack_alignment (const_tree exp, unsigned align);

#endif /* GCC_AARCH64_PROTOS_H */