/* ACLE support for AArch64 SVE (function_base classes)
   Copyright (C) 2018-2025 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#ifndef GCC_AARCH64_SVE_BUILTINS_FUNCTIONS_H
#define GCC_AARCH64_SVE_BUILTINS_FUNCTIONS_H

namespace aarch64_sve {
/* Wrap T, which is derived from function_base, and indicate that the
   function never has side effects.  It is only necessary to use this
   wrapper on functions that might have floating-point suffixes, since
   otherwise we assume by default that the function has no side effects.  */
template<typename T>
class quiet : public T
{
public:
  using T::T;

  unsigned int
  call_properties (const function_instance &) const override
  {
    return 0;
  }
};
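/* Illustrative sketch (editor's addition, not part of the upstream header):
   a base whose floating-point forms never trap can simply be wrapped, e.g.:

     typedef quiet<rtx_code_function> quiet_rtx_code_function;

   The QUIET_CODE_FOR_MODE0 macro later in this file applies the same
   wrapper to code_for_mode_function.  */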
/* Wrap T, which is derived from function_base, and indicate that it
   additionally has the call properties in PROPERTIES.  */
template<typename T, unsigned int PROPERTIES>
class add_call_properties : public T
{
public:
  using T::T;

  unsigned int
  call_properties (const function_instance &fi) const override
  {
    return T::call_properties (fi) | PROPERTIES;
  }
};

template<typename T>
using read_write_za = add_call_properties<T, CP_READ_ZA | CP_WRITE_ZA>;

template<typename T>
using write_za = add_call_properties<T, CP_WRITE_ZA>;

template<typename T>
using read_zt0 = add_call_properties<T, CP_READ_ZT0>;

template<typename T>
using write_zt0 = add_call_properties<T, CP_WRITE_ZT0>;
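/* Hypothetical sketch (editor's addition): further property combinations
   could be layered in the same way, e.g. a wrapper that also records a
   memory read:

     template<typename T>
     using reads_memory = add_call_properties<T, CP_READ_MEMORY>;

   CP_READ_MEMORY is the property that the load classes below return
   directly from call_properties.  */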
/* A function_base that sometimes or always operates on tuples of
   vectors.  */
class multi_vector_function : public function_base
{
public:
  CONSTEXPR multi_vector_function (unsigned int vectors_per_tuple)
    : m_vectors_per_tuple (vectors_per_tuple) {}

  unsigned int
  vectors_per_tuple (const function_instance &fi) const override
  {
    if (fi.group_suffix_id != GROUP_none)
      {
        gcc_checking_assert (m_vectors_per_tuple == 1);
        return fi.group_suffix ().vectors_per_tuple;
      }
    return m_vectors_per_tuple;
  }

  /* The number of vectors in a tuple, or 1 if the function only operates
     on single vectors.  */
  unsigned int m_vectors_per_tuple;
};

/* A function_base that loads or stores contiguous memory elements
   without extending or truncating them.  */
class full_width_access : public multi_vector_function
{
public:
  CONSTEXPR full_width_access (unsigned int vectors_per_tuple = 1)
    : multi_vector_function (vectors_per_tuple) {}

  tree
  memory_scalar_type (const function_instance &fi) const override
  {
    return fi.scalar_type (0);
  }

  machine_mode
  memory_vector_mode (const function_instance &fi) const override
  {
    machine_mode mode = fi.vector_mode (0);
    auto vectors_per_tuple = fi.vectors_per_tuple ();
    if (vectors_per_tuple != 1)
      mode = targetm.array_mode (mode, vectors_per_tuple).require ();
    return mode;
  }
};
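/* Illustrative sketch (editor's addition): a plain contiguous load or
   store base in the svld1/svst1 mould can use this class as-is, e.g.
   for a hypothetical name:

     FUNCTION (svfoo_ld1, full_width_access, )

   with empty constructor arguments, so the tuple count defaults to 1.  */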
/* A function_base that loads elements from memory and extends them
   to a wider element.  The memory element type is a fixed part of
   the function base name.  */
class extending_load : public function_base
{
public:
  CONSTEXPR extending_load (type_suffix_index memory_type)
    : m_memory_type (memory_type) {}

  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_READ_MEMORY;
  }

  tree
  memory_scalar_type (const function_instance &) const override
  {
    return scalar_types[type_suffixes[m_memory_type].vector_type];
  }

  machine_mode
  memory_vector_mode (const function_instance &fi) const override
  {
    machine_mode mem_mode = type_suffixes[m_memory_type].vector_mode;
    machine_mode reg_mode = fi.vector_mode (0);
    return aarch64_sve_data_mode (GET_MODE_INNER (mem_mode),
                                  GET_MODE_NUNITS (reg_mode)).require ();
  }

  /* Return the rtx code associated with the kind of extension that
     the load performs.  */
  rtx_code
  extend_rtx_code () const
  {
    return (type_suffixes[m_memory_type].unsigned_p
            ? ZERO_EXTEND : SIGN_EXTEND);
  }

  /* The type of the memory elements.  This is part of the function base
     name rather than a true type suffix.  */
  type_suffix_index m_memory_type;
};
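/* Illustrative sketch (editor's addition): an svld1sb-style extending
   load would record the s8 memory type, e.g. for a hypothetical name:

     FUNCTION (svfoo_ld1sb, extending_load, (TYPE_SUFFIX_s8))

   extend_rtx_code () then yields SIGN_EXTEND, since the s8 suffix is
   signed.  */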
/* A function_base that truncates vector elements and stores them to memory.
   The memory element width is a fixed part of the function base name.  */
class truncating_store : public function_base
{
public:
  CONSTEXPR truncating_store (scalar_int_mode to_mode) : m_to_mode (to_mode) {}

  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_WRITE_MEMORY;
  }

  tree
  memory_scalar_type (const function_instance &fi) const override
  {
    /* In truncating stores, the signedness of the memory element is defined
       to be the same as the signedness of the vector element.  The signedness
       doesn't make any difference to the behavior of the function.  */
    type_class_index tclass = fi.type_suffix (0).tclass;
    unsigned int element_bits = GET_MODE_BITSIZE (m_to_mode);
    type_suffix_index suffix = find_type_suffix (tclass, element_bits);
    return scalar_types[type_suffixes[suffix].vector_type];
  }

  machine_mode
  memory_vector_mode (const function_instance &fi) const override
  {
    poly_uint64 nunits = GET_MODE_NUNITS (fi.vector_mode (0));
    return aarch64_sve_data_mode (m_to_mode, nunits).require ();
  }

  /* The mode of a single memory element.  */
  scalar_int_mode m_to_mode;
};
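/* Illustrative sketch (editor's addition): an svst1b-style truncating
   store would fix the memory element mode at QImode, e.g. for a
   hypothetical name:

     FUNCTION (svfoo_st1b, truncating_store, (QImode))  */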
/* An incomplete function_base for functions that have an associated rtx code.
   It simply records information about the mapping for derived classes
   to use.  */
class rtx_code_function_base : public function_base
{
public:
  CONSTEXPR rtx_code_function_base (rtx_code code_for_sint,
                                    rtx_code code_for_uint,
                                    int unspec_for_cond_fp = -1,
                                    int unspec_for_uncond_fp = -1)
    : m_code_for_sint (code_for_sint), m_code_for_uint (code_for_uint),
      m_unspec_for_cond_fp (unspec_for_cond_fp),
      m_unspec_for_uncond_fp (unspec_for_uncond_fp) {}

  /* The rtx code to use for signed and unsigned integers respectively.
     Can be UNKNOWN for functions that don't have integer forms.  */
  rtx_code m_code_for_sint;
  rtx_code m_code_for_uint;

  /* The UNSPEC_COND_* to use for floating-point operations.  Can be -1
     for functions that only operate on integers.  */
  int m_unspec_for_cond_fp;

  /* The UNSPEC_* to use for unpredicated floating-point operations.
     Can be -1 if there is no such operation.  */
  int m_unspec_for_uncond_fp;
};

/* A function_base for functions that have an associated rtx code.
   It supports all forms of predication except PRED_implicit.  */
class rtx_code_function : public rtx_code_function_base
{
public:
  using rtx_code_function_base::rtx_code_function_base;

  rtx
  expand (function_expander &e) const override
  {
    return e.map_to_rtx_codes (m_code_for_sint, m_code_for_uint,
                               m_unspec_for_cond_fp, m_unspec_for_uncond_fp);
  }
};
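/* For illustration (editor's sketch, in the style of the definitions in
   aarch64-sve-builtins-base.cc): svadd can be described entirely by its
   rtx codes and floating-point unspec, roughly:

     FUNCTION (svadd, rtx_code_function, (PLUS, PLUS, UNSPEC_COND_FADD))

   PLUS serves for both signednesses because integer addition is
   sign-agnostic.  */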
/* Like rtx_code_function, but for functions that take what is normally
   the final argument first.  One use of this class is to handle binary
   reversed operations; another is to handle MLA-style operations that
   are normally expressed in GCC as MAD-style operations.  */
class rtx_code_function_rotated : public rtx_code_function_base
{
public:
  using rtx_code_function_base::rtx_code_function_base;

  rtx
  expand (function_expander &e) const override
  {
    /* Rotate the inputs into their normal order, but continue to make _m
       functions merge with what was originally the first vector argument.  */
    unsigned int nargs = e.args.length ();
    e.rotate_inputs_left (e.pred != PRED_none ? 1 : 0, nargs);
    return e.map_to_rtx_codes (m_code_for_sint, m_code_for_uint,
                               m_unspec_for_cond_fp, m_unspec_for_uncond_fp,
                               nargs - 1);
  }
};
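/* For illustration (editor's sketch): reversed subtraction is a classic
   user of the rotated form, since svsubr (x, y) computes y - x.  Roughly:

     FUNCTION (svsubr, rtx_code_function_rotated,
               (MINUS, MINUS, UNSPEC_COND_FSUB))  */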
/* An incomplete function_base for functions that have an associated
   unspec code, with separate codes for signed integers, unsigned
   integers and floating-point values.  The class simply records
   information about the mapping for derived classes to use.  */
class unspec_based_function_base : public function_base
{
public:
  CONSTEXPR unspec_based_function_base (int unspec_for_sint,
                                        int unspec_for_uint,
                                        int unspec_for_fp,
                                        int unspec_for_mfp8 = -1,
                                        unsigned int suffix_index = 0)
    : m_unspec_for_sint (unspec_for_sint),
      m_unspec_for_uint (unspec_for_uint),
      m_unspec_for_fp (unspec_for_fp),
      m_unspec_for_mfp8 (unspec_for_mfp8),
      m_suffix_index (suffix_index)
  {}

  /* Return the unspec code to use for INSTANCE, based on the type suffix
     at index m_suffix_index.  */
  int
  unspec_for (const function_instance &instance) const
  {
    if (instance.fpm_mode == FPM_set)
      return m_unspec_for_mfp8;

    auto &suffix = instance.type_suffix (m_suffix_index);
    return (!suffix.integer_p ? m_unspec_for_fp
            : suffix.unsigned_p ? m_unspec_for_uint
            : m_unspec_for_sint);
  }

  /* The unspec code associated with signed-integer, unsigned-integer
     and floating-point operations respectively.  */
  int m_unspec_for_sint;
  int m_unspec_for_uint;
  int m_unspec_for_fp;
  int m_unspec_for_mfp8;

  /* Which type suffix is used to choose between the unspecs.  */
  unsigned int m_suffix_index;
};

/* A function_base for functions that have an associated unspec code.
   It supports all forms of predication except PRED_implicit.  */
class unspec_based_function : public unspec_based_function_base
{
public:
  using unspec_based_function_base::unspec_based_function_base;

  rtx
  expand (function_expander &e) const override
  {
    return e.map_to_unspecs (m_unspec_for_sint, m_unspec_for_uint,
                             m_unspec_for_fp);
  }
};
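/* For illustration (editor's sketch): operations that have no single rtx
   code but do have per-signedness unspecs fit this class, e.g. in the
   style of the SVE2 definitions:

     FUNCTION (svhadd, unspec_based_function,
               (UNSPEC_SHADD, UNSPEC_UHADD, -1))

   with -1 marking the absence of a floating-point form.  */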
/* Like unspec_based_function, but for functions that take what is normally
   the final argument first.  One use of this class is to handle binary
   reversed operations; another is to handle MLA-style operations that
   are normally expressed in GCC as MAD-style operations.  */
class unspec_based_function_rotated : public unspec_based_function_base
{
public:
  using unspec_based_function_base::unspec_based_function_base;

  rtx
  expand (function_expander &e) const override
  {
    /* Rotate the inputs into their normal order, but continue to make _m
       functions merge with what was originally the first vector argument.  */
    unsigned int nargs = e.args.length ();
    e.rotate_inputs_left (e.pred != PRED_none ? 1 : 0, nargs);
    return e.map_to_unspecs (m_unspec_for_sint, m_unspec_for_uint,
                             m_unspec_for_fp, nargs - 1);
  }
};
/* Like unspec_based_function, but map the function directly to
   CODE (UNSPEC, M) instead of using the generic predication-based
   expansion, where M is the vector mode associated with type suffix 0.
   This is useful if the unspec doesn't describe the full operation or
   if the usual predication rules don't apply for some reason.  */
template<insn_code (*CODE) (int, machine_mode)>
class unspec_based_function_exact_insn : public unspec_based_function_base
{
public:
  using unspec_based_function_base::unspec_based_function_base;

  rtx
  expand (function_expander &e) const override
  {
    return e.use_exact_insn (CODE (unspec_for (e),
                                   e.tuple_mode (m_suffix_index)));
  }
};

typedef unspec_based_function_exact_insn<code_for_aarch64_sve>
  unspec_based_uncond_function;
/* A function that performs an unspec and then adds it to another value.  */
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_add>
  unspec_based_add_function;
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_add_lane>
  unspec_based_add_lane_function;

/* Generic unspec-based _lane function.  */
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_lane>
  unspec_based_lane_function;

/* A function that uses aarch64_pred* patterns regardless of the
   predication type.  */
typedef unspec_based_function_exact_insn<code_for_aarch64_pred>
  unspec_based_pred_function;

/* Like unspec_based_add_function and unspec_based_add_lane_function,
   but using saturating addition.  */
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_qadd>
  unspec_based_qadd_function;
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_qadd_lane>
  unspec_based_qadd_lane_function;

/* Like unspec_based_sub_function and unspec_based_sub_lane_function,
   but using saturating subtraction.  */
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_qsub>
  unspec_based_qsub_function;
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_qsub_lane>
  unspec_based_qsub_lane_function;

/* A function that performs an unspec and then subtracts it from
   another value.  */
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_sub>
  unspec_based_sub_function;
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_sub_lane>
  unspec_based_sub_lane_function;
/* A function that has conditional and unconditional forms, with both
   forms being associated with a single unspec each.  */
class cond_or_uncond_unspec_function : public function_base
{
public:
  CONSTEXPR cond_or_uncond_unspec_function (int cond_unspec, int uncond_unspec)
    : m_cond_unspec (cond_unspec), m_uncond_unspec (uncond_unspec) {}

  rtx
  expand (function_expander &e) const override
  {
    if (e.pred == PRED_none)
      {
        auto mode = e.tuple_mode (0);
        auto icode = (e.mode_suffix_id == MODE_single
                      ? code_for_aarch64_sve_single (m_uncond_unspec, mode)
                      : code_for_aarch64_sve (m_uncond_unspec, mode));
        return e.use_exact_insn (icode);
      }
    return e.map_to_unspecs (m_cond_unspec, m_cond_unspec, m_cond_unspec);
  }

  /* The unspecs for the conditional and unconditional instructions,
     respectively.  */
  int m_cond_unspec;
  int m_uncond_unspec;
};
/* General SME unspec-based functions, parameterized on the vector mode.  */
class sme_1mode_function : public read_write_za<unspec_based_function_base>
{
public:
  using parent = read_write_za<unspec_based_function_base>;

  CONSTEXPR sme_1mode_function (int unspec_for_sint, int unspec_for_uint,
                                int unspec_for_fp)
    : parent (unspec_for_sint, unspec_for_uint, unspec_for_fp, -1, 1)
  {}

  rtx
  expand (function_expander &e) const override
  {
    insn_code icode;
    if (e.mode_suffix_id == MODE_single)
      icode = code_for_aarch64_sme_single (unspec_for (e), e.tuple_mode (1));
    else
      icode = code_for_aarch64_sme (unspec_for (e), e.tuple_mode (1));
    return e.use_exact_insn (icode);
  }
};
/* General SME unspec-based functions, parameterized on both the ZA mode
   and the vector mode.  If the elements of the ZA and vector modes are
   the same size (e.g. _za64_f64 or _za32_s32) then the two mode arguments
   are equal, otherwise the first mode argument is the single-vector integer
   mode associated with the ZA suffix and the second mode argument is the
   tuple mode associated with the vector suffix.  */
template<insn_code (*CODE) (int, machine_mode, machine_mode),
         insn_code (*CODE_SINGLE) (int, machine_mode, machine_mode)>
class sme_2mode_function_t : public read_write_za<unspec_based_function_base>
{
public:
  using parent = read_write_za<unspec_based_function_base>;

  CONSTEXPR sme_2mode_function_t (int unspec_for_sint, int unspec_for_uint,
                                  int unspec_for_fp)
    : parent (unspec_for_sint, unspec_for_uint, unspec_for_fp, -1, 1)
  {}

  rtx
  expand (function_expander &e) const override
  {
    insn_code icode;
    machine_mode za_mode = e.vector_mode (0);
    machine_mode v_mode = e.tuple_mode (1);
    if (GET_MODE_UNIT_BITSIZE (za_mode) == GET_MODE_UNIT_BITSIZE (v_mode))
      za_mode = v_mode;
    if (e.mode_suffix_id == MODE_single)
      icode = CODE_SINGLE (unspec_for (e), za_mode, v_mode);
    else
      icode = CODE (unspec_for (e), za_mode, v_mode);
    return e.use_exact_insn (icode);
  }
};

using sme_2mode_function
  = sme_2mode_function_t<code_for_aarch64_sme, code_for_aarch64_sme_single>;

using sme_2mode_lane_function
  = sme_2mode_function_t<code_for_aarch64_sme_lane, nullptr>;
/* A function that acts like unspec_based_function_exact_insn<INT_CODE>
   when operating on integers, but that expands to an (fma ...)-style
   aarch64_sve* operation when applied to floats.  */
template<insn_code (*INT_CODE) (int, machine_mode)>
class unspec_based_fused_function : public unspec_based_function_base
{
public:
  using unspec_based_function_base::unspec_based_function_base;

  rtx
  expand (function_expander &e) const override
  {
    int unspec = unspec_for (e);
    insn_code icode;
    if (e.type_suffix (m_suffix_index).float_p
        && e.fpm_mode != FPM_set)
      {
        /* Put the operands in the normal (fma ...) order, with the accumulator
           last.  This fits naturally since that's also the unprinted operand
           in the asm output.  */
        e.rotate_inputs_left (0, e.pred != PRED_none ? 4 : 3);
        icode = code_for_aarch64_sve (unspec, e.vector_mode (m_suffix_index));
      }
    else
      icode = INT_CODE (unspec, e.vector_mode (m_suffix_index));
    return e.use_exact_insn (icode);
  }
};
typedef unspec_based_fused_function<code_for_aarch64_sve_add>
  unspec_based_mla_function;
typedef unspec_based_fused_function<code_for_aarch64_sve_sub>
  unspec_based_mls_function;
/* Like unspec_based_fused_function, but for _lane functions.  */
template<insn_code (*INT_CODE) (int, machine_mode)>
class unspec_based_fused_lane_function : public unspec_based_function_base
{
public:
  using unspec_based_function_base::unspec_based_function_base;

  rtx
  expand (function_expander &e) const override
  {
    int unspec = unspec_for (e);
    insn_code icode;
    if (e.type_suffix (m_suffix_index).float_p
        && e.fpm_mode != FPM_set)
      {
        /* Put the operands in the normal (fma ...) order, with the accumulator
           last.  This fits naturally since that's also the unprinted operand
           in the asm output.  */
        e.rotate_inputs_left (0, e.pred != PRED_none ? 5 : 4);
        icode = code_for_aarch64_lane (unspec, e.vector_mode (m_suffix_index));
      }
    else
      icode = INT_CODE (unspec, e.vector_mode (m_suffix_index));
    return e.use_exact_insn (icode);
  }
};
typedef unspec_based_fused_lane_function<code_for_aarch64_sve_add_lane>
  unspec_based_mla_lane_function;
typedef unspec_based_fused_lane_function<code_for_aarch64_sve_sub_lane>
  unspec_based_mls_lane_function;
/* A function_base that uses CODE_FOR_MODE (M) to get the associated
   instruction code, where M is the vector mode associated with type
   suffix N.  */
template<insn_code (*CODE_FOR_MODE) (machine_mode), unsigned int N>
class code_for_mode_function : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    return e.use_exact_insn (CODE_FOR_MODE (e.vector_mode (N)));
  }
};

/* A function that uses code_for_<PATTERN> (M), where M is the vector
   mode associated with the first type suffix.  */
#define CODE_FOR_MODE0(PATTERN) code_for_mode_function<code_for_##PATTERN, 0>

/* Likewise for the second type suffix.  */
#define CODE_FOR_MODE1(PATTERN) code_for_mode_function<code_for_##PATTERN, 1>

/* Like CODE_FOR_MODE0, but the function doesn't raise exceptions when
   operating on floating-point data.  */
#define QUIET_CODE_FOR_MODE0(PATTERN) \
  quiet< code_for_mode_function<code_for_##PATTERN, 0> >
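/* For illustration (editor's sketch, in the style of the base
   definitions): a function whose pattern name already encodes the whole
   operation can be declared as, roughly:

     FUNCTION (svcompact, QUIET_CODE_FOR_MODE0 (aarch64_sve_compact), )

   Note the empty ARGS: code_for_mode_function has no state of its own.  */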
/* A function_base for functions that always expand to a fixed insn pattern,
   regardless of what the suffixes are.  */
class fixed_insn_function : public function_base
{
public:
  CONSTEXPR fixed_insn_function (insn_code code) : m_code (code) {}

  rtx
  expand (function_expander &e) const override
  {
    return e.use_exact_insn (m_code);
  }

  /* The instruction to use.  */
  insn_code m_code;
};
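/* For illustration (editor's sketch): suffix-less operations such as
   setting the first-fault register fit here, e.g.:

     FUNCTION (svsetffr, fixed_insn_function, (CODE_FOR_aarch64_setffr))  */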
/* A function_base for functions that permute their arguments.  */
class permute : public quiet<function_base>
{
public:
  /* Fold a unary or binary permute with the permute vector given by
     BUILDER.  */
  gimple *
  fold_permute (const gimple_folder &f, const vec_perm_builder &builder) const
  {
    /* Punt for now on _b16 and wider; we'd need more complex evpc logic
       to rerecognize the result.  */
    if (f.type_suffix (0).bool_p && f.type_suffix (0).element_bits > 8)
      return NULL;

    unsigned int nargs = gimple_call_num_args (f.call);
    poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
    vec_perm_indices indices (builder, nargs, nelts);
    tree perm_type = build_vector_type (ssizetype, nelts);
    return gimple_build_assign (f.lhs, VEC_PERM_EXPR,
                                gimple_call_arg (f.call, 0),
                                gimple_call_arg (f.call, nargs == 1 ? 0 : 1),
                                vec_perm_indices_to_tree (perm_type, indices));
  }
};
/* A function_base for functions that permute two vectors using a fixed
   choice of indices.  */
class binary_permute : public permute
{
public:
  CONSTEXPR binary_permute (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const override
  {
    insn_code icode = code_for_aarch64_sve (m_unspec, e.vector_mode (0));
    return e.use_exact_insn (icode);
  }

  /* The unspec code associated with the operation.  */
  int m_unspec;
};
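/* Illustrative sketch (editor's addition): a fixed-index permute is
   declared by naming its unspec, e.g. for a hypothetical zip-style base:

     FUNCTION (svfoo_zip, binary_permute, (UNSPEC_ZIP1))  */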
/* A function that implements an x2 or x4 permute instruction.  Both forms
   of intrinsic have a single x2 or x4 tuple argument, but the underlying
   x2 instruction takes two separate input operands.  */
class multireg_permute : public function_base
{
public:
  CONSTEXPR multireg_permute (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const override
  {
    insn_code icode = code_for_aarch64_sve (m_unspec, e.tuple_mode (0));
    if (e.group_suffix ().vectors_per_tuple == 2)
      {
        machine_mode elt_mode = e.vector_mode (0);
        rtx arg = e.args[0];
        e.args[0] = force_subreg (elt_mode, arg, GET_MODE (arg), 0);
        e.args.safe_push (force_subreg (elt_mode, arg, GET_MODE (arg),
                                        GET_MODE_SIZE (elt_mode)));
      }
    return e.use_exact_insn (icode);
  }

  /* The unspec associated with the permutation.  */
  int m_unspec;
};
/* A function that has two integer type suffixes, which might agree
   or disagree on signedness.  There are separate instructions for each
   signed/unsigned combination.  */
class integer_conversion : public function_base
{
public:
  CONSTEXPR integer_conversion (int unspec_for_sint, int unspec_for_sintu,
                                int unspec_for_uint, int unspec_for_uints)
    : m_unspec_for_sint (unspec_for_sint),
      m_unspec_for_sintu (unspec_for_sintu),
      m_unspec_for_uint (unspec_for_uint),
      m_unspec_for_uints (unspec_for_uints)
  {}

  rtx
  expand (function_expander &e) const override
  {
    machine_mode mode0 = e.vector_mode (0);
    machine_mode mode1 = GET_MODE (e.args[0]);
    int unspec;
    if (e.type_suffix (0).unsigned_p == e.type_suffix (1).unsigned_p)
      unspec = (e.type_suffix (0).unsigned_p
                ? m_unspec_for_uint
                : m_unspec_for_sint);
    else
      unspec = (e.type_suffix (0).unsigned_p
                ? m_unspec_for_sintu
                : m_unspec_for_uints);
    return e.use_exact_insn (code_for_aarch64_sve (unspec, mode0, mode1));
  }

  /* The unspec for signed -> signed.  */
  int m_unspec_for_sint;

  /* The unspec for signed -> unsigned.  */
  int m_unspec_for_sintu;

  /* The unspec for unsigned -> signed.  */
  int m_unspec_for_uint;

  /* The unspec for unsigned -> unsigned.  */
  int m_unspec_for_uints;
};
/* A function_base for functions that reduce a vector to a scalar.  */
class reduction : public function_base
{
public:
  CONSTEXPR reduction (int unspec)
    : m_unspec_for_sint (unspec),
      m_unspec_for_uint (unspec),
      m_unspec_for_fp (unspec)
  {}

  CONSTEXPR reduction (int unspec_for_sint, int unspec_for_uint,
                       int unspec_for_fp)
    : m_unspec_for_sint (unspec_for_sint),
      m_unspec_for_uint (unspec_for_uint),
      m_unspec_for_fp (unspec_for_fp)
  {}

  rtx
  expand (function_expander &e) const override
  {
    machine_mode mode = e.vector_mode (0);
    int unspec = (!e.type_suffix (0).integer_p ? m_unspec_for_fp
                  : e.type_suffix (0).unsigned_p ? m_unspec_for_uint
                  : m_unspec_for_sint);
    /* There's no distinction between SADDV and UADDV for 64-bit elements;
       the signed versions only exist for narrower elements.  */
    if (GET_MODE_UNIT_BITSIZE (mode) == 64 && unspec == UNSPEC_SADDV)
      unspec = UNSPEC_UADDV;
    return e.use_exact_insn (code_for_aarch64_pred_reduc (unspec, mode));
  }

  /* The unspec code associated with signed-integer, unsigned-integer
     and floating-point operations respectively.  */
  int m_unspec_for_sint;
  int m_unspec_for_uint;
  int m_unspec_for_fp;
};
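/* For illustration (editor's sketch): svaddv is the reduction that
   motivates the SADDV/UADDV special case above; roughly:

     FUNCTION (svaddv, reduction,
               (UNSPEC_SADDV, UNSPEC_UADDV, UNSPEC_FADDV))  */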
/* A function_base for functions that shift narrower-than-64-bit values
   by 64-bit amounts.  */
class shift_wide : public function_base
{
public:
  CONSTEXPR shift_wide (rtx_code code, int wide_unspec)
    : m_code (code), m_wide_unspec (wide_unspec) {}

  rtx
  expand (function_expander &e) const override
  {
    machine_mode mode = e.vector_mode (0);
    machine_mode elem_mode = GET_MODE_INNER (mode);

    /* If the argument is a constant that the normal shifts can handle
       directly, use them instead.  */
    rtx shift = unwrap_const_vec_duplicate (e.args.last ());
    if (aarch64_simd_shift_imm_p (shift, elem_mode, m_code == ASHIFT))
      {
        e.args.last () = shift;
        return e.map_to_rtx_codes (m_code, m_code, -1, -1);
      }

    if (e.pred == PRED_x)
      return e.use_unpred_insn (code_for_aarch64_sve (m_wide_unspec, mode));

    return e.use_cond_insn (code_for_cond (m_wide_unspec, mode));
  }

  /* The rtx code associated with a "normal" shift.  */
  rtx_code m_code;

  /* The unspec code associated with the wide shift.  */
  int m_wide_unspec;
};
/* A function_base for unary functions that count bits.  */
class unary_count : public quiet<function_base>
{
public:
  CONSTEXPR unary_count (rtx_code code) : m_code (code) {}

  rtx
  expand (function_expander &e) const override
  {
    /* The md patterns treat the operand as an integer.  */
    machine_mode mode = aarch64_sve_int_mode (e.vector_mode (0));
    e.args.last () = gen_lowpart (mode, e.args.last ());

    if (e.pred == PRED_x)
      return e.use_pred_x_insn (code_for_aarch64_pred (m_code, mode));

    return e.use_cond_insn (code_for_cond (m_code, mode));
  }

  /* The rtx code associated with the operation.  */
  rtx_code m_code;
};
/* A function_base for svwhile* functions.  */
class while_comparison : public function_base
{
public:
  CONSTEXPR while_comparison (int unspec_for_sint, int unspec_for_uint)
    : m_unspec_for_sint (unspec_for_sint),
      m_unspec_for_uint (unspec_for_uint)
  {}

  rtx
  expand (function_expander &e) const override
  {
    /* Suffix 0 determines the predicate mode, suffix 1 determines the
       scalar mode and signedness.  */
    int unspec = (e.type_suffix (1).unsigned_p
                  ? m_unspec_for_uint
                  : m_unspec_for_sint);
    if (e.vectors_per_tuple () > 1)
      {
        auto bits = e.type_suffix (0).element_bits;
        auto icode = code_for_aarch64_sve_while_b_x2 (unspec, bits);
        return e.use_exact_insn (icode);
      }
    if (e.type_suffix (0).tclass == TYPE_count)
      {
        auto bits = e.type_suffix (0).element_bits;
        auto icode = code_for_aarch64_sve_while_c (unspec, bits);
        return e.use_exact_insn (icode);
      }

    machine_mode pred_mode = e.vector_mode (0);
    scalar_mode reg_mode = GET_MODE_INNER (e.vector_mode (1));
    return e.use_exact_insn (code_for_while (unspec, reg_mode, pred_mode));
  }

  /* The unspec codes associated with signed and unsigned operations
     respectively.  */
  int m_unspec_for_sint;
  int m_unspec_for_uint;
};

}
/* Declare the global function base NAME, creating it from an instance
   of class CLASS with constructor arguments ARGS.  */
#define FUNCTION(NAME, CLASS, ARGS) \
  namespace { static CONSTEXPR const CLASS NAME##_obj ARGS; } \
  namespace functions { const function_base *const NAME = &NAME##_obj; }
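/* For example (editor's illustration),
   FUNCTION (svadd, rtx_code_function, (PLUS, PLUS, UNSPEC_COND_FADD))
   expands to:

     namespace { static CONSTEXPR const rtx_code_function svadd_obj
       (PLUS, PLUS, UNSPEC_COND_FADD); }
     namespace functions { const function_base *const svadd = &svadd_obj; }

   i.e. a unique object in the anonymous namespace plus a pointer to it
   in the functions namespace.  */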
#define NEON_SVE_BRIDGE_FUNCTION(NAME, CLASS, ARGS) \
  namespace { static CONSTEXPR const CLASS NAME##_obj ARGS; } \
  namespace neon_sve_bridge_functions { const function_base *const NAME = &NAME##_obj; }

#endif