/* ACLE support for AArch64 SVE (__ARM_FEATURE_SVE intrinsics)
   Copyright (C) 2018-2025 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "insn-codes.h"
#include "basic-block.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "tree-vector-builder.h"
#include "rtx-vector-builder.h"
#include "vec-perm-indices.h"
#include "aarch64-sve-builtins.h"
#include "aarch64-sve-builtins-shapes.h"
#include "aarch64-sve-builtins-base.h"
#include "aarch64-sve-builtins-functions.h"
#include "aarch64-builtins.h"
#include "gimple-fold.h"
using namespace aarch64_sve;
/* Return true if VAL is an undefined value.  */
static bool
is_undef (tree val)
{
  if (TREE_CODE (val) == SSA_NAME)
    {
      if (ssa_undefined_value_p (val, false))
	return true;

      gimple *def = SSA_NAME_DEF_STMT (val);
      if (gcall *call = dyn_cast<gcall *> (def))
	if (tree fndecl = gimple_call_fndecl (call))
	  if (const function_instance *instance = lookup_fndecl (fndecl))
	    if (instance->base == functions::svundef)
	      return true;
    }
  return false;
}
/* Return the UNSPEC_CMLA* unspec for rotation amount ROT.  */
static int
unspec_cmla (int rot)
{
  switch (rot)
    {
    case 0: return UNSPEC_CMLA;
    case 90: return UNSPEC_CMLA90;
    case 180: return UNSPEC_CMLA180;
    case 270: return UNSPEC_CMLA270;
    default: gcc_unreachable ();
    }
}
/* Return the UNSPEC_FCMLA* unspec for rotation amount ROT.  */
static int
unspec_fcmla (int rot)
{
  switch (rot)
    {
    case 0: return UNSPEC_FCMLA;
    case 90: return UNSPEC_FCMLA90;
    case 180: return UNSPEC_FCMLA180;
    case 270: return UNSPEC_FCMLA270;
    default: gcc_unreachable ();
    }
}
/* Return the UNSPEC_COND_FCMLA* unspec for rotation amount ROT.  */
static int
unspec_cond_fcmla (int rot)
{
  switch (rot)
    {
    case 0: return UNSPEC_COND_FCMLA;
    case 90: return UNSPEC_COND_FCMLA90;
    case 180: return UNSPEC_COND_FCMLA180;
    case 270: return UNSPEC_COND_FCMLA270;
    default: gcc_unreachable ();
    }
}
/* Expand a call to svmad, or svmla after reordering its operands.
   Make _m forms merge with argument MERGE_ARGNO.  */
static rtx
expand_mad (function_expander &e,
	    unsigned int merge_argno = DEFAULT_MERGE_ARGNO)
{
  if (e.pred == PRED_x)
    {
      insn_code icode;
      if (e.type_suffix (0).integer_p)
	icode = code_for_aarch64_pred_fma (e.vector_mode (0));
      else
	icode = code_for_aarch64_pred (UNSPEC_COND_FMLA, e.vector_mode (0));
      return e.use_pred_x_insn (icode);
    }

  insn_code icode = e.direct_optab_handler (cond_fma_optab);
  return e.use_cond_insn (icode, merge_argno);
}
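
/* Informal illustration of the reordering above: svmad_m (pg, a, b, c)
   computes a * b + c and merges with A, whereas svmla_m (pg, a, b, c)
   computes a + b * c and also merges with A.  svmla is therefore handled
   by rotating its inputs into svmad order and asking expand_mad to merge
   with argument 3 (see svmla_impl below).  */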
/* Expand a call to svmla_lane or svmls_lane using floating-point unspec
   UNSPEC.  */
static rtx
expand_mla_mls_lane (function_expander &e, int unspec)
{
  /* Put the operands in the normal (fma ...) order, with the accumulator
     last.  This fits naturally since that's also the unprinted operand
     in the asm output.  */
  e.rotate_inputs_left (0, 4);
  insn_code icode = code_for_aarch64_lane (unspec, e.vector_mode (0));
  return e.use_exact_insn (icode);
}
/* Expand a call to svmsb, or svmls after reordering its operands.
   Make _m forms merge with argument MERGE_ARGNO.  */
static rtx
expand_msb (function_expander &e,
	    unsigned int merge_argno = DEFAULT_MERGE_ARGNO)
{
  if (e.pred == PRED_x)
    {
      insn_code icode;
      if (e.type_suffix (0).integer_p)
	icode = code_for_aarch64_pred_fnma (e.vector_mode (0));
      else
	icode = code_for_aarch64_pred (UNSPEC_COND_FMLS, e.vector_mode (0));
      return e.use_pred_x_insn (icode);
    }

  insn_code icode = e.direct_optab_handler (cond_fnma_optab);
  return e.use_cond_insn (icode, merge_argno);
}
class svabd_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    /* The integer operations are represented as the subtraction of the
       minimum from the maximum, with the signedness of the instruction
       keyed off the signedness of the maximum operation.  */
    rtx_code max_code = e.type_suffix (0).unsigned_p ? UMAX : SMAX;
    insn_code icode;
    if (e.pred == PRED_x)
      {
	if (e.type_suffix (0).integer_p)
	  icode = code_for_aarch64_pred_abd (max_code, e.vector_mode (0));
	else
	  icode = code_for_aarch64_pred_abd (e.vector_mode (0));
	return e.use_pred_x_insn (icode);
      }

    if (e.type_suffix (0).integer_p)
      icode = code_for_aarch64_cond_abd (max_code, e.vector_mode (0));
    else
      icode = code_for_aarch64_cond_abd (e.vector_mode (0));
    return e.use_cond_insn (icode);
  }
};
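
/* Informal illustration: for integer elements, svabd_x (pg, a, b) is
   expanded as max (a, b) - min (a, b) in the appropriate signedness,
   e.g. lanes holding 3 and 10 produce 10 - 3 = 7.  Floating-point
   elements use the separate FABD-style patterns selected above.  */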
/* Implements svacge, svacgt, svacle and svaclt.  */
class svac_impl : public function_base
{
public:
  CONSTEXPR svac_impl (int unspec) : m_unspec (unspec) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    tree pg = gimple_call_arg (f.call, 0);
    if (is_pfalse (pg))
      return f.fold_call_to (pg);
    return NULL;
  }

  rtx
  expand (function_expander &e) const override
  {
    e.add_ptrue_hint (0, e.gp_mode (0));
    insn_code icode = code_for_aarch64_pred_fac (m_unspec, e.vector_mode (0));
    return e.use_exact_insn (icode);
  }

  /* The unspec code for the underlying comparison.  */
  int m_unspec;
};
class svadda_impl : public function_base
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    if (is_pfalse (gimple_call_arg (f.call, 0)))
      return f.fold_call_to (gimple_call_arg (f.call, 1));
    return NULL;
  }

  rtx
  expand (function_expander &e) const override
  {
    /* Put the predicate last, as required by mask_fold_left_plus_optab.  */
    e.rotate_inputs_left (0, 3);
    machine_mode mode = e.vector_mode (0);
    insn_code icode = direct_optab_handler (mask_fold_left_plus_optab, mode);
    return e.use_exact_insn (icode);
  }
};
class svaddv_impl : public reduction
{
public:
  CONSTEXPR svaddv_impl ()
    : reduction (UNSPEC_SADDV, UNSPEC_UADDV, UNSPEC_FADDV) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    if (is_pfalse (gimple_call_arg (f.call, 0)))
      return f.fold_call_to (build_zero_cst (TREE_TYPE (f.lhs)));
    return NULL;
  }
};
/* Implements svadr[bhwd].  */
class svadr_bhwd_impl : public function_base
{
public:
  CONSTEXPR svadr_bhwd_impl (unsigned int shift) : m_shift (shift) {}

  rtx
  expand (function_expander &e) const override
  {
    machine_mode mode = GET_MODE (e.args[0]);
    if (m_shift == 0)
      return e.use_exact_insn (code_for_aarch64_adr (mode));

    /* Turn the access size into an extra shift argument.  */
    rtx shift = gen_int_mode (m_shift, GET_MODE_INNER (mode));
    e.args.quick_push (expand_vector_broadcast (mode, shift));
    return e.use_exact_insn (code_for_aarch64_adr_shift (mode));
  }

  /* How many bits left to shift the vector displacement.  */
  unsigned int m_shift;
};
class svandv_impl : public reduction
{
public:
  CONSTEXPR svandv_impl () : reduction (UNSPEC_ANDV) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    if (is_pfalse (gimple_call_arg (f.call, 0)))
      return f.fold_call_to (build_all_ones_cst (TREE_TYPE (f.lhs)));
    return NULL;
  }
};
class svbic_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    /* Convert svbic of a constant into svand of its inverse.  */
    if (CONST_INT_P (e.args[2]))
      {
	machine_mode mode = GET_MODE_INNER (e.vector_mode (0));
	e.args[2] = simplify_unary_operation (NOT, mode, e.args[2], mode);
	return e.map_to_rtx_codes (AND, AND, -1, -1);
      }

    if (e.type_suffix_ids[0] == TYPE_SUFFIX_b)
      {
	gcc_assert (e.pred == PRED_z);
	return e.use_exact_insn (CODE_FOR_aarch64_pred_bicvnx16bi_z);
      }

    if (e.pred == PRED_x)
      return e.use_unpred_insn (e.direct_optab_handler (andn_optab));

    return e.use_cond_insn (code_for_cond_bic (e.vector_mode (0)));
  }
};
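
/* Informal illustration: since BIC (a, b) == a & ~b, a call such as
   svbic_x (pg, x, 0xf0) on 8-bit elements is rewritten above into
   svand_x (pg, x, 0x0f), i.e. the constant is inverted and the operation
   becomes a plain AND.  */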
/* Implements svbrkn, svbrkpa and svbrkpb.  */
class svbrk_binary_impl : public function_base
{
public:
  CONSTEXPR svbrk_binary_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const override
  {
    return e.use_exact_insn (code_for_aarch64_brk (m_unspec));
  }

  /* The unspec code associated with the operation.  */
  int m_unspec;
};
/* Implements svbrka and svbrkb.  */
class svbrk_unary_impl : public function_base
{
public:
  CONSTEXPR svbrk_unary_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const override
  {
    return e.use_cond_insn (code_for_aarch64_brk (m_unspec));
  }

  /* The unspec code associated with the operation.  */
  int m_unspec;
};
class svcadd_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    /* Convert the rotation amount into a specific unspec.  */
    int rot = INTVAL (e.args.pop ());
    if (rot == 90)
      return e.map_to_unspecs (UNSPEC_CADD90, UNSPEC_CADD90,
			       UNSPEC_COND_FCADD90);
    if (rot == 270)
      return e.map_to_unspecs (UNSPEC_CADD270, UNSPEC_CADD270,
			       UNSPEC_COND_FCADD270);
    gcc_unreachable ();
  }
};
376 /* Implements svclasta and svclastb. */
377 class svclast_impl
: public quiet
<function_base
>
380 CONSTEXPR
svclast_impl (int unspec
) : m_unspec (unspec
) {}
383 fold (gimple_folder
&f
) const override
385 if (is_pfalse (gimple_call_arg (f
.call
, 0)))
386 return f
.fold_call_to (gimple_call_arg (f
.call
, 1));
391 expand (function_expander
&e
) const override
393 /* Match the fold_extract_optab order. */
394 std::swap (e
.args
[0], e
.args
[1]);
395 machine_mode mode
= e
.vector_mode (0);
397 if (e
.mode_suffix_id
== MODE_n
)
398 icode
= code_for_fold_extract (m_unspec
, mode
);
400 icode
= code_for_aarch64_fold_extract_vector (m_unspec
, mode
);
401 return e
.use_exact_insn (icode
);
404 /* The unspec code associated with the operation. */
408 class svcmla_impl
: public function_base
412 expand (function_expander
&e
) const override
414 /* Convert the rotation amount into a specific unspec. */
415 int rot
= INTVAL (e
.args
.pop ());
416 if (e
.type_suffix (0).float_p
)
418 /* Make the operand order the same as the one used by the fma optabs,
419 with the accumulator last. */
420 e
.rotate_inputs_left (1, 4);
421 return e
.map_to_unspecs (-1, -1, unspec_cond_fcmla (rot
), 3);
425 int cmla
= unspec_cmla (rot
);
426 return e
.map_to_unspecs (cmla
, cmla
, -1);
431 class svcmla_lane_impl
: public function_base
435 expand (function_expander
&e
) const override
437 /* Convert the rotation amount into a specific unspec. */
438 int rot
= INTVAL (e
.args
.pop ());
439 machine_mode mode
= e
.vector_mode (0);
440 if (e
.type_suffix (0).float_p
)
442 /* Make the operand order the same as the one used by the fma optabs,
443 with the accumulator last. */
444 e
.rotate_inputs_left (0, 4);
445 insn_code icode
= code_for_aarch64_lane (unspec_fcmla (rot
), mode
);
446 return e
.use_exact_insn (icode
);
450 insn_code icode
= code_for_aarch64_lane (unspec_cmla (rot
), mode
);
451 return e
.use_exact_insn (icode
);
456 /* Implements svcmp<cc> (except svcmpuo, which is handled separately). */
457 class svcmp_impl
: public function_base
460 CONSTEXPR
svcmp_impl (tree_code code
, int unspec_for_fp
)
461 : m_code (code
), m_unspec_for_fp (unspec_for_fp
) {}
464 fold (gimple_folder
&f
) const override
466 tree pg
= gimple_call_arg (f
.call
, 0);
467 tree rhs1
= gimple_call_arg (f
.call
, 1);
468 tree rhs2
= gimple_call_arg (f
.call
, 2);
470 /* Convert a ptrue-predicated integer comparison into the corresponding
471 gimple-level operation. */
472 if (integer_all_onesp (pg
)
473 && f
.type_suffix (0).element_bytes
== 1
474 && f
.type_suffix (0).integer_p
)
476 gimple_seq stmts
= NULL
;
477 rhs2
= f
.force_vector (stmts
, TREE_TYPE (rhs1
), rhs2
);
478 gsi_insert_seq_before (f
.gsi
, stmts
, GSI_SAME_STMT
);
479 return gimple_build_assign (f
.lhs
, m_code
, rhs1
, rhs2
);
483 return f
.fold_call_to (pg
);
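
  /* Informal illustration: with an all-true predicate, a byte comparison
     such as svcmplt_s8 (svptrue_b8 (), a, b) is folded above to the gimple
     assignment "lhs = a < b", exposing it to generic optimizations; with
     an all-false predicate the call folds to the all-false predicate
     itself.  */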
488 expand (function_expander
&e
) const override
490 machine_mode mode
= e
.vector_mode (0);
492 /* Comparisons are UNSPEC_PRED_Z operations and so need a hint
494 e
.add_ptrue_hint (0, e
.gp_mode (0));
496 if (e
.type_suffix (0).integer_p
)
498 bool unsigned_p
= e
.type_suffix (0).unsigned_p
;
499 rtx_code code
= get_rtx_code (m_code
, unsigned_p
);
500 return e
.use_exact_insn (code_for_aarch64_pred_cmp (code
, mode
));
503 insn_code icode
= code_for_aarch64_pred_fcm (m_unspec_for_fp
, mode
);
504 return e
.use_exact_insn (icode
);
507 /* The tree code associated with the comparison. */
510 /* The unspec code to use for floating-point comparisons. */
514 /* Implements svcmp<cc>_wide. */
515 class svcmp_wide_impl
: public function_base
518 CONSTEXPR
svcmp_wide_impl (tree_code code
, int unspec_for_sint
,
520 : m_code (code
), m_unspec_for_sint (unspec_for_sint
),
521 m_unspec_for_uint (unspec_for_uint
) {}
524 fold (gimple_folder
&f
) const override
526 tree pg
= gimple_call_arg (f
.call
, 0);
528 return f
.fold_call_to (pg
);
533 expand (function_expander
&e
) const override
535 machine_mode mode
= e
.vector_mode (0);
536 bool unsigned_p
= e
.type_suffix (0).unsigned_p
;
537 rtx_code code
= get_rtx_code (m_code
, unsigned_p
);
539 /* Comparisons are UNSPEC_PRED_Z operations and so need a hint
541 e
.add_ptrue_hint (0, e
.gp_mode (0));
543 /* If the argument is a constant that the unwidened comparisons
544 can handle directly, use them instead. */
545 insn_code icode
= code_for_aarch64_pred_cmp (code
, mode
);
546 rtx op2
= unwrap_const_vec_duplicate (e
.args
[3]);
548 && insn_data
[icode
].operand
[4].predicate (op2
, DImode
))
551 return e
.use_exact_insn (icode
);
554 int unspec
= (unsigned_p
? m_unspec_for_uint
: m_unspec_for_sint
);
555 return e
.use_exact_insn (code_for_aarch64_pred_cmp_wide (unspec
, mode
));
558 /* The tree code associated with the comparison. */
561 /* The unspec codes for signed and unsigned wide comparisons
563 int m_unspec_for_sint
;
564 int m_unspec_for_uint
;
567 class svcmpuo_impl
: public quiet
<function_base
>
572 fold (gimple_folder
&f
) const override
574 tree pg
= gimple_call_arg (f
.call
, 0);
576 return f
.fold_call_to (pg
);
581 expand (function_expander
&e
) const override
583 e
.add_ptrue_hint (0, e
.gp_mode (0));
584 return e
.use_exact_insn (code_for_aarch64_pred_fcmuo (e
.vector_mode (0)));
588 class svcnot_impl
: public function_base
592 expand (function_expander
&e
) const override
594 machine_mode mode
= e
.vector_mode (0);
595 machine_mode pred_mode
= e
.gp_mode (0);
596 /* The underlying _x pattern is effectively:
598 dst = src == 0 ? 1 : 0
600 rather than an UNSPEC_PRED_X. Using this form allows autovec
601 constructs to be matched by combine, but it means that the
602 predicate on the src == 0 comparison must be all-true.
604 For simplicity, represent other _x operations as fully-defined _m
605 operations rather than using a separate bespoke pattern. */
607 && gen_lowpart (pred_mode
, e
.args
[0]) == CONSTM1_RTX (pred_mode
))
608 return e
.use_pred_x_insn (code_for_aarch64_ptrue_cnot (mode
));
609 return e
.use_cond_insn (code_for_cond_cnot (mode
),
610 e
.pred
== PRED_x
? 1 : 0);
614 /* Implements svcnt[bhwd], which count the number of elements
615 in a particular vector mode. */
616 class svcnt_bhwd_impl
: public function_base
619 CONSTEXPR
svcnt_bhwd_impl (machine_mode ref_mode
) : m_ref_mode (ref_mode
) {}
622 fold (gimple_folder
&f
) const override
624 return f
.fold_to_cstu (GET_MODE_NUNITS (m_ref_mode
));
628 expand (function_expander
&) const override
630 return gen_int_mode (GET_MODE_NUNITS (m_ref_mode
), DImode
);
633 /* The mode of the vector associated with the [bhwd] suffix. */
634 machine_mode m_ref_mode
;
637 /* Implements svcnt[bhwd]_pat. */
638 class svcnt_bhwd_pat_impl
: public svcnt_bhwd_impl
641 using svcnt_bhwd_impl::svcnt_bhwd_impl
;
644 fold (gimple_folder
&f
) const override
646 tree pattern_arg
= gimple_call_arg (f
.call
, 0);
647 aarch64_svpattern pattern
= (aarch64_svpattern
) tree_to_shwi (pattern_arg
);
649 if (pattern
== AARCH64_SV_ALL
)
      /* svcnt[bhwd]_pat (SV_ALL) == svcnt[bhwd] ().  */
651 return svcnt_bhwd_impl::fold (f
);
653 /* See whether we can count the number of elements in the pattern
655 unsigned int elements_per_vq
= 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode
);
656 HOST_WIDE_INT value
= aarch64_fold_sve_cnt_pat (pattern
, elements_per_vq
);
658 return f
.fold_to_cstu (value
);
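
  /* Informal illustration: patterns that fit in the 128-bit minimum vector
     length fold to constants even when the runtime VL is unknown, e.g.
     svcntw_pat (SV_VL4) folds to 4 because a 128-bit vector already holds
     four 32-bit elements.  Patterns that need more elements than that
     (e.g. SV_VL8 for 32-bit elements) are left to the expansion below,
     which emits a CNT instruction with the requested pattern.  */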
664 expand (function_expander
&e
) const override
666 unsigned int elements_per_vq
= 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode
);
667 e
.args
.quick_push (gen_int_mode (elements_per_vq
, DImode
));
668 e
.args
.quick_push (const1_rtx
);
669 return e
.use_exact_insn (CODE_FOR_aarch64_sve_cnt_pat
);
673 class svcntp_impl
: public function_base
678 fold (gimple_folder
&f
) const override
680 tree pg
= gimple_call_arg (f
.call
, 0);
682 return f
.fold_call_to (build_zero_cst (TREE_TYPE (f
.lhs
)));
687 expand (function_expander
&e
) const override
689 if (e
.type_suffix (0).tclass
== TYPE_count
)
691 unsigned int bits
= e
.type_suffix (0).element_bits
;
692 return e
.use_exact_insn (code_for_aarch64_sve_cntp_c (bits
));
695 machine_mode mode
= e
.vector_mode (0);
696 e
.add_ptrue_hint (0, mode
);
697 return e
.use_exact_insn (code_for_aarch64_pred_cntp (mode
));
702 : public QUIET_CODE_FOR_MODE0 (aarch64_sve_compact
)
706 fold (gimple_folder
&f
) const override
708 if (is_pfalse (gimple_call_arg (f
.call
, 0)))
709 return f
.fold_call_to (build_zero_cst (TREE_TYPE (f
.lhs
)));
714 /* Implements svcreate2, svcreate3 and svcreate4. */
715 class svcreate_impl
: public quiet
<multi_vector_function
>
718 using quiet
<multi_vector_function
>::quiet
;
721 fold (gimple_folder
&f
) const override
723 unsigned int nargs
= gimple_call_num_args (f
.call
);
724 tree lhs_type
= TREE_TYPE (f
.lhs
);
726 /* Replace the call with a clobber of the result (to prevent it from
727 becoming upwards exposed) followed by stores into each individual
730 The fold routines expect the replacement statement to have the
731 same lhs as the original call, so return the clobber statement
732 rather than the final vector store. */
733 gassign
*clobber
= gimple_build_assign (f
.lhs
, build_clobber (lhs_type
));
735 for (unsigned int i
= nargs
; i
-- > 0; )
737 tree rhs_vector
= gimple_call_arg (f
.call
, i
);
738 tree field
= tuple_type_field (TREE_TYPE (f
.lhs
));
739 tree lhs_array
= build3 (COMPONENT_REF
, TREE_TYPE (field
),
740 unshare_expr (f
.lhs
), field
, NULL_TREE
);
741 tree lhs_vector
= build4 (ARRAY_REF
, TREE_TYPE (rhs_vector
),
742 lhs_array
, size_int (i
),
743 NULL_TREE
, NULL_TREE
);
744 gassign
*assign
= gimple_build_assign (lhs_vector
, rhs_vector
);
745 gsi_insert_after (f
.gsi
, assign
, GSI_SAME_STMT
);
751 expand (function_expander
&e
) const override
753 rtx lhs_tuple
= e
.get_nonoverlapping_reg_target ();
755 /* Record that LHS_TUPLE is dead before the first store. */
756 emit_clobber (lhs_tuple
);
757 for (unsigned int i
= 0; i
< e
.args
.length (); ++i
)
759 /* Use an lvalue subreg to refer to vector I in LHS_TUPLE. */
760 rtx lhs_vector
= simplify_gen_subreg (GET_MODE (e
.args
[i
]),
761 lhs_tuple
, GET_MODE (lhs_tuple
),
762 i
* BYTES_PER_SVE_VECTOR
);
763 emit_move_insn (lhs_vector
, e
.args
[i
]);
769 class svcvt_impl
: public function_base
773 expand (function_expander
&e
) const override
776 if (e
.pred
== PRED_none
)
778 machine_mode mode0
= e
.result_mode ();
779 machine_mode mode1
= GET_MODE (e
.args
[0]);
781 if (e
.type_suffix (0).integer_p
)
782 optab
= e
.type_suffix (0).unsigned_p
? ufix_optab
: sfix_optab
;
783 else if (e
.type_suffix (1).integer_p
)
784 optab
= e
.type_suffix (1).unsigned_p
? ufloat_optab
: sfloat_optab
;
785 else if (e
.type_suffix (0).element_bits
786 < e
.type_suffix (1).element_bits
)
790 icode
= convert_optab_handler (optab
, mode0
, mode1
);
791 gcc_assert (icode
!= CODE_FOR_nothing
);
792 return e
.use_exact_insn (icode
);
794 machine_mode mode0
= e
.vector_mode (0);
795 machine_mode mode1
= e
.vector_mode (1);
796 /* All this complication comes from the need to select four things
799 (1) the kind of conversion (int<-float, float<-int, float<-float)
800 (2) signed vs. unsigned integers, where relevant
801 (3) the predication mode, which must be the wider of the predication
802 modes for MODE0 and MODE1
803 (4) the predication type (m, x or z)
805 The only supported int<->float conversions for which the integer is
806 narrower than the float are SI<->DF. It's therefore more convenient
807 to handle (3) by defining two patterns for int<->float conversions:
808 one in which the integer is at least as wide as the float and so
809 determines the predication mode, and another single SI<->DF pattern
810 in which the float's mode determines the predication mode (which is
811 always VNx2BI in that case).
813 The names of the patterns follow the optab convention of giving
814 the source mode before the destination mode. */
815 if (e
.type_suffix (1).integer_p
)
817 int unspec
= (e
.type_suffix (1).unsigned_p
819 : UNSPEC_COND_SCVTF
);
820 if (e
.type_suffix (0).element_bytes
<= e
.type_suffix (1).element_bytes
)
821 icode
= (e
.pred
== PRED_x
822 ? code_for_aarch64_sve_nonextend (unspec
, mode1
, mode0
)
823 : code_for_cond_nonextend (unspec
, mode1
, mode0
));
825 icode
= (e
.pred
== PRED_x
826 ? code_for_aarch64_sve_extend (unspec
, mode1
, mode0
)
827 : code_for_cond_extend (unspec
, mode1
, mode0
));
831 int unspec
= (!e
.type_suffix (0).integer_p
? UNSPEC_COND_FCVT
832 : e
.type_suffix (0).unsigned_p
? UNSPEC_COND_FCVTZU
833 : UNSPEC_COND_FCVTZS
);
834 if (e
.type_suffix (0).element_bytes
>= e
.type_suffix (1).element_bytes
)
835 icode
= (e
.pred
== PRED_x
836 ? code_for_aarch64_sve_nontrunc (unspec
, mode1
, mode0
)
837 : code_for_cond_nontrunc (unspec
, mode1
, mode0
));
839 icode
= (e
.pred
== PRED_x
840 ? code_for_aarch64_sve_trunc (unspec
, mode1
, mode0
)
841 : code_for_cond_trunc (unspec
, mode1
, mode0
));
844 if (e
.pred
== PRED_x
)
845 return e
.use_pred_x_insn (icode
);
846 return e
.use_cond_insn (icode
);
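
  /* Informal illustration of the selection above: svcvt_f64_s32_x is the
     SI<->DF case and uses the "extend" pattern (predicated by VNx2BI),
     svcvt_s32_f64_x uses the corresponding "trunc" pattern, and same-width
     conversions such as svcvt_f32_s32_x use the "nonextend"/"nontrunc"
     forms.  */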
850 class svcvtnt_impl
: public CODE_FOR_MODE0 (aarch64_sve_cvtnt
)
854 fold (gimple_folder
&f
) const override
856 if (f
.pred
== PRED_x
&& is_pfalse (gimple_call_arg (f
.call
, 1)))
857 f
.fold_call_to (build_zero_cst (TREE_TYPE (f
.lhs
)));
862 class svdiv_impl
: public rtx_code_function
865 CONSTEXPR
svdiv_impl ()
866 : rtx_code_function (DIV
, UDIV
, UNSPEC_COND_FDIV
) {}
869 fold (gimple_folder
&f
) const override
871 if (auto *res
= f
.fold_const_binary (TRUNC_DIV_EXPR
))
874 /* If the divisor is all ones, fold to dividend. */
875 tree op1
= gimple_call_arg (f
.call
, 1);
876 tree op2
= gimple_call_arg (f
.call
, 2);
877 if (integer_onep (op2
))
878 return f
.fold_active_lanes_to (op1
);
880 /* If one of the operands is all zeros, fold to zero vector. */
881 if (integer_zerop (op1
) || integer_zerop (op2
))
882 return f
.fold_active_lanes_to (build_zero_cst (TREE_TYPE (f
.lhs
)));
884 /* If the divisor is all integer -1, fold to svneg. */
885 tree pg
= gimple_call_arg (f
.call
, 0);
886 if (!f
.type_suffix (0).unsigned_p
&& integer_minus_onep (op2
))
888 function_instance
instance ("svneg", functions::svneg
, shapes::unary
,
889 MODE_none
, f
.type_suffix_ids
, GROUP_none
,
891 gcall
*call
= f
.redirect_call (instance
);
892 unsigned offset_index
= 0;
893 if (f
.pred
== PRED_m
)
896 gimple_call_set_arg (call
, 0, op1
);
899 gimple_set_num_ops (call
, 5);
900 gimple_call_set_arg (call
, offset_index
, pg
);
901 gimple_call_set_arg (call
, offset_index
+ 1, op1
);
905 /* If the divisor is a uniform power of 2, fold to a shift
907 tree op2_cst
= uniform_integer_cst_p (op2
);
908 if (!op2_cst
|| !integer_pow2p (op2_cst
))
914 if (f
.type_suffix (0).unsigned_p
&& tree_to_uhwi (op2_cst
) != 1)
916 function_instance
instance ("svlsr", functions::svlsr
,
917 shapes::binary_uint_opt_n
, MODE_n
,
918 f
.type_suffix_ids
, GROUP_none
, f
.pred
,
920 call
= f
.redirect_call (instance
);
921 tree d
= INTEGRAL_TYPE_P (TREE_TYPE (op2
)) ? op2
: op2_cst
;
922 new_divisor
= wide_int_to_tree (TREE_TYPE (d
), tree_log2 (d
));
926 if (tree_int_cst_sign_bit (op2_cst
)
927 || tree_to_shwi (op2_cst
) == 1)
930 function_instance
instance ("svasrd", functions::svasrd
,
931 shapes::shift_right_imm
, MODE_n
,
932 f
.type_suffix_ids
, GROUP_none
, f
.pred
,
934 call
= f
.redirect_call (instance
);
935 new_divisor
= wide_int_to_tree (scalar_types
[VECTOR_TYPE_svuint64_t
],
936 tree_log2 (op2_cst
));
939 gimple_call_set_arg (call
, 2, new_divisor
);
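
    /* Informal illustration: for a uniform power-of-two divisor the
       division is strength-reduced above, e.g. svdiv_x (pg, x, 8) becomes
       svasrd_x (pg, x, 3) for signed elements (rounding towards zero) and
       svlsr_x (pg, x, 3) for unsigned elements, while a signed divisor of
       -1 is redirected to svneg.  */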
945 class svdot_impl
: public function_base
949 expand (function_expander
&e
) const override
952 if (e
.fpm_mode
== aarch64_sve::FPM_set
)
953 icode
= code_for_aarch64_sve_dot (e
.result_mode ());
956 /* In the optab, the multiplication operands come before the accumulator
957 operand. The optab is keyed off the multiplication mode. */
958 e
.rotate_inputs_left (0, 3);
959 if (e
.type_suffix_ids
[1] == NUM_TYPE_SUFFIXES
)
960 icode
= e
.convert_optab_handler_for_sign (sdot_prod_optab
,
963 GET_MODE (e
.args
[0]));
965 icode
= (e
.type_suffix (0).float_p
966 ? CODE_FOR_aarch64_sve_fdotvnx4sfvnx8hf
967 : e
.type_suffix (0).unsigned_p
968 ? CODE_FOR_udot_prodvnx4sivnx8hi
969 : CODE_FOR_sdot_prodvnx4sivnx8hi
);
971 return e
.use_unpred_insn (icode
);
975 class svdotprod_lane_impl
: public unspec_based_function_base
978 using unspec_based_function_base::unspec_based_function_base
;
981 expand (function_expander
&e
) const override
984 machine_mode mode0
= GET_MODE (e
.args
[0]);
985 machine_mode mode1
= GET_MODE (e
.args
[1]);
986 if (e
.fpm_mode
== aarch64_sve::FPM_set
)
988 icode
= code_for_aarch64_sve_dot_lane (mode0
);
992 /* Use the same ordering as the dot_prod_optab, with the
994 e
.rotate_inputs_left (0, 4);
995 int unspec
= unspec_for (e
);
996 if (unspec
== UNSPEC_FDOT
)
997 icode
= CODE_FOR_aarch64_fdot_prod_lanevnx4sfvnx8hf
;
999 icode
= code_for_aarch64_dot_prod_lane (unspec
, mode0
, mode1
);
1001 return e
.use_exact_insn (icode
);
1005 class svdup_impl
: public quiet
<function_base
>
1009 fold (gimple_folder
&f
) const override
1011 tree vec_type
= TREE_TYPE (f
.lhs
);
1012 tree rhs
= gimple_call_arg (f
.call
, f
.pred
== PRED_none
? 0 : 1);
1014 if (f
.pred
== PRED_none
|| f
.pred
== PRED_x
)
1016 if (CONSTANT_CLASS_P (rhs
))
1018 if (f
.type_suffix (0).bool_p
)
1019 return (tree_to_shwi (rhs
)
1020 ? f
.fold_to_ptrue ()
1021 : f
.fold_to_pfalse ());
1023 tree rhs_vector
= build_vector_from_val (vec_type
, rhs
);
1024 return gimple_build_assign (f
.lhs
, rhs_vector
);
1027 /* Avoid folding _b to a VEC_DUPLICATE_EXPR, since to do that we
1028 would need to introduce an extra and unwanted conversion to
1029 the truth vector element type. */
1030 if (!f
.type_suffix (0).bool_p
)
1031 return gimple_build_assign (f
.lhs
, VEC_DUPLICATE_EXPR
, rhs
);
1034 /* svdup_z (pg, x) == VEC_COND_EXPR <pg, VEC_DUPLICATE_EXPR <x>, 0>. */
1035 if (f
.pred
== PRED_z
)
1037 gimple_seq stmts
= NULL
;
1038 tree pred
= f
.convert_pred (stmts
, vec_type
, 0);
1039 rhs
= f
.force_vector (stmts
, vec_type
, rhs
);
1040 gsi_insert_seq_before (f
.gsi
, stmts
, GSI_SAME_STMT
);
1041 return gimple_build_assign (f
.lhs
, VEC_COND_EXPR
, pred
, rhs
,
1042 build_zero_cst (vec_type
));
1049 expand (function_expander
&e
) const override
1051 if (e
.pred
== PRED_none
|| e
.pred
== PRED_x
)
1052 /* There's no benefit to using predicated instructions for _x here. */
1053 return e
.use_unpred_insn (e
.direct_optab_handler (vec_duplicate_optab
));
1055 /* Model predicated svdups as a SEL in which the "true" value is
1056 the duplicate of the function argument and the "false" value
1057 is the value of inactive lanes. */
1059 machine_mode mode
= e
.vector_mode (0);
1060 if (valid_for_const_vector_p (GET_MODE_INNER (mode
), e
.args
.last ()))
1061 /* Duplicate the constant to fill a vector. The pattern optimizes
1062 various cases involving constant operands, falling back to SEL
1064 icode
= code_for_vcond_mask (mode
, mode
);
1066 /* Use the pattern for selecting between a duplicated scalar
1067 variable and a vector fallback. */
1068 icode
= code_for_aarch64_sel_dup (mode
);
1069 return e
.use_vcond_mask_insn (icode
);
1073 class svdup_lane_impl
: public quiet
<function_base
>
1077 expand (function_expander
&e
) const override
1079 /* The native DUP lane has an index range of 64 bytes. */
1080 machine_mode mode
= e
.vector_mode (0);
1081 if (CONST_INT_P (e
.args
[1])
1082 && IN_RANGE (INTVAL (e
.args
[1]) * GET_MODE_UNIT_SIZE (mode
), 0, 63))
1083 return e
.use_exact_insn (code_for_aarch64_sve_dup_lane (mode
));
1085 /* Treat svdup_lane as if it were svtbl_n. */
1086 return e
.use_exact_insn (code_for_aarch64_sve (UNSPEC_TBL
,
1087 e
.vector_mode (0)));
1091 class svdupq_impl
: public quiet
<function_base
>
1095 fold_nonconst_dupq (gimple_folder
&f
) const
    /* Lower lhs = svdupq (arg0, arg1, ..., argN) into:
	 tmp = {arg0, arg1, ..., argN}
	 lhs = VEC_PERM_EXPR (tmp, tmp, {0, 1, ..., N, 0, 1, ..., N, ...})  */
1101 if (f
.type_suffix (0).bool_p
1102 || BYTES_BIG_ENDIAN
)
1105 tree lhs
= gimple_call_lhs (f
.call
);
1106 tree lhs_type
= TREE_TYPE (lhs
);
1107 tree elt_type
= TREE_TYPE (lhs_type
);
1108 scalar_mode elt_mode
= SCALAR_TYPE_MODE (elt_type
);
1109 machine_mode vq_mode
= aarch64_v128_mode (elt_mode
).require ();
1110 tree vq_type
= build_vector_type_for_mode (elt_type
, vq_mode
);
1112 unsigned nargs
= gimple_call_num_args (f
.call
);
1113 vec
<constructor_elt
, va_gc
> *v
;
1114 vec_alloc (v
, nargs
);
1115 for (unsigned i
= 0; i
< nargs
; i
++)
1116 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, gimple_call_arg (f
.call
, i
));
1117 tree vec
= build_constructor (vq_type
, v
);
1118 tree tmp
= make_ssa_name_fn (cfun
, vq_type
, 0);
1119 gimple
*g
= gimple_build_assign (tmp
, vec
);
1121 gimple_seq stmts
= NULL
;
1122 gimple_seq_add_stmt_without_update (&stmts
, g
);
1124 poly_uint64 lhs_len
= TYPE_VECTOR_SUBPARTS (lhs_type
);
1125 vec_perm_builder
sel (lhs_len
, nargs
, 1);
1126 for (unsigned i
= 0; i
< nargs
; i
++)
1129 vec_perm_indices
indices (sel
, 1, nargs
);
1130 tree mask_type
= build_vector_type (ssizetype
, lhs_len
);
1131 tree mask
= vec_perm_indices_to_tree (mask_type
, indices
);
1133 gimple
*g2
= gimple_build_assign (lhs
, VEC_PERM_EXPR
, tmp
, tmp
, mask
);
1134 gimple_seq_add_stmt_without_update (&stmts
, g2
);
1135 gsi_replace_with_seq (f
.gsi
, stmts
, false);
1141 fold (gimple_folder
&f
) const override
1143 tree vec_type
= TREE_TYPE (f
.lhs
);
1144 unsigned int nargs
= gimple_call_num_args (f
.call
);
1145 /* For predicates, pad out each argument so that we have one element
1147 unsigned int factor
= (f
.type_suffix (0).bool_p
1148 ? f
.type_suffix (0).element_bytes
: 1);
1149 tree_vector_builder
builder (vec_type
, nargs
* factor
, 1);
1150 for (unsigned int i
= 0; i
< nargs
; ++i
)
1152 tree elt
= gimple_call_arg (f
.call
, i
);
1153 if (!CONSTANT_CLASS_P (elt
))
1154 return fold_nonconst_dupq (f
);
1155 builder
.quick_push (elt
);
1156 for (unsigned int j
= 1; j
< factor
; ++j
)
1157 builder
.quick_push (build_zero_cst (TREE_TYPE (vec_type
)));
1159 return gimple_build_assign (f
.lhs
, builder
.build ());
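
  /* Informal illustration: for predicate results each boolean argument is
     padded with element_bytes - 1 zeros so that there is one VNx16BI lane
     per byte, e.g. svdupq_b32 (a, b, c, d) builds the repeating constant
     {a, 0, 0, 0, b, 0, 0, 0, c, 0, 0, 0, d, 0, 0, 0}.  */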
1163 expand (function_expander
&e
) const override
1165 machine_mode mode
= e
.vector_mode (0);
1166 unsigned int elements_per_vq
= e
.args
.length ();
1167 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_BOOL
)
1169 /* Construct a vector of integers so that we can compare them against
1170 zero below. Zero vs. nonzero is the only distinction that
1172 mode
= aarch64_sve_int_mode (mode
);
1173 for (unsigned int i
= 0; i
< elements_per_vq
; ++i
)
1174 e
.args
[i
] = simplify_gen_unary (ZERO_EXTEND
, GET_MODE_INNER (mode
),
1178 /* Get the 128-bit Advanced SIMD vector for this data size. */
1179 scalar_mode element_mode
= GET_MODE_INNER (mode
);
1180 machine_mode vq_mode
= aarch64_v128_mode (element_mode
).require ();
1181 gcc_assert (known_eq (elements_per_vq
, GET_MODE_NUNITS (vq_mode
)));
1183 /* Put the arguments into a 128-bit Advanced SIMD vector. We want
1184 argument N to go into architectural lane N, whereas Advanced SIMD
1185 vectors are loaded memory lsb to register lsb. We therefore need
1186 to reverse the elements for big-endian targets. */
1187 rtx vq_reg
= gen_reg_rtx (vq_mode
);
1188 rtvec vec
= rtvec_alloc (elements_per_vq
);
1189 for (unsigned int i
= 0; i
< elements_per_vq
; ++i
)
1191 unsigned int argno
= BYTES_BIG_ENDIAN
? elements_per_vq
- i
- 1 : i
;
1192 RTVEC_ELT (vec
, i
) = e
.args
[argno
];
1194 aarch64_expand_vector_init (vq_reg
, gen_rtx_PARALLEL (vq_mode
, vec
));
1196 /* If the result is a boolean, compare the data vector against zero. */
1197 if (mode
!= e
.vector_mode (0))
1199 rtx data_dupq
= aarch64_expand_sve_dupq (NULL
, mode
, vq_reg
);
1200 return aarch64_convert_sve_data_to_pred (e
.possible_target
,
1201 e
.vector_mode (0), data_dupq
);
1204 return aarch64_expand_sve_dupq (e
.possible_target
, mode
, vq_reg
);
1208 class svdupq_lane_impl
: public quiet
<function_base
>
1212 expand (function_expander
&e
) const override
1214 machine_mode mode
= e
.vector_mode (0);
1215 rtx index
= e
.args
[1];
1216 if (CONST_INT_P (index
) && IN_RANGE (INTVAL (index
), 0, 3))
1218 /* Use the .Q form of DUP, which is the native instruction for
1220 insn_code icode
= code_for_aarch64_sve_dupq_lane (mode
);
1221 unsigned int num_indices
= e
.elements_per_vq (0);
1222 rtx indices
= aarch64_gen_stepped_int_parallel
1223 (num_indices
, INTVAL (index
) * num_indices
, 1);
1225 e
.add_output_operand (icode
);
1226 e
.add_input_operand (icode
, e
.args
[0]);
1227 e
.add_fixed_operand (indices
);
1228 return e
.generate_insn (icode
);
1231 /* Build a .D TBL index for the pairs of doublewords that we want to
1233 if (CONST_INT_P (index
))
1235 /* The index vector is a constant. */
1236 rtx_vector_builder
builder (VNx2DImode
, 2, 1);
1237 builder
.quick_push (gen_int_mode (INTVAL (index
) * 2, DImode
));
1238 builder
.quick_push (gen_int_mode (INTVAL (index
) * 2 + 1, DImode
));
1239 index
= builder
.build ();
1243 /* Duplicate INDEX * 2 to fill a DImode vector. The ACLE spec
1244 explicitly allows the top of the index to be dropped. */
1245 index
= force_reg (DImode
, simplify_gen_binary (ASHIFT
, DImode
,
1246 index
, const1_rtx
));
1247 index
= expand_vector_broadcast (VNx2DImode
, index
);
1249 /* Get an alternating 0, 1 predicate. */
1250 rtx_vector_builder
builder (VNx2BImode
, 2, 1);
1251 builder
.quick_push (const0_rtx
);
1252 builder
.quick_push (constm1_rtx
);
1253 rtx pg
= force_reg (VNx2BImode
, builder
.build ());
1255 /* Add one to the odd elements of the index. */
1256 rtx one
= force_reg (VNx2DImode
, CONST1_RTX (VNx2DImode
));
1257 rtx target
= gen_reg_rtx (VNx2DImode
);
1258 emit_insn (gen_cond_addvnx2di (target
, pg
, index
, one
, index
));
1262 e
.args
[0] = gen_lowpart (VNx2DImode
, e
.args
[0]);
1264 return e
.use_exact_insn (CODE_FOR_aarch64_sve_tblvnx2di
);
1268 class sveorv_impl
: public reduction
1271 CONSTEXPR
sveorv_impl () : reduction (UNSPEC_XORV
) {}
1274 fold (gimple_folder
&f
) const override
1276 if (is_pfalse (gimple_call_arg (f
.call
, 0)))
1277 return f
.fold_call_to (build_zero_cst (TREE_TYPE (f
.lhs
)));
1282 /* Implements svextb, svexth and svextw. */
1283 class svext_bhw_impl
: public function_base
1286 CONSTEXPR
svext_bhw_impl (scalar_int_mode from_mode
)
1287 : m_from_mode (from_mode
) {}
1290 expand (function_expander
&e
) const override
1292 if (e
.type_suffix (0).unsigned_p
)
1294 /* Convert to an AND. The widest we go is 0xffffffff, which fits
1296 e
.args
.quick_push (GEN_INT (GET_MODE_MASK (m_from_mode
)));
1297 if (e
.pred
== PRED_m
)
1298 /* We now have arguments "(inactive, pg, op, mask)". Convert this
1299 to "(pg, op, mask, inactive)" so that the order matches svand_m
1300 with an extra argument on the end. Take the inactive elements
1301 from this extra argument. */
1302 e
.rotate_inputs_left (0, 4);
1303 return e
.map_to_rtx_codes (AND
, AND
, -1, -1, 3);
1306 machine_mode wide_mode
= e
.vector_mode (0);
1307 poly_uint64 nunits
= GET_MODE_NUNITS (wide_mode
);
1308 machine_mode narrow_mode
1309 = aarch64_sve_data_mode (m_from_mode
, nunits
).require ();
1310 if (e
.pred
== PRED_x
)
1312 insn_code icode
= code_for_aarch64_pred_sxt (wide_mode
, narrow_mode
);
1313 return e
.use_pred_x_insn (icode
);
1316 insn_code icode
= code_for_aarch64_cond_sxt (wide_mode
, narrow_mode
);
1317 return e
.use_cond_insn (icode
);
1320 /* The element mode that we're extending from. */
1321 scalar_int_mode m_from_mode
;
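
/* Informal illustration: for unsigned extensions the operation above is a
   simple mask, e.g. svextb_x (pg, x) on svuint32_t becomes x & 0xff and is
   expanded through the AND patterns, while signed extensions use the
   SXT[BHW] patterns in either predicated or conditional form.  */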
1324 /* Implements svget2, svget3 and svget4. */
1325 class svget_impl
: public quiet
<multi_vector_function
>
1328 using quiet
<multi_vector_function
>::quiet
;
1331 fold (gimple_folder
&f
) const override
1333 /* Fold into a normal gimple component access. */
1334 tree rhs_tuple
= gimple_call_arg (f
.call
, 0);
1335 tree index
= gimple_call_arg (f
.call
, 1);
1336 tree field
= tuple_type_field (TREE_TYPE (rhs_tuple
));
1337 tree rhs_array
= build3 (COMPONENT_REF
, TREE_TYPE (field
),
1338 rhs_tuple
, field
, NULL_TREE
);
1339 tree rhs_vector
= build4 (ARRAY_REF
, TREE_TYPE (f
.lhs
),
1340 rhs_array
, index
, NULL_TREE
, NULL_TREE
);
1341 return gimple_build_assign (f
.lhs
, rhs_vector
);
1345 expand (function_expander
&e
) const override
1347 /* Fold the access into a subreg rvalue. */
1348 return force_subreg (e
.vector_mode (0), e
.args
[0], GET_MODE (e
.args
[0]),
1349 INTVAL (e
.args
[1]) * BYTES_PER_SVE_VECTOR
);
1353 class svget_neonq_impl
: public function_base
1357 fold (gimple_folder
&f
) const override
1359 if (BYTES_BIG_ENDIAN
)
1361 tree rhs_sve_vector
= gimple_call_arg (f
.call
, 0);
1362 tree rhs_vector
= build3 (BIT_FIELD_REF
, TREE_TYPE (f
.lhs
),
1363 rhs_sve_vector
, bitsize_int (128), bitsize_int (0));
1364 return gimple_build_assign (f
.lhs
, rhs_vector
);
1368 expand (function_expander
&e
) const override
1370 if (BYTES_BIG_ENDIAN
)
1372 machine_mode mode
= e
.vector_mode (0);
1373 insn_code icode
= code_for_aarch64_sve_get_neonq (mode
);
1374 unsigned int nunits
= 128 / GET_MODE_UNIT_BITSIZE (mode
);
1375 rtx indices
= aarch64_gen_stepped_int_parallel
1376 (nunits
, nunits
- 1, -1);
1378 e
.add_output_operand (icode
);
1379 e
.add_input_operand (icode
, e
.args
[0]);
1380 e
.add_fixed_operand (indices
);
1381 return e
.generate_insn (icode
);
1383 return force_subreg (e
.result_mode (), e
.args
[0], GET_MODE (e
.args
[0]), 0);
1387 class svset_neonq_impl
: public function_base
1391 expand (function_expander
&e
) const override
1393 machine_mode mode
= e
.vector_mode (0);
1395 /* If the SVE argument is undefined, we just need to reinterpret the
1396 Advanced SIMD argument as an SVE vector. */
1397 if (!BYTES_BIG_ENDIAN
1398 && is_undef (CALL_EXPR_ARG (e
.call_expr
, 0)))
1399 return force_subreg (mode
, e
.args
[1], GET_MODE (e
.args
[1]), 0);
1401 rtx_vector_builder
builder (VNx16BImode
, 16, 2);
1402 for (unsigned int i
= 0; i
< 16; i
++)
1403 builder
.quick_push (CONST1_RTX (BImode
));
1404 for (unsigned int i
= 0; i
< 16; i
++)
1405 builder
.quick_push (CONST0_RTX (BImode
));
1406 e
.args
.quick_push (builder
.build ());
1407 if (BYTES_BIG_ENDIAN
)
1408 return e
.use_exact_insn (code_for_aarch64_sve_set_neonq (mode
));
1409 insn_code icode
= code_for_vcond_mask (mode
, mode
);
1410 e
.args
[1] = force_lowpart_subreg (mode
, e
.args
[1], GET_MODE (e
.args
[1]));
1411 e
.add_output_operand (icode
);
1412 e
.add_input_operand (icode
, e
.args
[1]);
1413 e
.add_input_operand (icode
, e
.args
[0]);
1414 e
.add_input_operand (icode
, e
.args
[2]);
1415 return e
.generate_insn (icode
);
1419 class svdup_neonq_impl
: public function_base
1423 fold (gimple_folder
&f
) const override
1425 if (BYTES_BIG_ENDIAN
)
1427 tree rhs_vector
= gimple_call_arg (f
.call
, 0);
1428 unsigned HOST_WIDE_INT neon_nelts
1429 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs_vector
)).to_constant ();
1430 poly_uint64 sve_nelts
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (f
.lhs
));
1431 vec_perm_builder
builder (sve_nelts
, neon_nelts
, 1);
1432 for (unsigned int i
= 0; i
< neon_nelts
; i
++)
1433 builder
.quick_push (i
);
1434 vec_perm_indices
indices (builder
, 1, neon_nelts
);
1435 tree perm_type
= build_vector_type (ssizetype
, sve_nelts
);
1436 return gimple_build_assign (f
.lhs
, VEC_PERM_EXPR
,
1439 vec_perm_indices_to_tree (perm_type
, indices
));
1443 expand (function_expander
&e
) const override
1445 machine_mode mode
= e
.vector_mode (0);
1446 if (BYTES_BIG_ENDIAN
)
1448 insn_code icode
= code_for_aarch64_vec_duplicate_vq_be (mode
);
1449 unsigned int nunits
= 128 / GET_MODE_UNIT_BITSIZE (mode
);
1450 rtx indices
= aarch64_gen_stepped_int_parallel
1451 (nunits
, nunits
- 1, -1);
1453 e
.add_output_operand (icode
);
1454 e
.add_input_operand (icode
, e
.args
[0]);
1455 e
.add_fixed_operand (indices
);
1456 return e
.generate_insn (icode
);
1458 insn_code icode
= code_for_aarch64_vec_duplicate_vq_le (mode
);
1459 e
.add_output_operand (icode
);
1460 e
.add_input_operand (icode
, e
.args
[0]);
1461 return e
.generate_insn (icode
);
1465 class svindex_impl
: public function_base
1469 fold (gimple_folder
&f
) const override
1471 /* Apply constant folding if base and step are integer constants. */
1472 tree vec_type
= TREE_TYPE (f
.lhs
);
1473 tree base
= gimple_call_arg (f
.call
, 0);
1474 tree step
= gimple_call_arg (f
.call
, 1);
1475 if (TREE_CODE (base
) != INTEGER_CST
|| TREE_CODE (step
) != INTEGER_CST
)
1477 return gimple_build_assign (f
.lhs
,
1478 build_vec_series (vec_type
, base
, step
));
1483 expand (function_expander
&e
) const override
1485 return e
.use_exact_insn (e
.direct_optab_handler (vec_series_optab
));
1489 class svinsr_impl
: public quiet
<function_base
>
1493 fold (gimple_folder
&f
) const override
1495 gcall
*new_call
= gimple_build_call_internal (IFN_VEC_SHL_INSERT
, 2,
1496 gimple_call_arg (f
.call
, 0),
1497 gimple_call_arg (f
.call
, 1));
1498 gimple_call_set_lhs (new_call
, f
.lhs
);
1503 expand (function_expander
&e
) const override
1505 insn_code icode
= direct_optab_handler (vec_shl_insert_optab
,
1507 return e
.use_exact_insn (icode
);
1511 /* Implements svlasta and svlastb. */
1512 class svlast_impl
: public quiet
<function_base
>
1515 CONSTEXPR
svlast_impl (int unspec
) : m_unspec (unspec
) {}
1517 bool is_lasta () const { return m_unspec
== UNSPEC_LASTA
; }
1518 bool is_lastb () const { return m_unspec
== UNSPEC_LASTB
; }
1520 /* Fold a svlast{a/b} call with constant predicate to a BIT_FIELD_REF.
1521 BIT_FIELD_REF lowers to Advanced SIMD element extract, so we have to
1522 ensure the index of the element being accessed is in the range of a
1523 Advanced SIMD vector width. */
1525 fold (gimple_folder
& f
) const override
1527 tree pred
= gimple_call_arg (f
.call
, 0);
1528 tree val
= gimple_call_arg (f
.call
, 1);
1530 if (TREE_CODE (pred
) == VECTOR_CST
)
1534 int step
= f
.type_suffix (0).element_bytes
;
1535 int step_1
= gcd (step
, VECTOR_CST_NPATTERNS (pred
));
1536 int npats
= VECTOR_CST_NPATTERNS (pred
);
1537 unsigned enelts
= vector_cst_encoded_nelts (pred
);
1539 unsigned HOST_WIDE_INT nelts
;
1541 /* We can optimize 2 cases common to variable and fixed-length cases
1542 without a linear search of the predicate vector:
1543 1. LASTA if predicate is all true, return element 0.
1544 2. LASTA if predicate all false, return element 0. */
1545 if (is_lasta () && vector_cst_all_same (pred
, step_1
))
1547 b
= build3 (BIT_FIELD_REF
, TREE_TYPE (f
.lhs
), val
,
1548 bitsize_int (step
* BITS_PER_UNIT
), bitsize_int (0));
1549 return gimple_build_assign (f
.lhs
, b
);
1552 /* Handle the all-false case for LASTB where SVE VL == 128b -
1553 return the highest numbered element. */
1554 if (is_lastb () && known_eq (BYTES_PER_SVE_VECTOR
, 16)
1555 && vector_cst_all_same (pred
, step_1
)
1556 && integer_zerop (VECTOR_CST_ENCODED_ELT (pred
, 0)))
1558 b
= build3 (BIT_FIELD_REF
, TREE_TYPE (f
.lhs
), val
,
1559 bitsize_int (step
* BITS_PER_UNIT
),
1560 bitsize_int ((16 - step
) * BITS_PER_UNIT
));
1562 return gimple_build_assign (f
.lhs
, b
);
1565 /* Determine if there are any repeating non-zero elements in variable
1567 if (!VECTOR_CST_NELTS (pred
).is_constant (&nelts
))
1569 /* If VECTOR_CST_NELTS_PER_PATTERN (pred) == 2 and every multiple of
1571 [VECTOR_CST_NPATTERNS .. VECTOR_CST_ENCODED_NELTS - 1]
1572 is zero, then we can treat the vector as VECTOR_CST_NPATTERNS
1573 elements followed by all inactive elements. */
1574 if (VECTOR_CST_NELTS_PER_PATTERN (pred
) == 2)
1576 /* Restrict the scope of search to NPATS if vector is
1577 variable-length for linear search later. */
1579 for (unsigned j
= npats
; j
< enelts
; j
+= step_1
)
1581 /* If there are active elements in the repeated pattern of a
1582 variable-length vector, then return NULL as there is no
1583 way to be sure statically if this falls within the
1584 Advanced SIMD range. */
1585 if (!integer_zerop (VECTOR_CST_ENCODED_ELT (pred
, j
)))
1590 /* If we're here, it means that for NELTS_PER_PATTERN != 2, there
1591 is a repeating non-zero element. */
1595 /* If we're here, it means either:
1596 1. The vector is variable-length and there's no active element in the
1597 repeated part of the pattern, or
1598 2. The vector is fixed-length.
1600 Fall through to finding the last active element linearly for
1601 for all cases where the last active element is known to be
1602 within a statically-determinable range. */
1603 i
= MAX ((int)nelts
- step
, 0);
1604 for (; i
>= 0; i
-= step
)
1605 if (!integer_zerop (VECTOR_CST_ELT (pred
, i
)))
1610 /* For LASTB, the element is the last active element. */
1615 /* For LASTA, the element is one after last active element. */
1618 /* If last active element is
1619 last element, wrap-around and return first Advanced SIMD
1621 if (known_ge (pos
, BYTES_PER_SVE_VECTOR
))
1625 /* Out of Advanced SIMD range. */
1626 if (pos
< 0 || pos
> 15)
1629 b
= build3 (BIT_FIELD_REF
, TREE_TYPE (f
.lhs
), val
,
1630 bitsize_int (step
* BITS_PER_UNIT
),
1631 bitsize_int (pos
* BITS_PER_UNIT
));
1633 return gimple_build_assign (f
.lhs
, b
);
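
    /* Informal illustration: with a constant predicate the extract can be
       folded to a BIT_FIELD_REF, which lowers to an Advanced SIMD element
       extract; e.g. on a 128-bit vector, svlastb (svptrue_b32 (), x) folds
       to the 32 bits at bit offset 96, i.e. element 3.  Anything whose
       index cannot be proven to lie within the low 128 bits is left to the
       LASTA/LASTB expansion below.  */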
1639 expand (function_expander
&e
) const override
1641 return e
.use_exact_insn (code_for_extract (m_unspec
, e
.vector_mode (0)));
1644 /* The unspec code associated with the operation. */
1648 class svld1_impl
: public full_width_access
1652 call_properties (const function_instance
&) const override
1654 return CP_READ_MEMORY
;
1658 fold (gimple_folder
&f
) const override
1660 if (f
.vectors_per_tuple () != 1)
1663 tree vectype
= f
.vector_type (0);
1665 /* Get the predicate and base pointer. */
1666 gimple_seq stmts
= NULL
;
1667 tree pred
= f
.convert_pred (stmts
, vectype
, 0);
1668 tree base
= f
.fold_contiguous_base (stmts
, vectype
);
1669 tree els
= build_zero_cst (vectype
);
1670 gsi_insert_seq_before (f
.gsi
, stmts
, GSI_SAME_STMT
);
1672 tree cookie
= f
.load_store_cookie (TREE_TYPE (vectype
));
1673 gcall
*new_call
= gimple_build_call_internal (IFN_MASK_LOAD
, 4,
1674 base
, cookie
, pred
, els
);
1675 gimple_call_set_lhs (new_call
, f
.lhs
);
1680 expand (function_expander
&e
) const override
1683 if (e
.vectors_per_tuple () == 1)
1684 icode
= convert_optab_handler (maskload_optab
,
1685 e
.vector_mode (0), e
.gp_mode (0));
1687 icode
= code_for_aarch64 (UNSPEC_LD1_COUNT
, e
.tuple_mode (0));
1688 return e
.use_contiguous_load_insn (icode
, true);
1692 /* Implements extending contiguous forms of svld1. */
1693 class svld1_extend_impl
: public extending_load
1696 using extending_load::extending_load
;
1699 expand (function_expander
&e
) const override
1701 insn_code icode
= code_for_aarch64_load (extend_rtx_code (),
1703 e
.memory_vector_mode ());
1704 return e
.use_contiguous_load_insn (icode
, true);
1708 class svld1_gather_impl
: public full_width_access
1712 call_properties (const function_instance
&) const override
1714 return CP_READ_MEMORY
;
1718 expand (function_expander
&e
) const override
1720 e
.prepare_gather_address_operands (1);
1721 /* Put the predicate last, as required by mask_gather_load_optab. */
1722 e
.rotate_inputs_left (0, 5);
1723 /* Add the else operand. */
1724 e
.args
.quick_push (CONST0_RTX (e
.vector_mode (0)));
1725 machine_mode mem_mode
= e
.memory_vector_mode ();
1726 machine_mode int_mode
= aarch64_sve_int_mode (mem_mode
);
1727 insn_code icode
= convert_optab_handler (mask_gather_load_optab
,
1728 mem_mode
, int_mode
);
1729 return e
.use_exact_insn (icode
);
1733 /* Implements extending forms of svld1_gather. */
1734 class svld1_gather_extend_impl
: public extending_load
1737 using extending_load::extending_load
;
1740 expand (function_expander
&e
) const override
1742 e
.prepare_gather_address_operands (1);
1743 /* Put the predicate last, since the extending gathers use the same
1744 operand order as mask_gather_load_optab. */
1745 e
.rotate_inputs_left (0, 5);
1746 /* Add a constant predicate for the extension rtx. */
1747 e
.args
.quick_push (CONSTM1_RTX (VNx16BImode
));
1748 /* Add the else operand. */
1749 e
.args
.quick_push (CONST0_RTX (e
.vector_mode (1)));
1750 insn_code icode
= code_for_aarch64_gather_load (extend_rtx_code (),
1752 e
.memory_vector_mode ());
1753 return e
.use_exact_insn (icode
);
1757 class load_replicate
: public function_base
1761 call_properties (const function_instance
&) const override
1763 return CP_READ_MEMORY
;
1767 memory_scalar_type (const function_instance
&fi
) const override
1769 return fi
.scalar_type (0);
1773 class svld1rq_impl
: public load_replicate
1777 memory_vector_mode (const function_instance
&fi
) const override
1779 return aarch64_v128_mode (GET_MODE_INNER (fi
.vector_mode (0))).require ();
1783 expand (function_expander
&e
) const override
1785 insn_code icode
= code_for_aarch64_sve_ld1rq (e
.vector_mode (0));
1786 return e
.use_contiguous_load_insn (icode
);
1790 fold (gimple_folder
&f
) const override
1792 tree arg0
= gimple_call_arg (f
.call
, 0);
1793 tree arg1
= gimple_call_arg (f
.call
, 1);
    /* Transform:
	 lhs = svld1rq ({-1, -1, ...}, arg1)
       into:
	 tmp = mem_ref<vectype> [(elem * {ref-all}) arg1]
	 lhs = vec_perm_expr<tmp, tmp, {0, 1, 2, 3, ...}>
       on a little-endian target, where vectype is the corresponding
       Advanced SIMD type.  */
1803 if (!BYTES_BIG_ENDIAN
1804 && integer_all_onesp (arg0
)
1805 && !flag_non_call_exceptions
)
1807 tree lhs
= gimple_call_lhs (f
.call
);
1808 tree lhs_type
= TREE_TYPE (lhs
);
1809 poly_uint64 lhs_len
= TYPE_VECTOR_SUBPARTS (lhs_type
);
1810 tree eltype
= TREE_TYPE (lhs_type
);
1812 scalar_mode elmode
= GET_MODE_INNER (TYPE_MODE (lhs_type
));
1813 machine_mode vq_mode
= aarch64_v128_mode (elmode
).require ();
1814 tree vectype
= build_vector_type_for_mode (eltype
, vq_mode
);
1817 = build_pointer_type_for_mode (eltype
, VOIDmode
, true);
1818 tree zero
= build_zero_cst (elt_ptr_type
);
1820 /* Use element type alignment. */
1822 = build_aligned_type (vectype
, TYPE_ALIGN (eltype
));
1824 tree mem_ref_lhs
= make_ssa_name_fn (cfun
, access_type
, 0);
1825 tree mem_ref_op
= fold_build2 (MEM_REF
, access_type
, arg1
, zero
);
1826 gimple
*mem_ref_stmt
1827 = gimple_build_assign (mem_ref_lhs
, mem_ref_op
);
1829 gimple_seq stmts
= NULL
;
1830 gimple_seq_add_stmt_without_update (&stmts
, mem_ref_stmt
);
1832 int source_nelts
= TYPE_VECTOR_SUBPARTS (access_type
).to_constant ();
1833 vec_perm_builder
sel (lhs_len
, source_nelts
, 1);
1834 for (int i
= 0; i
< source_nelts
; i
++)
1837 vec_perm_indices
indices (sel
, 1, source_nelts
);
1838 gcc_checking_assert (can_vec_perm_const_p (TYPE_MODE (lhs_type
),
1839 TYPE_MODE (access_type
),
1841 tree mask_type
= build_vector_type (ssizetype
, lhs_len
);
1842 tree mask
= vec_perm_indices_to_tree (mask_type
, indices
);
1843 gimple
*g2
= gimple_build_assign (lhs
, VEC_PERM_EXPR
,
1844 mem_ref_lhs
, mem_ref_lhs
, mask
);
1845 gimple_seq_add_stmt_without_update (&stmts
, g2
);
1846 gsi_replace_with_seq_vops (f
.gsi
, stmts
);
1854 class svld1ro_impl
: public load_replicate
1858 memory_vector_mode (const function_instance
&) const override
1864 expand (function_expander
&e
) const override
1866 insn_code icode
= code_for_aarch64_sve_ld1ro (e
.vector_mode (0));
1867 return e
.use_contiguous_load_insn (icode
);
1871 /* Implements svld2, svld3 and svld4. */
1872 class svld234_impl
: public full_width_access
1875 using full_width_access::full_width_access
;
1878 call_properties (const function_instance
&) const override
1880 return CP_READ_MEMORY
;
1884 fold (gimple_folder
&f
) const override
1886 tree tuple_type
= TREE_TYPE (f
.lhs
);
1887 tree vectype
= f
.vector_type (0);
1889 /* Get the predicate and base pointer. */
1890 gimple_seq stmts
= NULL
;
1891 tree pred
= f
.convert_pred (stmts
, vectype
, 0);
1892 tree els
= build_zero_cst (vectype
);
1893 tree base
= f
.fold_contiguous_base (stmts
, vectype
);
1894 gsi_insert_seq_before (f
.gsi
, stmts
, GSI_SAME_STMT
);
1896 /* Emit two statements: a clobber of the lhs, so that it isn't
1897 upwards exposed, and then the load itself.
1899 The fold routines expect the replacement statement to have the
1900 same lhs as the original call, so return the clobber statement
1901 rather than the load. */
1902 gimple
*clobber
= gimple_build_assign (f
.lhs
, build_clobber (tuple_type
));
1904 /* View the loaded data as an array of vectors. */
1905 tree field
= tuple_type_field (tuple_type
);
1906 tree lhs_array
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (field
),
1907 unshare_expr (f
.lhs
));
1909 /* Emit the load itself. */
1910 tree cookie
= f
.load_store_cookie (TREE_TYPE (vectype
));
1911 gcall
*new_call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 4,
1912 base
, cookie
, pred
, els
);
1913 gimple_call_set_lhs (new_call
, lhs_array
);
1914 gsi_insert_after (f
.gsi
, new_call
, GSI_SAME_STMT
);
1920 expand (function_expander
&e
) const override
1922 machine_mode tuple_mode
= e
.result_mode ();
1923 insn_code icode
= convert_optab_handler (vec_mask_load_lanes_optab
,
1924 tuple_mode
, e
.vector_mode (0));
1925 return e
.use_contiguous_load_insn (icode
, true);
class svldff1_gather_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const override
  {
    /* See the block comment in aarch64-sve.md for details about the
       handling of the FFR.  */
    emit_insn (gen_aarch64_update_ffr_for_load ());

    e.prepare_gather_address_operands (1);
    /* Put the predicate last, since ldff1_gather uses the same operand
       order as mask_gather_load_optab.  */
    e.rotate_inputs_left (0, 5);
    machine_mode mem_mode = e.memory_vector_mode ();
    return e.use_exact_insn (code_for_aarch64_ldff1_gather (mem_mode));
  }
};

/* Implements extending forms of svldff1_gather.  */
class svldff1_gather_extend : public extending_load
{
public:
  using extending_load::extending_load;

  rtx
  expand (function_expander &e) const override
  {
    /* See the block comment in aarch64-sve.md for details about the
       handling of the FFR.  */
    emit_insn (gen_aarch64_update_ffr_for_load ());

    e.prepare_gather_address_operands (1);
    /* Put the predicate last, since ldff1_gather uses the same operand
       order as mask_gather_load_optab.  */
    e.rotate_inputs_left (0, 5);
    /* Add a constant predicate for the extension rtx.  */
    e.args.quick_push (CONSTM1_RTX (VNx16BImode));
    insn_code icode = code_for_aarch64_ldff1_gather (extend_rtx_code (),
						     e.vector_mode (0),
						     e.memory_vector_mode ());
    return e.use_exact_insn (icode);
  }
};

class svldnt1_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_READ_MEMORY;
  }

  rtx
  expand (function_expander &e) const override
  {
    insn_code icode = (e.vectors_per_tuple () == 1
		       ? code_for_aarch64_ldnt1 (e.vector_mode (0))
		       : code_for_aarch64 (UNSPEC_LDNT1_COUNT,
					   e.tuple_mode (0)));
    return e.use_contiguous_load_insn (icode, true);
  }
};

/* Implements svldff1 and svldnf1.  */
class svldxf1_impl : public full_width_access
{
public:
  CONSTEXPR svldxf1_impl (int unspec) : m_unspec (unspec) {}

  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const override
  {
    /* See the block comment in aarch64-sve.md for details about the
       handling of the FFR.  */
    emit_insn (gen_aarch64_update_ffr_for_load ());

    machine_mode mode = e.vector_mode (0);
    return e.use_contiguous_load_insn (code_for_aarch64_ldf1 (m_unspec, mode));
  }

  /* The unspec associated with the load.  */
  int m_unspec;
};

/* Implements extending contiguous forms of svldff1 and svldnf1.  */
class svldxf1_extend_impl : public extending_load
{
public:
  CONSTEXPR svldxf1_extend_impl (type_suffix_index memory_type, int unspec)
    : extending_load (memory_type), m_unspec (unspec) {}

  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const override
  {
    /* See the block comment in aarch64-sve.md for details about the
       handling of the FFR.  */
    emit_insn (gen_aarch64_update_ffr_for_load ());

    insn_code icode = code_for_aarch64_ldf1 (m_unspec, extend_rtx_code (),
					     e.vector_mode (0),
					     e.memory_vector_mode ());
    return e.use_contiguous_load_insn (icode);
  }

  /* The unspec associated with the load.  */
  int m_unspec;
};

class svlen_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    /* The argument only exists for its type.  */
    tree rhs_type = TREE_TYPE (gimple_call_arg (f.call, 0));
    tree count = build_int_cstu (TREE_TYPE (f.lhs),
				 TYPE_VECTOR_SUBPARTS (rhs_type));
    return gimple_build_assign (f.lhs, count);
  }

  rtx
  expand (function_expander &e) const override
  {
    /* The argument only exists for its type.  */
    return gen_int_mode (GET_MODE_NUNITS (e.vector_mode (0)), DImode);
  }
};
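/* For illustration (example added for exposition): svlen folds to a
   compile-time element count, e.g. for an svfloat64_t argument it is the
   number of 64-bit lanes, which would be 4 if the runtime vector length
   were 256 bits.  */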
class svlsl_impl : public rtx_code_function
{
public:
  CONSTEXPR svlsl_impl ()
    : rtx_code_function (ASHIFT, ASHIFT) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    return f.fold_const_binary (LSHIFT_EXPR);
  }
};

class svmad_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    return expand_mad (e);
  }
};
class svminv_impl : public reduction
{
public:
  CONSTEXPR svminv_impl ()
    : reduction (UNSPEC_SMINV, UNSPEC_UMINV, UNSPEC_FMINV) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    if (is_pfalse (gimple_call_arg (f.call, 0)))
      {
	tree rhs = f.type_suffix (0).integer_p
		   ? TYPE_MAX_VALUE (TREE_TYPE (f.lhs))
		   : build_real (TREE_TYPE (f.lhs), dconstinf);
	return f.fold_call_to (rhs);
      }
    return NULL;
  }
};

class svmaxnmv_impl : public reduction
{
public:
  CONSTEXPR svmaxnmv_impl () : reduction (UNSPEC_FMAXNMV) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    if (is_pfalse (gimple_call_arg (f.call, 0)))
      {
	REAL_VALUE_TYPE rnan = dconst0;
	rnan.cl = rvc_nan;
	return f.fold_call_to (build_real (TREE_TYPE (f.lhs), rnan));
      }
    return NULL;
  }
};

class svmaxv_impl : public reduction
{
public:
  CONSTEXPR svmaxv_impl ()
    : reduction (UNSPEC_SMAXV, UNSPEC_UMAXV, UNSPEC_FMAXV) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    if (is_pfalse (gimple_call_arg (f.call, 0)))
      {
	tree rhs = f.type_suffix (0).integer_p
		   ? TYPE_MIN_VALUE (TREE_TYPE (f.lhs))
		   : build_real (TREE_TYPE (f.lhs), dconstninf);
	return f.fold_call_to (rhs);
      }
    return NULL;
  }
};

class svminnmv_impl : public reduction
{
public:
  CONSTEXPR svminnmv_impl () : reduction (UNSPEC_FMINNMV) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    if (is_pfalse (gimple_call_arg (f.call, 0)))
      {
	REAL_VALUE_TYPE rnan = dconst0;
	rnan.cl = rvc_nan;
	return f.fold_call_to (build_real (TREE_TYPE (f.lhs), rnan));
      }
    return NULL;
  }
};
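/* For illustration (example added for exposition): with an all-false
   governing predicate the reductions above fold to their identity values,
   e.g. svminv on signed 32-bit data folds to INT32_MAX, svmaxv to
   INT32_MIN, and the floating-point svmaxnmv/svminnmv forms fold to a
   default NaN.  */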
class svmla_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    /* Put the accumulator at the end (argument 3), but keep it as the
       merge input for _m functions.  */
    e.rotate_inputs_left (1, 4);
    return expand_mad (e, 3);
  }
};

class svmla_lane_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    if (e.type_suffix (0).integer_p)
      {
	machine_mode mode = e.vector_mode (0);
	return e.use_exact_insn (code_for_aarch64_sve_add_mul_lane (mode));
      }
    return expand_mla_mls_lane (e, UNSPEC_FMLA);
  }
};

class svmls_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    /* Put the accumulator at the end (argument 3), but keep it as the
       merge input for _m functions.  */
    e.rotate_inputs_left (1, 4);
    return expand_msb (e, 3);
  }
};

class svmov_impl : public function_base
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    return gimple_build_assign (f.lhs, BIT_AND_EXPR,
				gimple_call_arg (f.call, 0),
				gimple_call_arg (f.call, 1));
  }

  rtx
  expand (function_expander &e) const override
  {
    /* The canonical form for the assembler alias "MOV Pa.B, Pb/Z, Pc.B"
       is "AND Pa.B, Pb/Z, Pc.B, Pc.B".  */
    gcc_assert (e.pred == PRED_z);
    e.args.quick_push (e.args[1]);
    return e.use_exact_insn (CODE_FOR_aarch64_pred_andvnx16bi_z);
  }
};
class svmls_lane_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    if (e.type_suffix (0).integer_p)
      {
	machine_mode mode = e.vector_mode (0);
	return e.use_exact_insn (code_for_aarch64_sve_sub_mul_lane (mode));
      }
    return expand_mla_mls_lane (e, UNSPEC_FMLS);
  }
};

class svmmla_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    insn_code icode;
    if (e.type_suffix (0).integer_p)
      {
	if (e.type_suffix (0).unsigned_p)
	  icode = code_for_aarch64_sve_add (UNSPEC_UMATMUL, e.vector_mode (0));
	else
	  icode = code_for_aarch64_sve_add (UNSPEC_SMATMUL, e.vector_mode (0));
      }
    else
      icode = code_for_aarch64_sve (UNSPEC_FMMLA, e.vector_mode (0));
    return e.use_exact_insn (icode);
  }
};

class svmsb_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    return expand_msb (e);
  }
};
class svmul_impl : public rtx_code_function
{
public:
  CONSTEXPR svmul_impl ()
    : rtx_code_function (MULT, MULT, UNSPEC_COND_FMUL) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    if (auto *res = f.fold_const_binary (MULT_EXPR))
      return res;

    /* If one of the operands is all ones, fold to other operand.  */
    tree op1 = gimple_call_arg (f.call, 1);
    tree op2 = gimple_call_arg (f.call, 2);
    if (integer_onep (op1))
      return f.fold_active_lanes_to (op2);
    if (integer_onep (op2))
      return f.fold_active_lanes_to (op1);

    /* If one of the operands is all zeros, fold to zero vector.  */
    if (integer_zerop (op1) || integer_zerop (op2))
      return f.fold_active_lanes_to (build_zero_cst (TREE_TYPE (f.lhs)));

    /* If one of the operands is all integer -1, fold to svneg.  */
    if (integer_minus_onep (op1) || integer_minus_onep (op2))
      {
	auto mul_by_m1 = [](gimple_folder &f, tree lhs_conv,
			    vec<tree> &args_conv) -> gimple *
	  {
	    gcc_assert (lhs_conv && args_conv.length () == 3);
	    tree pg = args_conv[0];
	    tree op1 = args_conv[1];
	    tree op2 = args_conv[2];
	    tree negated_op = op1;
	    if (integer_minus_onep (op1))
	      negated_op = op2;
	    type_suffix_pair signed_tsp =
	      {find_type_suffix (TYPE_signed, f.type_suffix (0).element_bits),
	       f.type_suffix_ids[1]};
	    function_instance instance ("svneg", functions::svneg,
					shapes::unary, MODE_none, signed_tsp,
					GROUP_none, f.pred, FPM_unused);
	    gcall *call = f.redirect_call (instance);
	    gimple_call_set_lhs (call, lhs_conv);
	    unsigned offset = 0;
	    if (f.pred == PRED_m)
	      {
		offset = 1;
		gimple_call_set_arg (call, 0, op1);
	      }
	    else
	      gimple_set_num_ops (call, 5);
	    gimple_call_set_arg (call, offset, pg);
	    gimple_call_set_arg (call, offset + 1, negated_op);
	    return call;
	  };
	tree ty =
	  get_vector_type (find_type_suffix (TYPE_signed,
					     f.type_suffix (0).element_bits));
	return f.convert_and_fold (ty, mul_by_m1);
      }

    /* If one of the operands is a uniform power of 2, fold to a left shift
       by immediate.  */
    tree pg = gimple_call_arg (f.call, 0);
    tree op1_cst = uniform_integer_cst_p (op1);
    tree op2_cst = uniform_integer_cst_p (op2);
    tree shift_op1, shift_op2 = NULL;
    if (op1_cst && integer_pow2p (op1_cst)
	&& (f.pred != PRED_m
	    || is_ptrue (pg, f.type_suffix (0).element_bytes)))
      {
	shift_op1 = op2;
	shift_op2 = op1_cst;
      }
    else if (op2_cst && integer_pow2p (op2_cst))
      {
	shift_op1 = op1;
	shift_op2 = op2_cst;
      }
    else
      return NULL;

    shift_op2 = wide_int_to_tree (unsigned_type_for (TREE_TYPE (shift_op2)),
				  tree_log2 (shift_op2));
    function_instance instance ("svlsl", functions::svlsl,
				shapes::binary_uint_opt_n, MODE_n,
				f.type_suffix_ids, GROUP_none, f.pred,
				FPM_unused);
    gcall *call = f.redirect_call (instance);
    gimple_call_set_arg (call, 1, shift_op1);
    gimple_call_set_arg (call, 2, shift_op2);
    return call;
  }
};
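/* For illustration, in ACLE terms (examples added for exposition):
   svmul_x (pg, x, 1) folds to X, svmul_x (pg, x, 0) to a zero vector,
   svmul_x (pg, x, -1) to svneg_x (pg, x), and svmul_x (pg, x, 4) to
   svlsl_x (pg, x, 2).  */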
class svnand_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    gcc_assert (e.pred == PRED_z);
    return e.use_exact_insn (CODE_FOR_aarch64_pred_nandvnx16bi_z);
  }
};

class svnor_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    gcc_assert (e.pred == PRED_z);
    return e.use_exact_insn (CODE_FOR_aarch64_pred_norvnx16bi_z);
  }
};

class svnot_impl : public rtx_code_function
{
public:
  CONSTEXPR svnot_impl () : rtx_code_function (NOT, NOT, -1) {}

  rtx
  expand (function_expander &e) const override
  {
    if (e.type_suffix_ids[0] == TYPE_SUFFIX_b)
      {
	/* The canonical form for the assembler alias "NOT Pa.B, Pb/Z, Pc.B"
	   is "EOR Pa.B, Pb/Z, Pb.B, Pc.B".  */
	gcc_assert (e.pred == PRED_z);
	e.args.quick_insert (1, e.args[0]);
	return e.use_exact_insn (CODE_FOR_aarch64_pred_xorvnx16bi_z);
      }
    return rtx_code_function::expand (e);
  }
};

class svorn_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    gcc_assert (e.pred == PRED_z);
    return e.use_exact_insn (CODE_FOR_aarch64_pred_ornvnx16bi_z);
  }
};

class svorv_impl : public reduction
{
public:
  CONSTEXPR svorv_impl () : reduction (UNSPEC_IORV) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    if (is_pfalse (gimple_call_arg (f.call, 0)))
      return f.fold_call_to (build_zero_cst (TREE_TYPE (f.lhs)));
    return NULL;
  }
};

class svpfalse_impl : public function_base
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    if (f.type_suffix (0).tclass == TYPE_bool)
      return f.fold_to_pfalse ();
    return NULL;
  }

  rtx
  expand (function_expander &) const override
  {
    return CONST0_RTX (VNx16BImode);
  }
};
/* Implements svpfirst and svpnext, which share the same .md patterns.  */
class svpfirst_svpnext_impl : public function_base
{
public:
  CONSTEXPR svpfirst_svpnext_impl (int unspec) : m_unspec (unspec) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    tree pg = gimple_call_arg (f.call, 0);
    if (is_pfalse (pg))
      return f.fold_call_to (m_unspec == UNSPEC_PFIRST
			     ? gimple_call_arg (f.call, 1)
			     : pg);
    return NULL;
  }

  rtx
  expand (function_expander &e) const override
  {
    machine_mode mode = e.vector_mode (0);
    e.add_ptrue_hint (0, mode);
    return e.use_exact_insn (code_for_aarch64_sve (m_unspec, mode));
  }

  /* The unspec associated with the operation.  */
  int m_unspec;
};

/* Implements contiguous forms of svprf[bhwd].  */
class svprf_bhwd_impl : public function_base
{
public:
  CONSTEXPR svprf_bhwd_impl (machine_mode mode) : m_mode (mode) {}

  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_PREFETCH_MEMORY;
  }

  rtx
  expand (function_expander &e) const override
  {
    e.prepare_prefetch_operands ();
    insn_code icode = code_for_aarch64_sve_prefetch (m_mode);
    return e.use_contiguous_prefetch_insn (icode);
  }

  /* The mode that we'd use to hold one vector of prefetched data.  */
  machine_mode m_mode;
};
/* Implements svprf[bhwd]_gather.  */
class svprf_bhwd_gather_impl : public function_base
{
public:
  CONSTEXPR svprf_bhwd_gather_impl (machine_mode mode) : m_mode (mode) {}

  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_PREFETCH_MEMORY;
  }

  machine_mode
  memory_vector_mode (const function_instance &) const override
  {
    return m_mode;
  }

  rtx
  expand (function_expander &e) const override
  {
    e.prepare_prefetch_operands ();
    e.prepare_gather_address_operands (1);

    /* Insert a zero operand to identify the mode of the memory being
       accessed.  This goes between the gather operands and prefetch
       operands created above.  */
    e.args.quick_insert (5, CONST0_RTX (m_mode));

    machine_mode reg_mode = GET_MODE (e.args[2]);
    insn_code icode = code_for_aarch64_sve_gather_prefetch (m_mode, reg_mode);
    return e.use_exact_insn (icode);
  }

  /* The mode that we'd use to hold one vector of prefetched data.  */
  machine_mode m_mode;
};
/* Implements svptest_any, svptest_first and svptest_last.  */
class svptest_impl : public function_base
{
public:
  CONSTEXPR svptest_impl (rtx_code compare) : m_compare (compare) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    if (is_pfalse (gimple_call_arg (f.call, 0)))
      return f.fold_call_to (boolean_false_node);
    return NULL;
  }

  rtx
  expand (function_expander &e) const override
  {
    /* See whether GP is an exact ptrue for some predicate mode;
       i.e. whether converting the GP to that mode will not drop
       set bits and will leave all significant bits set.  */
    machine_mode wide_mode;
    int hint;
    if (aarch64_ptrue_all_mode (e.args[0]).exists (&wide_mode))
      hint = SVE_KNOWN_PTRUE;
    else
      {
	hint = SVE_MAYBE_NOT_PTRUE;
	wide_mode = VNx16BImode;
      }

    /* Generate the PTEST itself.  */
    rtx pg = force_reg (VNx16BImode, e.args[0]);
    rtx wide_pg = gen_lowpart (wide_mode, pg);
    rtx hint_rtx = gen_int_mode (hint, DImode);
    rtx op = force_reg (wide_mode, gen_lowpart (wide_mode, e.args[1]));
    emit_insn (gen_aarch64_ptestvnx16bi (pg, wide_pg, hint_rtx, op));

    /* Get the location of the boolean result.  We can provide SImode and
       DImode values directly; rely on generic code to convert others.  */
    rtx target = e.possible_target;
    if (!target
	|| !REG_P (target)
	|| (GET_MODE (target) != SImode && GET_MODE (target) != DImode))
      target = gen_reg_rtx (DImode);

    /* Generate a CSET to convert the CC result of the PTEST to a boolean.  */
    rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
    rtx compare = gen_rtx_fmt_ee (m_compare, GET_MODE (target),
				  cc_reg, const0_rtx);
    emit_insn (gen_rtx_SET (target, compare));
    return target;
  }

  /* The comparison code associated with ptest condition.  */
  rtx_code m_compare;
};
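/* For illustration (note added for exposition): svptest_any expands to a
   PTEST of the operand under the governing predicate followed by a CSET of
   the condition-code result, using NE; svptest_first and svptest_last use
   LT and LTU respectively (see the FUNCTION table below).  */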
class svptrue_impl : public function_base
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    if (f.type_suffix (0).tclass == TYPE_bool)
      return f.fold_to_ptrue ();
    return NULL;
  }

  rtx
  expand (function_expander &e) const override
  {
    if (e.type_suffix (0).tclass == TYPE_bool)
      return aarch64_ptrue_all (e.type_suffix (0).element_bytes);

    auto bits = e.type_suffix (0).element_bits;
    return e.use_exact_insn (code_for_aarch64_sve_ptrue_c (bits));
  }
};

class svptrue_pat_impl : public function_base
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    tree pattern_arg = gimple_call_arg (f.call, 0);
    aarch64_svpattern pattern
      = (aarch64_svpattern) tree_to_shwi (pattern_arg);

    if (pattern == AARCH64_SV_ALL)
      /* svptrue_pat_bN (SV_ALL) == svptrue_bN ().  */
      return f.fold_to_ptrue ();

    /* See whether we can count the number of elements in the pattern
       at compile time.  If so, construct a predicate with that number
       of 1s followed by all 0s.  */
    int nelts_per_vq = f.elements_per_vq (0);
    HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, nelts_per_vq);
    if (value >= 0)
      return f.fold_to_vl_pred (value);

    return NULL;
  }

  rtx
  expand (function_expander &e) const override
  {
    /* In rtl, the predicate is represented as the constant:

	 (const:V16BI (unspec:V16BI [(const_int PATTERN)
				     (const_vector:VnnBI [zeros])]
				    UNSPEC_PTRUE))

       where nn determines the element size.  */
    rtvec vec = gen_rtvec (2, e.args[0], CONST0_RTX (e.vector_mode (0)));
    return gen_rtx_CONST (VNx16BImode,
			  gen_rtx_UNSPEC (VNx16BImode, vec, UNSPEC_PTRUE));
  }
};
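/* For illustration (example added for exposition): the fold above can
   resolve svptrue_pat_b32 (SV_VL4) at compile time, since four 32-bit
   elements always fit in the minimum vector length; the result is a
   constant predicate with the first four .S lanes set.  */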
/* Implements svqdec[bhwd]{,_pat} and svqinc[bhwd]{,_pat}.  */
class svqdec_svqinc_bhwd_impl : public function_base
{
public:
  CONSTEXPR svqdec_svqinc_bhwd_impl (rtx_code code_for_sint,
				     rtx_code code_for_uint,
				     scalar_int_mode elem_mode)
    : m_code_for_sint (code_for_sint),
      m_code_for_uint (code_for_uint),
      m_elem_mode (elem_mode)
  {}

  rtx
  expand (function_expander &e) const override
  {
    /* Treat non-_pat functions in the same way as _pat functions with
       an SV_ALL argument.  */
    if (e.args.length () == 2)
      e.args.quick_insert (1, gen_int_mode (AARCH64_SV_ALL, DImode));

    /* Insert the number of elements per 128-bit block as a fake argument,
       between the pattern and the multiplier.  Arguments 1, 2 and 3 then
       correspond exactly with the 3 UNSPEC_SVE_CNT_PAT operands; see
       aarch64_sve_cnt_pat for details.  */
    unsigned int elements_per_vq = 128 / GET_MODE_BITSIZE (m_elem_mode);
    e.args.quick_insert (2, gen_int_mode (elements_per_vq, DImode));

    rtx_code code = (e.type_suffix (0).unsigned_p
		     ? m_code_for_uint
		     : m_code_for_sint);

    /* Choose between operating on integer scalars or integer vectors.  */
    machine_mode mode = e.vector_mode (0);
    if (e.mode_suffix_id == MODE_n)
      mode = GET_MODE_INNER (mode);
    return e.use_exact_insn (code_for_aarch64_sve_pat (code, mode));
  }

  /* The saturating addition or subtraction codes to use for signed and
     unsigned values respectively.  */
  rtx_code m_code_for_sint;
  rtx_code m_code_for_uint;

  /* The integer mode associated with the [bhwd] suffix.  */
  scalar_int_mode m_elem_mode;
};

/* Implements svqdec[bhwd]{,_pat}.  */
class svqdec_bhwd_impl : public svqdec_svqinc_bhwd_impl
{
public:
  CONSTEXPR svqdec_bhwd_impl (scalar_int_mode elem_mode)
    : svqdec_svqinc_bhwd_impl (SS_MINUS, US_MINUS, elem_mode) {}
};

/* Implements svqinc[bhwd]{,_pat}.  */
class svqinc_bhwd_impl : public svqdec_svqinc_bhwd_impl
{
public:
  CONSTEXPR svqinc_bhwd_impl (scalar_int_mode elem_mode)
    : svqdec_svqinc_bhwd_impl (SS_PLUS, US_PLUS, elem_mode) {}
};
/* Implements svqdecp and svqincp.  */
class svqdecp_svqincp_impl : public function_base
{
public:
  CONSTEXPR svqdecp_svqincp_impl (rtx_code code_for_sint,
				  rtx_code code_for_uint)
    : m_code_for_sint (code_for_sint),
      m_code_for_uint (code_for_uint)
  {}

  rtx
  expand (function_expander &e) const override
  {
    rtx_code code = (e.type_suffix (0).unsigned_p
		     ? m_code_for_uint
		     : m_code_for_sint);
    insn_code icode;
    if (e.mode_suffix_id == MODE_n)
      {
	/* Increment or decrement a scalar (whose mode is given by the first
	   type suffix) by the number of active elements in a predicate
	   (whose mode is given by the second type suffix).  */
	machine_mode mode = GET_MODE_INNER (e.vector_mode (0));
	icode = code_for_aarch64_sve_cntp (code, mode, e.vector_mode (1));
      }
    else
      /* Increment a vector by the number of active elements in a predicate,
	 with the vector mode determining the predicate mode.  */
      icode = code_for_aarch64_sve_cntp (code, e.vector_mode (0));
    return e.use_exact_insn (icode);
  }

  /* The saturating addition or subtraction codes to use for signed and
     unsigned values respectively.  */
  rtx_code m_code_for_sint;
  rtx_code m_code_for_uint;
};

class svrdffr_impl : public function_base
{
public:
  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_READ_FFR;
  }

  rtx
  expand (function_expander &e) const override
  {
    /* See the block comment in aarch64-sve.md for details about the
       handling of the FFR.  */
    emit_insn (gen_aarch64_copy_ffr_to_ffrt ());
    rtx result = e.use_exact_insn (e.pred == PRED_z
				   ? CODE_FOR_aarch64_rdffr_z
				   : CODE_FOR_aarch64_rdffr);
    emit_insn (gen_aarch64_update_ffrt ());
    return result;
  }
};
class svreinterpret_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    if (f.vectors_per_tuple () > 1)
      return NULL;

    /* Punt to rtl if the effect of the reinterpret on registers does not
       conform to GCC's endianness model.  */
    if (GET_MODE_CLASS (f.vector_mode (0)) != MODE_VECTOR_BOOL
	&& !targetm.can_change_mode_class (f.vector_mode (0),
					   f.vector_mode (1), FP_REGS))
      return NULL;

    /* Otherwise svreinterpret corresponds directly to a VIEW_CONVERT_EXPR
       reinterpretation.  */
    tree rhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (f.lhs),
		       gimple_call_arg (f.call, 0));
    return gimple_build_assign (f.lhs, VIEW_CONVERT_EXPR, rhs);
  }

  rtx
  expand (function_expander &e) const override
  {
    machine_mode mode = e.tuple_mode (0);
    /* Handle svbool_t <-> svcount_t.  */
    if (mode == e.tuple_mode (1))
      return e.args[0];
    return e.use_exact_insn (code_for_aarch64_sve_reinterpret (mode));
  }
};
class svrev_impl : public permute
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    /* Punt for now on _b16 and wider; we'd need more complex evpc logic
       to rerecognize the result.  */
    if (f.type_suffix (0).bool_p && f.type_suffix (0).element_bits > 8)
      return NULL;

    /* Permute as { nelts - 1, nelts - 2, nelts - 3, ... }.  */
    poly_int64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
    vec_perm_builder builder (nelts, 1, 3);
    for (int i = 0; i < 3; ++i)
      builder.quick_push (nelts - i - 1);
    return fold_permute (f, builder);
  }

  rtx
  expand (function_expander &e) const override
  {
    return e.use_exact_insn (code_for_aarch64_sve_rev (e.vector_mode (0)));
  }
};
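/* For illustration (example added for exposition): the reversal series
   above is encoded as a single pattern of three elements so that it scales
   with the runtime vector length; with eight elements it selects
   { 7, 6, 5, 4, 3, 2, 1, 0 }.  */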
class svrint_impl : public function_base
{
public:
  CONSTEXPR svrint_impl (optab_tag optab, int cond_unspec)
    : m_optab (optab), m_cond_unspec (cond_unspec)
  {}

  rtx
  expand (function_expander &e) const override
  {
    if (e.pred == PRED_none)
      {
	auto icode = direct_optab_handler (m_optab, e.tuple_mode (0));
	return e.use_exact_insn (icode);
      }
    return e.map_to_unspecs (-1, -1, m_cond_unspec);
  }

  optab_tag m_optab;
  int m_cond_unspec;
};

class svsel_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    if (f.vectors_per_tuple () > 1)
      return NULL;

    /* svsel corresponds exactly to VEC_COND_EXPR.  */
    gimple_seq stmts = NULL;
    tree pred = f.convert_pred (stmts, f.vector_type (0), 0);
    gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
    return gimple_build_assign (f.lhs, VEC_COND_EXPR, pred,
				gimple_call_arg (f.call, 1),
				gimple_call_arg (f.call, 2));
  }

  rtx
  expand (function_expander &e) const override
  {
    /* svsel (cond, truev, falsev) is vcond_mask (truev, falsev, cond).  */
    e.rotate_inputs_left (0, 3);
    insn_code icode = (e.vectors_per_tuple () > 1
		       ? code_for_aarch64_sve_sel (e.tuple_mode (0))
		       : convert_optab_handler (vcond_mask_optab,
						e.vector_mode (0),
						e.gp_mode (0)));
    return e.use_exact_insn (icode);
  }
};
/* Implements svset2, svset3 and svset4.  */
class svset_impl : public quiet<multi_vector_function>
{
public:
  using quiet<multi_vector_function>::quiet;

  gimple *
  fold (gimple_folder &f) const override
  {
    tree rhs_tuple = gimple_call_arg (f.call, 0);
    tree index = gimple_call_arg (f.call, 1);
    tree rhs_vector = gimple_call_arg (f.call, 2);

    /* Replace the call with two statements: a copy of the full tuple
       to the call result, followed by an update of the individual vector.

       The fold routines expect the replacement statement to have the
       same lhs as the original call, so return the copy statement
       rather than the field update.  */
    gassign *copy = gimple_build_assign (unshare_expr (f.lhs), rhs_tuple);

    /* Get a reference to the individual vector.  */
    tree field = tuple_type_field (TREE_TYPE (f.lhs));
    tree lhs_array = build3 (COMPONENT_REF, TREE_TYPE (field),
			     f.lhs, field, NULL_TREE);
    tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector),
			      lhs_array, index, NULL_TREE, NULL_TREE);
    gassign *update = gimple_build_assign (lhs_vector, rhs_vector);
    gsi_insert_after (f.gsi, update, GSI_SAME_STMT);

    return copy;
  }

  rtx
  expand (function_expander &e) const override
  {
    rtx rhs_tuple = e.args[0];
    unsigned int index = INTVAL (e.args[1]);
    rtx rhs_vector = e.args[2];

    /* First copy the full tuple to the target register.  */
    rtx lhs_tuple = e.get_nonoverlapping_reg_target ();
    emit_move_insn (lhs_tuple, rhs_tuple);

    /* ...then update the individual vector.  */
    rtx lhs_vector = simplify_gen_subreg (GET_MODE (rhs_vector),
					  lhs_tuple, GET_MODE (lhs_tuple),
					  index * BYTES_PER_SVE_VECTOR);
    emit_move_insn (lhs_vector, rhs_vector);
    return lhs_tuple;
  }
};
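/* For illustration (sketch added for exposition): for svset2 the fold
   above roughly produces

     lhs = rhs_tuple;
     lhs.<vectors>[index] = rhs_vector;

   and returns the first statement so that it keeps the call's lhs; the
   field access is really the COMPONENT_REF/ARRAY_REF pair built above.  */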
class svsetffr_impl : public function_base
{
public:
  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const override
  {
    e.args.quick_push (CONSTM1_RTX (VNx16BImode));
    return e.use_exact_insn (CODE_FOR_aarch64_wrffr);
  }
};

class svsplice_impl : public QUIET_CODE_FOR_MODE0 (aarch64_sve_splice)
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    if (is_pfalse (gimple_call_arg (f.call, 0)))
      return f.fold_call_to (gimple_call_arg (f.call, 2));
    return NULL;
  }
};

class svst1_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_WRITE_MEMORY;
  }

  gimple *
  fold (gimple_folder &f) const override
  {
    if (f.vectors_per_tuple () != 1)
      return NULL;

    tree vectype = f.vector_type (0);

    /* Get the predicate and base pointer.  */
    gimple_seq stmts = NULL;
    tree pred = f.convert_pred (stmts, vectype, 0);
    tree base = f.fold_contiguous_base (stmts, vectype);
    gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);

    tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
    tree rhs = gimple_call_arg (f.call, gimple_call_num_args (f.call) - 1);
    return gimple_build_call_internal (IFN_MASK_STORE, 4,
				       base, cookie, pred, rhs);
  }

  rtx
  expand (function_expander &e) const override
  {
    insn_code icode;
    if (e.vectors_per_tuple () == 1)
      icode = convert_optab_handler (maskstore_optab,
				     e.vector_mode (0), e.gp_mode (0));
    else
      icode = code_for_aarch64 (UNSPEC_ST1_COUNT, e.tuple_mode (0));
    return e.use_contiguous_store_insn (icode);
  }
};
class svst1_scatter_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_WRITE_MEMORY;
  }

  rtx
  expand (function_expander &e) const override
  {
    e.prepare_gather_address_operands (1);
    /* Put the predicate last, as required by mask_scatter_store_optab.  */
    e.rotate_inputs_left (0, 6);
    machine_mode mem_mode = e.memory_vector_mode ();
    machine_mode int_mode = aarch64_sve_int_mode (mem_mode);
    insn_code icode = convert_optab_handler (mask_scatter_store_optab,
					     mem_mode, int_mode);
    return e.use_exact_insn (icode);
  }
};

/* Implements truncating forms of svst1_scatter.  */
class svst1_scatter_truncate_impl : public truncating_store
{
public:
  using truncating_store::truncating_store;

  rtx
  expand (function_expander &e) const override
  {
    e.prepare_gather_address_operands (1);
    /* Put the predicate last, since the truncating scatters use the same
       operand order as mask_scatter_store_optab.  */
    e.rotate_inputs_left (0, 6);
    insn_code icode = code_for_aarch64_scatter_store_trunc
      (e.memory_vector_mode (), e.vector_mode (0));
    return e.use_exact_insn (icode);
  }
};

/* Implements truncating contiguous forms of svst1.  */
class svst1_truncate_impl : public truncating_store
{
public:
  using truncating_store::truncating_store;

  rtx
  expand (function_expander &e) const override
  {
    insn_code icode = code_for_aarch64_store_trunc (e.memory_vector_mode (),
						    e.vector_mode (0));
    return e.use_contiguous_store_insn (icode);
  }
};
/* Implements svst2, svst3 and svst4.  */
class svst234_impl : public full_width_access
{
public:
  using full_width_access::full_width_access;

  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_WRITE_MEMORY;
  }

  gimple *
  fold (gimple_folder &f) const override
  {
    tree vectype = f.vector_type (0);

    /* Get the predicate and base pointer.  */
    gimple_seq stmts = NULL;
    tree pred = f.convert_pred (stmts, vectype, 0);
    tree base = f.fold_contiguous_base (stmts, vectype);
    gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);

    /* View the stored data as an array of vectors.  */
    unsigned int num_args = gimple_call_num_args (f.call);
    tree rhs_tuple = gimple_call_arg (f.call, num_args - 1);
    tree field = tuple_type_field (TREE_TYPE (rhs_tuple));
    tree rhs_array = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (field), rhs_tuple);

    tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
    return gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
				       base, cookie, pred, rhs_array);
  }

  rtx
  expand (function_expander &e) const override
  {
    machine_mode tuple_mode = GET_MODE (e.args.last ());
    insn_code icode = convert_optab_handler (vec_mask_store_lanes_optab,
					     tuple_mode, e.vector_mode (0));
    return e.use_contiguous_store_insn (icode);
  }
};

class svstnt1_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_WRITE_MEMORY;
  }

  rtx
  expand (function_expander &e) const override
  {
    insn_code icode = (e.vectors_per_tuple () == 1
		       ? code_for_aarch64_stnt1 (e.vector_mode (0))
		       : code_for_aarch64 (UNSPEC_STNT1_COUNT,
					   e.tuple_mode (0)));
    return e.use_contiguous_store_insn (icode);
  }
};
class svsub_impl : public rtx_code_function
{
public:
  CONSTEXPR svsub_impl ()
    : rtx_code_function (MINUS, MINUS, UNSPEC_COND_FSUB) {}

  rtx
  expand (function_expander &e) const override
  {
    /* Canonicalize subtractions of constants to additions.  */
    machine_mode mode = e.vector_mode (0);
    if (e.try_negating_argument (2, mode))
      return e.map_to_rtx_codes (PLUS, PLUS, UNSPEC_COND_FADD, -1);

    return rtx_code_function::expand (e);
  }
};
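/* For illustration (note added for exposition): the canonicalization above
   means that e.g. svsub_x (pg, x, 1) is expanded as an addition of -1, so
   that it can use the same immediate-add patterns as svadd.  */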
/* Implements svtrn1 and svtrn2.  */
class svtrn_impl : public binary_permute
{
public:
  CONSTEXPR svtrn_impl (int base)
    : binary_permute (base ? UNSPEC_TRN2 : UNSPEC_TRN1), m_base (base) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    /* svtrn1: { 0, nelts, 2, nelts + 2, 4, nelts + 4, ... }
       svtrn2: as for svtrn1, but with 1 added to each index.  */
    poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
    vec_perm_builder builder (nelts, 2, 3);
    for (unsigned int i = 0; i < 3; ++i)
      {
	builder.quick_push (m_base + i * 2);
	builder.quick_push (m_base + i * 2 + nelts);
      }
    return fold_permute (f, builder);
  }

  /* 0 for svtrn1, 1 for svtrn2.  */
  unsigned int m_base;
};
/* Base class for svundef{,2,3,4}.  */
class svundef_impl : public quiet<multi_vector_function>
{
public:
  using quiet<multi_vector_function>::quiet;

  rtx
  expand (function_expander &e) const override
  {
    rtx target = e.get_reg_target ();
    emit_clobber (copy_rtx (target));
    return target;
  }
};

/* Implements svunpklo and svunpkhi.  */
class svunpk_impl : public quiet<function_base>
{
public:
  CONSTEXPR svunpk_impl (bool high_p) : m_high_p (high_p) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    /* Don't fold the predicate ops, since every bit of the svbool_t
       result is significant.  */
    if (f.type_suffix_ids[0] == TYPE_SUFFIX_b)
      return NULL;

    /* The first half in memory is VEC_UNPACK_LO_EXPR for little-endian
       and VEC_UNPACK_HI_EXPR for big-endian.  */
    bool high_p = BYTES_BIG_ENDIAN ? !m_high_p : m_high_p;
    tree_code code = high_p ? VEC_UNPACK_HI_EXPR : VEC_UNPACK_LO_EXPR;
    return gimple_build_assign (f.lhs, code, gimple_call_arg (f.call, 0));
  }

  rtx
  expand (function_expander &e) const override
  {
    machine_mode mode = GET_MODE (e.args[0]);
    unsigned int unpacku = m_high_p ? UNSPEC_UNPACKUHI : UNSPEC_UNPACKULO;
    unsigned int unpacks = m_high_p ? UNSPEC_UNPACKSHI : UNSPEC_UNPACKSLO;
    insn_code icode;
    if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
      icode = code_for_aarch64_sve_punpk (unpacku, mode);
    else
      {
	int unspec = e.type_suffix (0).unsigned_p ? unpacku : unpacks;
	icode = code_for_aarch64_sve_unpk (unspec, unspec, mode);
      }
    return e.use_exact_insn (icode);
  }

  /* True for svunpkhi, false for svunpklo.  */
  bool m_high_p;
};
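/* For illustration (note added for exposition): on little-endian targets
   svunpklo therefore folds to VEC_UNPACK_LO_EXPR and svunpkhi to
   VEC_UNPACK_HI_EXPR; on big-endian targets the two tree codes are
   swapped, as the comment above explains.  */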
/* Also implements svsudot.  */
class svusdot_impl : public function_base
{
public:
  CONSTEXPR svusdot_impl (bool su) : m_su (su) {}

  rtx
  expand (function_expander &e) const override
  {
    /* The implementation of the ACLE function svsudot (for the non-lane
       version) is through the USDOT instruction but with the second and third
       inputs swapped.  */
    if (m_su)
      e.rotate_inputs_left (1, 3);
    /* The ACLE function has the same order requirements as for svdot.
       While there's no requirement for the RTL pattern to have the same sort
       of order as that for <sur>dot_prod, it's easier to read.
       Hence we do the same rotation on arguments as svdot_impl does.  */
    e.rotate_inputs_left (0, 3);
    machine_mode mode = e.vector_mode (0);
    insn_code icode = code_for_dot_prod (UNSPEC_USDOT, e.result_mode (), mode);
    return e.use_exact_insn (icode);
  }

  /* True for svsudot, false for svusdot.  */
  bool m_su;
};

/* Implements svuzp1 and svuzp2.  */
class svuzp_impl : public binary_permute
{
public:
  CONSTEXPR svuzp_impl (unsigned int base)
    : binary_permute (base ? UNSPEC_UZP2 : UNSPEC_UZP1), m_base (base) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    /* svuzp1: { 0, 2, 4, 6, ... }
       svuzp2: { 1, 3, 5, 7, ... }.  */
    poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
    vec_perm_builder builder (nelts, 1, 3);
    for (unsigned int i = 0; i < 3; ++i)
      builder.quick_push (m_base + i * 2);
    return fold_permute (f, builder);
  }

  /* 0 for svuzp1, 1 for svuzp2.  */
  unsigned int m_base;
};
/* A function_base for svwhilele and svwhilelt functions.  */
class svwhilelx_impl : public while_comparison
{
public:
  CONSTEXPR svwhilelx_impl (int unspec_for_sint, int unspec_for_uint,
			    bool eq_p)
    : while_comparison (unspec_for_sint, unspec_for_uint), m_eq_p (eq_p)
  {}

  /* Try to fold a call by treating its arguments as constants of type T.
     We have already filtered out the degenerate cases of X .LT. MIN
     and X .LE. MAX.  */
  template<typename T>
  gimple *
  fold_type (gimple_folder &f) const
  {
    /* Only handle cases in which both operands are constant.  */
    T arg0, arg1;
    if (!poly_int_tree_p (gimple_call_arg (f.call, 0), &arg0)
	|| !poly_int_tree_p (gimple_call_arg (f.call, 1), &arg1))
      return NULL;

    /* Check whether the result is known to be all-false.  */
    if (m_eq_p ? known_gt (arg0, arg1) : known_ge (arg0, arg1))
      return f.fold_to_pfalse ();

    /* Punt if we can't tell at compile time whether the result
       is all-false.  */
    if (m_eq_p ? maybe_gt (arg0, arg1) : maybe_ge (arg0, arg1))
      return NULL;

    /* At this point we know the result has at least one set element.  */
    poly_uint64 diff = arg1 - arg0;
    poly_uint64 nelts = GET_MODE_NUNITS (f.vector_mode (0));

    /* Canonicalize the svwhilele form to the svwhilelt form.  Subtract
       from NELTS rather than adding to DIFF, to prevent overflow.  */
    if (m_eq_p)
      nelts -= 1;

    /* Check whether the result is known to be all-true.  */
    if (known_ge (diff, nelts))
      return f.fold_to_ptrue ();

    /* Punt if DIFF might not be the actual number of set elements
       in the result.  Conditional equality is fine.  */
    if (maybe_gt (diff, nelts))
      return NULL;

    /* At this point we know that the predicate will have DIFF set elements
       for svwhilelt and DIFF + 1 set elements for svwhilele (which stops
       after rather than before ARG1 is reached).  See if we can create
       the predicate at compile time.  */
    unsigned HOST_WIDE_INT vl;
    if (diff.is_constant (&vl))
      /* Overflow is no longer possible after the checks above.  */
      return f.fold_to_vl_pred (m_eq_p ? vl + 1 : vl);

    return NULL;
  }

  gimple *
  fold (gimple_folder &f) const override
  {
    if (f.vectors_per_tuple () > 1)
      return NULL;

    /* Filter out cases where the condition is always true or always false.  */
    tree arg1 = gimple_call_arg (f.call, 1);
    if (!m_eq_p && operand_equal_p (arg1, TYPE_MIN_VALUE (TREE_TYPE (arg1))))
      return f.fold_to_pfalse ();
    if (m_eq_p && operand_equal_p (arg1, TYPE_MAX_VALUE (TREE_TYPE (arg1))))
      return f.fold_to_ptrue ();

    if (f.type_suffix (1).unsigned_p)
      return fold_type<poly_uint64> (f);
    else
      return fold_type<poly_int64> (f);
  }

  /* True for svwhilele, false for svwhilelt.  */
  bool m_eq_p;
};
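/* For illustration, assuming 32-bit elements (examples added for
   exposition): svwhilelt_b32 (0, 0) folds to an all-false predicate,
   svwhilelt_b32 (0, 4) to a predicate with the first four lanes set
   (four .S lanes always exist), and svwhilele_b32 (x, INT32_MAX) folds
   to an all-true predicate.  */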
class svwrffr_impl : public function_base
{
public:
  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const override
  {
    return e.use_exact_insn (CODE_FOR_aarch64_wrffr);
  }
};

/* Implements svzip1 and svzip2.  */
class svzip_impl : public binary_permute
{
public:
  CONSTEXPR svzip_impl (unsigned int base)
    : binary_permute (base ? UNSPEC_ZIP2 : UNSPEC_ZIP1), m_base (base) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    /* svzip1: { 0, nelts, 1, nelts + 1, 2, nelts + 2, ... }
       svzip2: as for svzip1, but with nelts / 2 added to each index.  */
    poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
    poly_uint64 base = m_base * exact_div (nelts, 2);
    vec_perm_builder builder (nelts, 2, 3);
    for (unsigned int i = 0; i < 3; ++i)
      {
	builder.quick_push (base + i);
	builder.quick_push (base + i + nelts);
      }
    return fold_permute (f, builder);
  }

  /* 0 for svzip1, 1 for svzip2.  */
  unsigned int m_base;
};
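/* For illustration (example added for exposition): with eight elements,
   the series above selects { 0, 8, 1, 9, 2, 10, 3, 11 } for svzip1 and
   { 4, 12, 5, 13, 6, 14, 7, 15 } for svzip2.  */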
} /* end anonymous namespace */

namespace aarch64_sve {
FUNCTION (svabd, svabd_impl,)
FUNCTION (svabs, quiet<rtx_code_function>, (ABS, ABS, UNSPEC_COND_FABS))
FUNCTION (svacge, svac_impl, (UNSPEC_COND_FCMGE))
FUNCTION (svacgt, svac_impl, (UNSPEC_COND_FCMGT))
FUNCTION (svacle, svac_impl, (UNSPEC_COND_FCMLE))
FUNCTION (svaclt, svac_impl, (UNSPEC_COND_FCMLT))
FUNCTION (svadd, rtx_code_function, (PLUS, PLUS, UNSPEC_COND_FADD))
FUNCTION (svadda, svadda_impl,)
FUNCTION (svaddv, svaddv_impl,)
FUNCTION (svadrb, svadr_bhwd_impl, (0))
FUNCTION (svadrd, svadr_bhwd_impl, (3))
FUNCTION (svadrh, svadr_bhwd_impl, (1))
FUNCTION (svadrw, svadr_bhwd_impl, (2))
FUNCTION (svand, rtx_code_function, (AND, AND))
FUNCTION (svandv, svandv_impl,)
FUNCTION (svasr, rtx_code_function, (ASHIFTRT, ASHIFTRT))
FUNCTION (svasr_wide, shift_wide, (ASHIFTRT, UNSPEC_ASHIFTRT_WIDE))
FUNCTION (svasrd, unspec_based_function, (UNSPEC_ASRD, -1, -1))
FUNCTION (svbfdot, fixed_insn_function, (CODE_FOR_aarch64_sve_bfdotvnx4sf))
FUNCTION (svbfdot_lane, fixed_insn_function,
	  (CODE_FOR_aarch64_sve_bfdot_lanevnx4sf))
FUNCTION (svbfmlalb, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlalbvnx4sf))
FUNCTION (svbfmlalb_lane, fixed_insn_function,
	  (CODE_FOR_aarch64_sve_bfmlalb_lanevnx4sf))
FUNCTION (svbfmlalt, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlaltvnx4sf))
FUNCTION (svbfmlalt_lane, fixed_insn_function,
	  (CODE_FOR_aarch64_sve_bfmlalt_lanevnx4sf))
FUNCTION (svbfmmla, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmmlavnx4sf))
FUNCTION (svbic, svbic_impl,)
FUNCTION (svbrka, svbrk_unary_impl, (UNSPEC_BRKA))
FUNCTION (svbrkb, svbrk_unary_impl, (UNSPEC_BRKB))
FUNCTION (svbrkn, svbrk_binary_impl, (UNSPEC_BRKN))
FUNCTION (svbrkpa, svbrk_binary_impl, (UNSPEC_BRKPA))
FUNCTION (svbrkpb, svbrk_binary_impl, (UNSPEC_BRKPB))
FUNCTION (svcadd, svcadd_impl,)
FUNCTION (svclasta, svclast_impl, (UNSPEC_CLASTA))
FUNCTION (svclastb, svclast_impl, (UNSPEC_CLASTB))
FUNCTION (svcls, unary_count, (CLRSB))
FUNCTION (svclz, unary_count, (CLZ))
FUNCTION (svcmla, svcmla_impl,)
FUNCTION (svcmla_lane, svcmla_lane_impl,)
FUNCTION (svcmpeq, svcmp_impl, (EQ_EXPR, UNSPEC_COND_FCMEQ))
FUNCTION (svcmpeq_wide, svcmp_wide_impl, (EQ_EXPR, UNSPEC_COND_CMPEQ_WIDE,
					  UNSPEC_COND_CMPEQ_WIDE))
FUNCTION (svcmpge, svcmp_impl, (GE_EXPR, UNSPEC_COND_FCMGE))
FUNCTION (svcmpge_wide, svcmp_wide_impl, (GE_EXPR, UNSPEC_COND_CMPGE_WIDE,
					  UNSPEC_COND_CMPHS_WIDE))
FUNCTION (svcmpgt, svcmp_impl, (GT_EXPR, UNSPEC_COND_FCMGT))
FUNCTION (svcmpgt_wide, svcmp_wide_impl, (GT_EXPR, UNSPEC_COND_CMPGT_WIDE,
					  UNSPEC_COND_CMPHI_WIDE))
FUNCTION (svcmple, svcmp_impl, (LE_EXPR, UNSPEC_COND_FCMLE))
FUNCTION (svcmple_wide, svcmp_wide_impl, (LE_EXPR, UNSPEC_COND_CMPLE_WIDE,
					  UNSPEC_COND_CMPLS_WIDE))
FUNCTION (svcmplt, svcmp_impl, (LT_EXPR, UNSPEC_COND_FCMLT))
FUNCTION (svcmplt_wide, svcmp_wide_impl, (LT_EXPR, UNSPEC_COND_CMPLT_WIDE,
					  UNSPEC_COND_CMPLO_WIDE))
FUNCTION (svcmpne, svcmp_impl, (NE_EXPR, UNSPEC_COND_FCMNE))
FUNCTION (svcmpne_wide, svcmp_wide_impl, (NE_EXPR, UNSPEC_COND_CMPNE_WIDE,
					  UNSPEC_COND_CMPNE_WIDE))
FUNCTION (svcmpuo, svcmpuo_impl,)
FUNCTION (svcnot, svcnot_impl,)
FUNCTION (svcnt, unary_count, (POPCOUNT))
FUNCTION (svcntb, svcnt_bhwd_impl, (VNx16QImode))
FUNCTION (svcntb_pat, svcnt_bhwd_pat_impl, (VNx16QImode))
FUNCTION (svcntd, svcnt_bhwd_impl, (VNx2DImode))
FUNCTION (svcntd_pat, svcnt_bhwd_pat_impl, (VNx2DImode))
FUNCTION (svcnth, svcnt_bhwd_impl, (VNx8HImode))
FUNCTION (svcnth_pat, svcnt_bhwd_pat_impl, (VNx8HImode))
FUNCTION (svcntp, svcntp_impl,)
FUNCTION (svcntw, svcnt_bhwd_impl, (VNx4SImode))
FUNCTION (svcntw_pat, svcnt_bhwd_pat_impl, (VNx4SImode))
FUNCTION (svcompact, svcompact_impl,)
FUNCTION (svcreate2, svcreate_impl, (2))
FUNCTION (svcreate3, svcreate_impl, (3))
FUNCTION (svcreate4, svcreate_impl, (4))
FUNCTION (svcvt, svcvt_impl,)
FUNCTION (svcvtnt, svcvtnt_impl,)
FUNCTION (svdiv, svdiv_impl,)
FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV))
FUNCTION (svdot, svdot_impl,)
FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT,
					    UNSPEC_FDOT, UNSPEC_DOT_LANE_FP8))
FUNCTION (svdup, svdup_impl,)
FUNCTION (svdup_lane, svdup_lane_impl,)
FUNCTION (svdupq, svdupq_impl,)
FUNCTION (svdupq_lane, svdupq_lane_impl,)
FUNCTION (sveor, rtx_code_function, (XOR, XOR, -1))
FUNCTION (sveorv, sveorv_impl,)
FUNCTION (svexpa, unspec_based_function, (-1, -1, UNSPEC_FEXPA))
FUNCTION (svext, QUIET_CODE_FOR_MODE0 (aarch64_sve_ext),)
FUNCTION (svextb, svext_bhw_impl, (QImode))
FUNCTION (svexth, svext_bhw_impl, (HImode))
FUNCTION (svextw, svext_bhw_impl, (SImode))
FUNCTION (svget2, svget_impl, (2))
FUNCTION (svget3, svget_impl, (3))
FUNCTION (svget4, svget_impl, (4))
FUNCTION (svindex, svindex_impl,)
FUNCTION (svinsr, svinsr_impl,)
FUNCTION (svlasta, svlast_impl, (UNSPEC_LASTA))
FUNCTION (svlastb, svlast_impl, (UNSPEC_LASTB))
FUNCTION (svld1, svld1_impl,)
FUNCTION (svld1_gather, svld1_gather_impl,)
FUNCTION (svld1ro, svld1ro_impl,)
FUNCTION (svld1rq, svld1rq_impl,)
FUNCTION (svld1sb, svld1_extend_impl, (TYPE_SUFFIX_s8))
FUNCTION (svld1sb_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s8))
FUNCTION (svld1sh, svld1_extend_impl, (TYPE_SUFFIX_s16))
FUNCTION (svld1sh_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s16))
FUNCTION (svld1sw, svld1_extend_impl, (TYPE_SUFFIX_s32))
FUNCTION (svld1sw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s32))
FUNCTION (svld1ub, svld1_extend_impl, (TYPE_SUFFIX_u8))
FUNCTION (svld1ub_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u8))
FUNCTION (svld1uh, svld1_extend_impl, (TYPE_SUFFIX_u16))
FUNCTION (svld1uh_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u16))
FUNCTION (svld1uw, svld1_extend_impl, (TYPE_SUFFIX_u32))
FUNCTION (svld1uw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u32))
FUNCTION (svld2, svld234_impl, (2))
FUNCTION (svld3, svld234_impl, (3))
FUNCTION (svld4, svld234_impl, (4))
FUNCTION (svldff1, svldxf1_impl, (UNSPEC_LDFF1))
FUNCTION (svldff1_gather, svldff1_gather_impl,)
FUNCTION (svldff1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDFF1))
FUNCTION (svldff1sb_gather, svldff1_gather_extend, (TYPE_SUFFIX_s8))
FUNCTION (svldff1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDFF1))
FUNCTION (svldff1sh_gather, svldff1_gather_extend, (TYPE_SUFFIX_s16))
FUNCTION (svldff1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDFF1))
FUNCTION (svldff1sw_gather, svldff1_gather_extend, (TYPE_SUFFIX_s32))
FUNCTION (svldff1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDFF1))
FUNCTION (svldff1ub_gather, svldff1_gather_extend, (TYPE_SUFFIX_u8))
FUNCTION (svldff1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDFF1))
FUNCTION (svldff1uh_gather, svldff1_gather_extend, (TYPE_SUFFIX_u16))
FUNCTION (svldff1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDFF1))
FUNCTION (svldff1uw_gather, svldff1_gather_extend, (TYPE_SUFFIX_u32))
FUNCTION (svldnf1, svldxf1_impl, (UNSPEC_LDNF1))
FUNCTION (svldnf1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDNF1))
FUNCTION (svldnf1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDNF1))
FUNCTION (svldnf1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDNF1))
FUNCTION (svldnf1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDNF1))
FUNCTION (svldnf1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDNF1))
FUNCTION (svldnf1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDNF1))
FUNCTION (svldnt1, svldnt1_impl,)
FUNCTION (svlen, svlen_impl,)
FUNCTION (svlsl, svlsl_impl,)
FUNCTION (svlsl_wide, shift_wide, (ASHIFT, UNSPEC_ASHIFT_WIDE))
FUNCTION (svlsr, rtx_code_function, (LSHIFTRT, LSHIFTRT))
FUNCTION (svlsr_wide, shift_wide, (LSHIFTRT, UNSPEC_LSHIFTRT_WIDE))
FUNCTION (svmad, svmad_impl,)
FUNCTION (svmax, rtx_code_function, (SMAX, UMAX, UNSPEC_COND_FMAX,
				     UNSPEC_FMAX))
FUNCTION (svmaxnm, cond_or_uncond_unspec_function, (UNSPEC_COND_FMAXNM,
						    UNSPEC_FMAXNM))
FUNCTION (svmaxnmv, svmaxnmv_impl,)
FUNCTION (svmaxv, svmaxv_impl,)
FUNCTION (svmin, rtx_code_function, (SMIN, UMIN, UNSPEC_COND_FMIN,
				     UNSPEC_FMIN))
FUNCTION (svminnm, cond_or_uncond_unspec_function, (UNSPEC_COND_FMINNM,
						    UNSPEC_FMINNM))
FUNCTION (svminnmv, svminnmv_impl,)
FUNCTION (svminv, svminv_impl,)
FUNCTION (svmla, svmla_impl,)
FUNCTION (svmla_lane, svmla_lane_impl,)
FUNCTION (svmls, svmls_impl,)
FUNCTION (svmls_lane, svmls_lane_impl,)
FUNCTION (svmmla, svmmla_impl,)
FUNCTION (svmov, svmov_impl,)
FUNCTION (svmsb, svmsb_impl,)
FUNCTION (svmul, svmul_impl,)
FUNCTION (svmul_lane, CODE_FOR_MODE0 (aarch64_mul_lane),)
FUNCTION (svmulh, unspec_based_function, (UNSPEC_SMUL_HIGHPART,
					  UNSPEC_UMUL_HIGHPART, -1))
FUNCTION (svmulx, unspec_based_function, (-1, -1, UNSPEC_COND_FMULX))
FUNCTION (svnand, svnand_impl,)
FUNCTION (svneg, quiet<rtx_code_function>, (NEG, NEG, UNSPEC_COND_FNEG))
FUNCTION (svnmad, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLA))
FUNCTION (svnmla, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLA))
FUNCTION (svnmls, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLS))
FUNCTION (svnmsb, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLS))
FUNCTION (svnor, svnor_impl,)
FUNCTION (svnot, svnot_impl,)
FUNCTION (svorn, svorn_impl,)
FUNCTION (svorr, rtx_code_function, (IOR, IOR))
FUNCTION (svorv, svorv_impl,)
FUNCTION (svpfalse, svpfalse_impl,)
FUNCTION (svpfirst, svpfirst_svpnext_impl, (UNSPEC_PFIRST))
FUNCTION (svpnext, svpfirst_svpnext_impl, (UNSPEC_PNEXT))
FUNCTION (svprfb, svprf_bhwd_impl, (VNx16QImode))
FUNCTION (svprfb_gather, svprf_bhwd_gather_impl, (VNx16QImode))
FUNCTION (svprfd, svprf_bhwd_impl, (VNx2DImode))
FUNCTION (svprfd_gather, svprf_bhwd_gather_impl, (VNx2DImode))
FUNCTION (svprfh, svprf_bhwd_impl, (VNx8HImode))
FUNCTION (svprfh_gather, svprf_bhwd_gather_impl, (VNx8HImode))
FUNCTION (svprfw, svprf_bhwd_impl, (VNx4SImode))
FUNCTION (svprfw_gather, svprf_bhwd_gather_impl, (VNx4SImode))
FUNCTION (svptest_any, svptest_impl, (NE))
FUNCTION (svptest_first, svptest_impl, (LT))
FUNCTION (svptest_last, svptest_impl, (LTU))
FUNCTION (svptrue, svptrue_impl,)
FUNCTION (svptrue_pat, svptrue_pat_impl,)
FUNCTION (svqadd, rtx_code_function, (SS_PLUS, US_PLUS, -1))
FUNCTION (svqdecb, svqdec_bhwd_impl, (QImode))
FUNCTION (svqdecb_pat, svqdec_bhwd_impl, (QImode))
FUNCTION (svqdecd, svqdec_bhwd_impl, (DImode))
FUNCTION (svqdecd_pat, svqdec_bhwd_impl, (DImode))
FUNCTION (svqdech, svqdec_bhwd_impl, (HImode))
FUNCTION (svqdech_pat, svqdec_bhwd_impl, (HImode))
FUNCTION (svqdecp, svqdecp_svqincp_impl, (SS_MINUS, US_MINUS))
FUNCTION (svqdecw, svqdec_bhwd_impl, (SImode))
FUNCTION (svqdecw_pat, svqdec_bhwd_impl, (SImode))
FUNCTION (svqincb, svqinc_bhwd_impl, (QImode))
FUNCTION (svqincb_pat, svqinc_bhwd_impl, (QImode))
FUNCTION (svqincd, svqinc_bhwd_impl, (DImode))
FUNCTION (svqincd_pat, svqinc_bhwd_impl, (DImode))
FUNCTION (svqinch, svqinc_bhwd_impl, (HImode))
FUNCTION (svqinch_pat, svqinc_bhwd_impl, (HImode))
FUNCTION (svqincp, svqdecp_svqincp_impl, (SS_PLUS, US_PLUS))
FUNCTION (svqincw, svqinc_bhwd_impl, (SImode))
FUNCTION (svqincw_pat, svqinc_bhwd_impl, (SImode))
FUNCTION (svqsub, rtx_code_function, (SS_MINUS, US_MINUS, -1))
FUNCTION (svrbit, rtx_code_function, (BITREVERSE, BITREVERSE, -1))
FUNCTION (svrdffr, svrdffr_impl,)
FUNCTION (svrecpe, unspec_based_function, (-1, UNSPEC_URECPE, UNSPEC_FRECPE))
FUNCTION (svrecps, unspec_based_function, (-1, -1, UNSPEC_FRECPS))
FUNCTION (svrecpx, unspec_based_function, (-1, -1, UNSPEC_COND_FRECPX))
FUNCTION (svreinterpret, svreinterpret_impl,)
FUNCTION (svrev, svrev_impl,)
FUNCTION (svrevb, unspec_based_function, (UNSPEC_REVB, UNSPEC_REVB, -1))
FUNCTION (svrevh, unspec_based_function, (UNSPEC_REVH, UNSPEC_REVH, -1))
FUNCTION (svrevw, unspec_based_function, (UNSPEC_REVW, UNSPEC_REVW, -1))
FUNCTION (svrinta, svrint_impl, (round_optab, UNSPEC_COND_FRINTA))
FUNCTION (svrinti, svrint_impl, (nearbyint_optab, UNSPEC_COND_FRINTI))
FUNCTION (svrintm, svrint_impl, (floor_optab, UNSPEC_COND_FRINTM))
FUNCTION (svrintn, svrint_impl, (roundeven_optab, UNSPEC_COND_FRINTN))
FUNCTION (svrintp, svrint_impl, (ceil_optab, UNSPEC_COND_FRINTP))
FUNCTION (svrintx, svrint_impl, (rint_optab, UNSPEC_COND_FRINTX))
FUNCTION (svrintz, svrint_impl, (btrunc_optab, UNSPEC_COND_FRINTZ))
FUNCTION (svrsqrte, unspec_based_function, (-1, UNSPEC_RSQRTE, UNSPEC_RSQRTE))
FUNCTION (svrsqrts, unspec_based_function, (-1, -1, UNSPEC_RSQRTS))
FUNCTION (svscale, unspec_based_function, (-1, -1, UNSPEC_COND_FSCALE))
FUNCTION (svsel, svsel_impl,)
FUNCTION (svset2, svset_impl, (2))
FUNCTION (svset3, svset_impl, (3))
FUNCTION (svset4, svset_impl, (4))
FUNCTION (svsetffr, svsetffr_impl,)
FUNCTION (svsplice, svsplice_impl,)
FUNCTION (svsqrt, rtx_code_function, (SQRT, SQRT, UNSPEC_COND_FSQRT))
FUNCTION (svst1, svst1_impl,)
FUNCTION (svst1_scatter, svst1_scatter_impl,)
FUNCTION (svst1b, svst1_truncate_impl, (QImode))
FUNCTION (svst1b_scatter, svst1_scatter_truncate_impl, (QImode))
FUNCTION (svst1h, svst1_truncate_impl, (HImode))
FUNCTION (svst1h_scatter, svst1_scatter_truncate_impl, (HImode))
FUNCTION (svst1w, svst1_truncate_impl, (SImode))
FUNCTION (svst1w_scatter, svst1_scatter_truncate_impl, (SImode))
FUNCTION (svst2, svst234_impl, (2))
FUNCTION (svst3, svst234_impl, (3))
FUNCTION (svst4, svst234_impl, (4))
FUNCTION (svstnt1, svstnt1_impl,)
FUNCTION (svsub, svsub_impl,)
FUNCTION (svsubr, rtx_code_function_rotated, (MINUS, MINUS, UNSPEC_COND_FSUB))
FUNCTION (svsudot, svusdot_impl, (true))
FUNCTION (svsudot_lane, svdotprod_lane_impl, (UNSPEC_SUDOT, -1, -1))
FUNCTION (svtbl, quiet<unspec_based_uncond_function>, (UNSPEC_TBL, UNSPEC_TBL,
						       UNSPEC_TBL))
FUNCTION (svtmad, CODE_FOR_MODE0 (aarch64_sve_tmad),)
FUNCTION (svtrn1, svtrn_impl, (0))
FUNCTION (svtrn1q, unspec_based_function, (UNSPEC_TRN1Q, UNSPEC_TRN1Q,
					   UNSPEC_TRN1Q))
FUNCTION (svtrn2, svtrn_impl, (1))
FUNCTION (svtrn2q, unspec_based_function, (UNSPEC_TRN2Q, UNSPEC_TRN2Q,
					   UNSPEC_TRN2Q))
FUNCTION (svtsmul, unspec_based_function, (-1, -1, UNSPEC_FTSMUL))
FUNCTION (svtssel, unspec_based_function, (-1, -1, UNSPEC_FTSSEL))
FUNCTION (svundef, svundef_impl, (1))
FUNCTION (svundef2, svundef_impl, (2))
FUNCTION (svundef3, svundef_impl, (3))
FUNCTION (svundef4, svundef_impl, (4))
FUNCTION (svunpkhi, svunpk_impl, (true))
FUNCTION (svunpklo, svunpk_impl, (false))
FUNCTION (svusdot, svusdot_impl, (false))
FUNCTION (svusdot_lane, svdotprod_lane_impl, (UNSPEC_USDOT, -1, -1))
FUNCTION (svusmmla, unspec_based_add_function, (UNSPEC_USMATMUL, -1, -1))
FUNCTION (svuzp1, svuzp_impl, (0))
FUNCTION (svuzp1q, unspec_based_function, (UNSPEC_UZP1Q, UNSPEC_UZP1Q,
					   UNSPEC_UZP1Q))
FUNCTION (svuzp2, svuzp_impl, (1))
FUNCTION (svuzp2q, unspec_based_function, (UNSPEC_UZP2Q, UNSPEC_UZP2Q,
					   UNSPEC_UZP2Q))
FUNCTION (svwhilele, svwhilelx_impl, (UNSPEC_WHILELE, UNSPEC_WHILELS, true))
FUNCTION (svwhilelt, svwhilelx_impl, (UNSPEC_WHILELT, UNSPEC_WHILELO, false))
FUNCTION (svwrffr, svwrffr_impl,)
FUNCTION (svzip1, svzip_impl, (0))
FUNCTION (svzip1q, unspec_based_function, (UNSPEC_ZIP1Q, UNSPEC_ZIP1Q,
					   UNSPEC_ZIP1Q))
FUNCTION (svzip2, svzip_impl, (1))
FUNCTION (svzip2q, unspec_based_function, (UNSPEC_ZIP2Q, UNSPEC_ZIP2Q,
					   UNSPEC_ZIP2Q))
NEON_SVE_BRIDGE_FUNCTION (svget_neonq, svget_neonq_impl,)
NEON_SVE_BRIDGE_FUNCTION (svset_neonq, svset_neonq_impl,)
NEON_SVE_BRIDGE_FUNCTION (svdup_neonq, svdup_neonq_impl,)

} /* end namespace aarch64_sve */