llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
   2 ; RUN: opt < %s -analyze -cost-model -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s
   3
   4 define void @vector_insert_extract(<vscale x 4 x i32> %v0, <vscale x 16 x i32> %v1, <16 x i32> %v2) {
   5 ; CHECK-LABEL: 'vector_insert_extract'
   6 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: %extract_fixed_from_scalable = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> %v0, i64 0)
   7 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: %insert_fixed_into_scalable = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> %v0, <16 x i32> %v2, i64 0)
   8 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %extract_scalable_from_scalable = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %v1, i64 0)
   9 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %insert_scalable_into_scalable = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> %v1, <vscale x 4 x i32> %v0, i64 0)
  10 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  11 ;
  12   %extract_fixed_from_scalable = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> %v0, i64 0)
  13   %insert_fixed_into_scalable = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> %v0, <16 x i32> %v2, i64 0)
  14   %extract_scalable_from_scalable = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %v1, i64 0)
  15   %insert_scalable_into_scalable = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> %v1, <vscale x 4 x i32> %v0, i64 0)
  16   ret void
  17 }
  18 declare <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32>, i64)
  19 declare <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32>, <16 x i32>, i64)
  20 declare <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32>, i64)
  21 declare <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32>, <vscale x 4 x i32>, i64)
  22
  23
  24 define void @reductions(<vscale x 4 x i32> %v0, <vscale x 4 x i64> %v1, <vscale x 4 x float> %v2, <vscale x 4 x double> %v3) {
  25 ; CHECK-LABEL: 'reductions'
  26 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v0)
  27 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v1)
  28 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %v0)
  29 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64> %v1)
  30 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %and_nxv4i32 = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v0)
  31 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %and_nxv4i64 = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v1)
  32 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %or_nxv4i32 = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v0)
  33 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %or_nxv4i64 = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v1)
  34 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xor_nxv4i32 = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v0)
  35 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xor_nxv4i64 = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v1)
  36 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %umin_nxv4i32 = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v0)
  37 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %umin_nxv4i64 = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v1)
  38 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %smin_nxv4i32 = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v0)
  39 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %smin_nxv4i64 = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v1)
  40 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %umax_nxv4i32 = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v0)
  41 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %umax_nxv4i64 = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v1)
  42 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v0)
  43 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v1)
  44 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %v2)
  45 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v3)
  46 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v2)
  47 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3)
  48 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v2)
  49 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v3)
  50 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  51 ;
  52   %add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v0)
  53   %add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v1)
  54   %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %v0)
  55   %mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64> %v1)
  56   %and_nxv4i32 = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v0)
  57   %and_nxv4i64 = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v1)
  58   %or_nxv4i32 = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v0)
  59   %or_nxv4i64 = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v1)
  60   %xor_nxv4i32 = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v0)
  61   %xor_nxv4i64 = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v1)
  62   %umin_nxv4i32 = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v0)
  63   %umin_nxv4i64 = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v1)
  64   %smin_nxv4i32 = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v0)
  65   %smin_nxv4i64 = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v1)
  66   %umax_nxv4i32 = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v0)
  67   %umax_nxv4i64 = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v1)
  68   %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v0)
  69   %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v1)
  70
  71   %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, <vscale x 4 x float> %v2)
  72   %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.0, <vscale x 4 x double> %v3)
  73   %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v2)
  74   %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3)
  75   %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v2)
  76   %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v3)
  77
  78   ret void
  79 }
  80
  81 define void @strict_fp_reductions(<vscale x 4 x float> %v0, <vscale x 4 x double> %v1) {
  82 ; CHECK-LABEL: 'strict_fp_reductions'
  83 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fadd_nxv4f32 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %v0)
  84 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %fadd_nxv4f64 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v1)
  85 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %fmul_nxv4f32 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %v0)
  86 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %fmul_nxv4f64 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v1)
  87 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  88 ;
  89   %fadd_nxv4f32 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, <vscale x 4 x float> %v0)
  90   %fadd_nxv4f64 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.0, <vscale x 4 x double> %v1)
  91   %fmul_nxv4f32 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.0, <vscale x 4 x float> %v0)
  92   %fmul_nxv4f64 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.0, <vscale x 4 x double> %v1)
  93
  94   ret void
  95 }
  96
  97 declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)
  98 declare i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64>)
  99 declare i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32>)
 100 declare i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64>)
 101 declare i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32>)
 102 declare i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64>)
 103 declare i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32>)
 104 declare i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64>)
 105 declare i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32>)
 106 declare i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64>)
 107 declare i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32>)
 108 declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>)
 109 declare i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32>)
 110 declare i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64>)
 111 declare i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32>)
 112 declare i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64>)
 113 declare i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32>)
 114 declare i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64>)
 115 declare float @llvm.vector.reduce.fadd.nxv4f32(float, <vscale x 4 x float>)
 116 declare double @llvm.vector.reduce.fadd.nxv4f64(double, <vscale x 4 x double>)
 117 declare float @llvm.vector.reduce.fmul.nxv4f32(float, <vscale x 4 x float>)
 118 declare double @llvm.vector.reduce.fmul.nxv4f64(double, <vscale x 4 x double>)
 119 declare float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float>)
 120 declare double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double>)
 121 declare float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float>)
 122 declare double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double>)
 123
 124
 125 define void  @count_zeroes(<vscale x 4 x i32> %A) {
 126 ; CHECK-LABEL: 'count_zeroes'
 127 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctlz = call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> %A, i1 true)
 128 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> %A, i1 true)
 129 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 130 ;
 131   %ctlz = call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> %A, i1 true)
 132   %cttz = call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> %A, i1 true)
 133   ret void
 134 }
 135 declare <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32>, i1)
 136 declare <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32>, i1)
 137
 138
 139 define void @vector_reverse() #0 {
 140 ; CHECK-LABEL: 'vector_reverse'
 141 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8> undef)
 142 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.experimental.vector.reverse.nxv32i8(<vscale x 32 x i8> undef)
 143 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.experimental.vector.reverse.nxv2i16(<vscale x 2 x i16> undef)
 144 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.experimental.vector.reverse.nxv4i16(<vscale x 4 x i16> undef)
 145 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16> undef)
 146 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.experimental.vector.reverse.nxv16i16(<vscale x 16 x i16> undef)
 147 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> undef)
 148 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32> undef)
 149 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64> undef)
 150 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.experimental.vector.reverse.nxv4i64(<vscale x 4 x i64> undef)
 151 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2f16 = call <vscale x 2 x half> @llvm.experimental.vector.reverse.nxv2f16(<vscale x 2 x half> undef)
 152 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv4f16 = call <vscale x 4 x half> @llvm.experimental.vector.reverse.nxv4f16(<vscale x 4 x half> undef)
 153 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv8f16 = call <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half> undef)
 154 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv16f16 = call <vscale x 16 x half> @llvm.experimental.vector.reverse.nxv16f16(<vscale x 16 x half> undef)
 155 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2f32 = call <vscale x 2 x float> @llvm.experimental.vector.reverse.nxv2f32(<vscale x 2 x float> undef)
 156 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv4f32 = call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> undef)
 157 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv8f32 = call <vscale x 8 x float> @llvm.experimental.vector.reverse.nxv8f32(<vscale x 8 x float> undef)
 158 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2f64 = call <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double> undef)
 159 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv4f64 = call <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double> undef)
 160 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.experimental.vector.reverse.nxv2bf16(<vscale x 2 x bfloat> undef)
 161 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.experimental.vector.reverse.nxv4bf16(<vscale x 4 x bfloat> undef)
 162 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.experimental.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> undef)
 163 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.experimental.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> undef)
 164 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> undef)
 165 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv8i1 = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> undef)
 166 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv4i1 = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> undef)
 167 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2i1 = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> undef)
 168 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 169 ;
 170
 171   %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8> undef)
 172   %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.experimental.vector.reverse.nxv32i8(<vscale x 32 x i8> undef)
 173   %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.experimental.vector.reverse.nxv2i16(<vscale x 2 x i16> undef)
 174   %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.experimental.vector.reverse.nxv4i16(<vscale x 4 x i16> undef)
 175   %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16> undef)
 176   %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.experimental.vector.reverse.nxv16i16(<vscale x 16 x i16> undef)
 177   %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> undef)
 178   %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32> undef)
 179   %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64> undef)
 180   %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.experimental.vector.reverse.nxv4i64(<vscale x 4 x i64> undef)
 181   %reverse_nxv2f16 = call <vscale x 2 x half> @llvm.experimental.vector.reverse.nxv2f16(<vscale x 2 x half> undef)
 182   %reverse_nxv4f16 = call <vscale x 4 x half> @llvm.experimental.vector.reverse.nxv4f16(<vscale x 4 x half> undef)
 183   %reverse_nxv8f16 = call <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half> undef)
 184   %reverse_nxv16f16 = call <vscale x 16 x half> @llvm.experimental.vector.reverse.nxv16f16(<vscale x 16 x half> undef)
 185   %reverse_nxv2f32 = call <vscale x 2 x float> @llvm.experimental.vector.reverse.nxv2f32(<vscale x 2 x float> undef)
 186   %reverse_nxv4f32 = call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> undef)
 187   %reverse_nxv8f32 = call <vscale x 8 x float> @llvm.experimental.vector.reverse.nxv8f32(<vscale x 8 x float> undef)
 188   %reverse_nxv2f64 = call <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double> undef)
 189   %reverse_nxv4f64 = call <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double> undef)
 190   %reverse_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.experimental.vector.reverse.nxv2bf16(<vscale x 2 x bfloat> undef)
 191   %reverse_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.experimental.vector.reverse.nxv4bf16(<vscale x 4 x bfloat> undef)
 192   %reverse_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.experimental.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> undef)
 193   %reverse_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.experimental.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> undef)
 194   %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> undef)
 195   %reverse_nxv8i1 =  call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> undef)
 196   %reverse_nxv4i1 = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> undef)
 197   %reverse_nxv2i1 = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> undef)
 198   ret void
 199 }
 200 declare <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8>)
 201 declare <vscale x 32 x i8> @llvm.experimental.vector.reverse.nxv32i8(<vscale x 32 x i8>)
 202 declare <vscale x 2 x i16> @llvm.experimental.vector.reverse.nxv2i16(<vscale x 2 x i16>)
 203 declare <vscale x 4 x i16> @llvm.experimental.vector.reverse.nxv4i16(<vscale x 4 x i16>)
 204 declare <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16>)
 205 declare <vscale x 16 x i16> @llvm.experimental.vector.reverse.nxv16i16(<vscale x 16 x i16>)
 206 declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32>)
 207 declare <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32>)
 208 declare <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64>)
 209 declare <vscale x 4 x i64> @llvm.experimental.vector.reverse.nxv4i64(<vscale x 4 x i64>)
 210 declare <vscale x 2 x half> @llvm.experimental.vector.reverse.nxv2f16(<vscale x 2 x half>)
 211 declare <vscale x 4 x half> @llvm.experimental.vector.reverse.nxv4f16(<vscale x 4 x half>)
 212 declare <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half>)
 213 declare <vscale x 16 x half> @llvm.experimental.vector.reverse.nxv16f16(<vscale x 16 x half>)
 214 declare <vscale x 2 x float> @llvm.experimental.vector.reverse.nxv2f32(<vscale x 2 x float>)
 215 declare <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float>)
 216 declare <vscale x 8 x float> @llvm.experimental.vector.reverse.nxv8f32(<vscale x 8 x float>)
 217 declare <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double>)
 218 declare <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double>)
 219 declare <vscale x 2 x bfloat> @llvm.experimental.vector.reverse.nxv2bf16(<vscale x 2 x bfloat>)
 220 declare <vscale x 4 x bfloat> @llvm.experimental.vector.reverse.nxv4bf16(<vscale x 4 x bfloat>)
 221 declare <vscale x 8 x bfloat> @llvm.experimental.vector.reverse.nxv8bf16(<vscale x 8 x bfloat>)
 222 declare <vscale x 16 x bfloat> @llvm.experimental.vector.reverse.nxv16bf16(<vscale x 16 x bfloat>)
 223 declare <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1>)
 224 declare <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1>)
 225 declare <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1>)
 226 declare <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1>)
 227
 228 define void @unsupported_fp_ops(<vscale x 4 x float> %vec) {
 229 ; CHECK-LABEL: 'unsupported_fp_ops'
 230 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %sin = call <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float> %vec)
 231 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %cos = call <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float> %vec)
 232 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %pow = call <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> %vec, <vscale x 4 x float> %vec)
 233 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %powi = call <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float> %vec, i32 42)
 234 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %exp = call <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> %vec)
 235 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %exp2 = call <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> %vec)
 236 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %log = call <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float> %vec)
 237 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %log2 = call <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float> %vec)
 238 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %log10 = call <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float> %vec)
 239 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 240 ;
 241
 242   %sin = call <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float> %vec)
 243   %cos = call <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float> %vec)
 244   %pow = call <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> %vec, <vscale x 4 x float> %vec)
 245   %powi = call <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float> %vec, i32 42)
 246   %exp = call <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> %vec)
 247   %exp2 = call <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> %vec)
 248   %log = call <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float> %vec)
 249   %log2 = call <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float> %vec)
 250   %log10 = call <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float> %vec)
 251   ret void
 252 }
 253
 254 declare <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float>)
 255 declare <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float>)
 256 declare <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
 257 declare <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float>, i32)
 258 declare <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float>)
 259 declare <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float>)
 260 declare <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float>)
 261 declare <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float>)
 262 declare <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float>)
 263
 264 define void @vector_splice() #0 {
 265 ; CHECK-LABEL: 'vector_splice'
 266 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv16i8 = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1)
 267 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv32i8 = call <vscale x 32 x i8> @llvm.experimental.vector.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 1)
 268 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv2i16 = call <vscale x 2 x i16> @llvm.experimental.vector.splice.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 1)
 269 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv4i16 = call <vscale x 4 x i16> @llvm.experimental.vector.splice.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 1)
 270 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv8i16 = call <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 1)
 271 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv16i16 = call <vscale x 16 x i16> @llvm.experimental.vector.splice.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 1)
 272 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv4i32 = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 1)
 273 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv8i32 = call <vscale x 8 x i32> @llvm.experimental.vector.splice.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 1)
 274 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv2i64 = call <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 1)
 275 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv4i64 = call <vscale x 4 x i64> @llvm.experimental.vector.splice.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 1)
 276 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv2f16 = call <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 1)
 277 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv4f16 = call <vscale x 4 x half> @llvm.experimental.vector.splice.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 1)
 278 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv8f16 = call <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 1)
 279 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv16f16 = call <vscale x 16 x half> @llvm.experimental.vector.splice.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 1)
 280 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv2f32 = call <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 1)
 281 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv4f32 = call <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 1)
 282 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv8f32 = call <vscale x 8 x float> @llvm.experimental.vector.splice.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 1)
 283 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv2f64 = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 1)
 284 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv4f64 = call <vscale x 4 x double> @llvm.experimental.vector.splice.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 1)
 285 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.experimental.vector.splice.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 1)
 286 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.experimental.vector.splice.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 1)
 287 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %splice_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.experimental.vector.splice.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 1)
 288 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.experimental.vector.splice.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 1)
 289 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv16i1 = call <vscale x 16 x i1> @llvm.experimental.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1)
 290 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv8i1 = call <vscale x 8 x i1> @llvm.experimental.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1)
 291 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv4i1 = call <vscale x 4 x i1> @llvm.experimental.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1)
 292 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv2i1 = call <vscale x 2 x i1> @llvm.experimental.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1)
 293 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv16i8_neg = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 -1)
 294 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv32i8_neg = call <vscale x 32 x i8> @llvm.experimental.vector.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 -1)
 295 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv2i16_neg = call <vscale x 2 x i16> @llvm.experimental.vector.splice.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 -1)
 296 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv4i16_neg = call <vscale x 4 x i16> @llvm.experimental.vector.splice.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 -1)
 297 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv8i16_neg = call <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 -1)
 298 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv16i16_neg = call <vscale x 16 x i16> @llvm.experimental.vector.splice.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 -1)
 299 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv4i32_neg = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 -1)
 300 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv8i32_neg = call <vscale x 8 x i32> @llvm.experimental.vector.splice.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 -1)
 301 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv2i64_neg = call <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 -1)
 302 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv4i64_neg = call <vscale x 4 x i64> @llvm.experimental.vector.splice.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 -1)
 303 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv2f16_neg = call <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 -1)
 304 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv4f16_neg = call <vscale x 4 x half> @llvm.experimental.vector.splice.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 -1)
 305 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv8f16_neg = call <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 -1)
 306 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv16f16_neg = call <vscale x 16 x half> @llvm.experimental.vector.splice.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 -1)
 307 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv2f32_neg = call <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 -1)
 308 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv4f32_neg = call <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 -1)
 309 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv8f32_neg = call <vscale x 8 x float> @llvm.experimental.vector.splice.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 -1)
 310 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv2f64_neg = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 -1)
 311 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv4f64_neg = call <vscale x 4 x double> @llvm.experimental.vector.splice.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 -1)
 312 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv2bf16_neg = call <vscale x 2 x bfloat> @llvm.experimental.vector.splice.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 -1)
 313 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv4bf16_neg = call <vscale x 4 x bfloat> @llvm.experimental.vector.splice.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 -1)
 314 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %splice_nxv8bf16_neg = call <vscale x 8 x bfloat> @llvm.experimental.vector.splice.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 -1)
 315 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %splice_nxv16bf16_neg = call <vscale x 16 x bfloat> @llvm.experimental.vector.splice.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 -1)
 316 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %splice_nxv16i1_neg = call <vscale x 16 x i1> @llvm.experimental.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 -1)
 317 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %splice_nxv8i1_neg = call <vscale x 8 x i1> @llvm.experimental.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 -1)
 318 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %splice_nxv4i1_neg = call <vscale x 4 x i1> @llvm.experimental.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 -1)
 319 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %splice_nxv2i1_neg = call <vscale x 2 x i1> @llvm.experimental.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 -1)
 320 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 321 ;
 322
 323   %splice_nxv16i8 = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1)
 324   %splice_nxv32i8 = call <vscale x 32 x i8> @llvm.experimental.vector.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 1)
 325   %splice_nxv2i16 = call <vscale x 2 x i16> @llvm.experimental.vector.splice.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 1)
 326   %splice_nxv4i16 = call <vscale x 4 x i16> @llvm.experimental.vector.splice.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 1)
 327   %splice_nxv8i16 = call <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 1)
 328   %splice_nxv16i16 = call <vscale x 16 x i16> @llvm.experimental.vector.splice.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 1)
 329   %splice_nxv4i32 = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 1)
 330   %splice_nxv8i32 = call <vscale x 8 x i32> @llvm.experimental.vector.splice.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 1)
 331   %splice_nxv2i64 = call <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 1)
 332   %splice_nxv4i64 = call <vscale x 4 x i64> @llvm.experimental.vector.splice.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 1)
 333   %splice_nxv2f16 = call <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 1)
 334   %splice_nxv4f16 = call <vscale x 4 x half> @llvm.experimental.vector.splice.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 1)
 335   %splice_nxv8f16 = call <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 1)
 336   %splice_nxv16f16 = call <vscale x 16 x half> @llvm.experimental.vector.splice.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 1)
 337   %splice_nxv2f32 = call <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 1)
 338   %splice_nxv4f32 = call <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 1)
 339   %splice_nxv8f32 = call <vscale x 8 x float> @llvm.experimental.vector.splice.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 1)
 340   %splice_nxv2f64 = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 1)
 341   %splice_nxv4f64 = call <vscale x 4 x double> @llvm.experimental.vector.splice.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 1)
 342   %splice_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.experimental.vector.splice.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 1)
 343   %splice_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.experimental.vector.splice.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 1)
 344   %splice_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.experimental.vector.splice.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 1)
 345   %splice_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.experimental.vector.splice.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 1)
 346   %splice_nxv16i1 = call <vscale x 16 x i1> @llvm.experimental.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1)
 347   %splice_nxv8i1 =  call <vscale x 8 x i1> @llvm.experimental.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1)
 348   %splice_nxv4i1 = call <vscale x 4 x i1> @llvm.experimental.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1)
 349   %splice_nxv2i1 = call <vscale x 2 x i1> @llvm.experimental.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1)
 350 ;; negative Index
 351   %splice_nxv16i8_neg = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 -1)
 352   %splice_nxv32i8_neg = call <vscale x 32 x i8> @llvm.experimental.vector.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 -1)
 353   %splice_nxv2i16_neg = call <vscale x 2 x i16> @llvm.experimental.vector.splice.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 -1)
 354   %splice_nxv4i16_neg = call <vscale x 4 x i16> @llvm.experimental.vector.splice.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 -1)
 355   %splice_nxv8i16_neg = call <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 -1)
 356   %splice_nxv16i16_neg = call <vscale x 16 x i16> @llvm.experimental.vector.splice.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 -1)
 357   %splice_nxv4i32_neg = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 -1)
 358   %splice_nxv8i32_neg = call <vscale x 8 x i32> @llvm.experimental.vector.splice.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 -1)
 359   %splice_nxv2i64_neg= call <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 -1)
 360   %splice_nxv4i64_neg = call <vscale x 4 x i64> @llvm.experimental.vector.splice.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 -1)
 361   %splice_nxv2f16_neg = call <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 -1)
 362   %splice_nxv4f16_neg = call <vscale x 4 x half> @llvm.experimental.vector.splice.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 -1)
 363   %splice_nxv8f16_neg = call <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x half> zeroinitializer, i32 -1)
 364   %splice_nxv16f16_neg = call <vscale x 16 x half> @llvm.experimental.vector.splice.nxv16f16(<vscale x 16 x half> zeroinitializer, <vscale x 16 x half> zeroinitializer, i32 -1)
 365   %splice_nxv2f32_neg = call <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x float> zeroinitializer, i32 -1)
 366   %splice_nxv4f32_neg = call <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i32 -1)
 367   %splice_nxv8f32_neg = call <vscale x 8 x float> @llvm.experimental.vector.splice.nxv8f32(<vscale x 8 x float> zeroinitializer, <vscale x 8 x float> zeroinitializer, i32 -1)
 368   %splice_nxv2f64_neg = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer, i32 -1)
 369   %splice_nxv4f64_neg = call <vscale x 4 x double> @llvm.experimental.vector.splice.nxv4f64(<vscale x 4 x double> zeroinitializer, <vscale x 4 x double> zeroinitializer, i32 -1)
 370   %splice_nxv2bf16_neg = call <vscale x 2 x bfloat> @llvm.experimental.vector.splice.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 -1)
 371   %splice_nxv4bf16_neg = call <vscale x 4 x bfloat> @llvm.experimental.vector.splice.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 -1)
 372   %splice_nxv8bf16_neg = call <vscale x 8 x bfloat> @llvm.experimental.vector.splice.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 -1)
 373   %splice_nxv16bf16_neg = call <vscale x 16 x bfloat> @llvm.experimental.vector.splice.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 -1)
 374   %splice_nxv16i1_neg = call <vscale x 16 x i1> @llvm.experimental.vector.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 -1)
 375   %splice_nxv8i1_neg =  call <vscale x 8 x i1> @llvm.experimental.vector.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 -1)
 376   %splice_nxv4i1_neg = call <vscale x 4 x i1> @llvm.experimental.vector.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 -1)
 377   %splice_nxv2i1_neg = call <vscale x 2 x i1> @llvm.experimental.vector.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 -1)
 378   ret void
 379 }
 380
 381 declare <vscale x 2 x i1> @llvm.experimental.vector.splice.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>, i32)
 382 declare <vscale x 4 x i1> @llvm.experimental.vector.splice.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>, i32)
 383 declare <vscale x 8 x i1> @llvm.experimental.vector.splice.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>, i32)
 384 declare <vscale x 16 x i1> @llvm.experimental.vector.splice.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, i32)
 385 declare <vscale x 2 x i8> @llvm.experimental.vector.splice.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, i32)
 386 declare <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
 387 declare <vscale x 32 x i8> @llvm.experimental.vector.splice.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, i32)
 388 declare <vscale x 2 x i16> @llvm.experimental.vector.splice.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, i32)
 389 declare <vscale x 4 x i16> @llvm.experimental.vector.splice.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, i32)
 390 declare <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
 391 declare <vscale x 16 x i16> @llvm.experimental.vector.splice.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, i32)
 392 declare <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
 393 declare <vscale x 8 x i32> @llvm.experimental.vector.splice.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, i32)
 394 declare <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
 395 declare <vscale x 4 x i64> @llvm.experimental.vector.splice.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, i32)
 396 declare <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, i32)
 397 declare <vscale x 4 x half> @llvm.experimental.vector.splice.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, i32)
 398 declare <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32)
 399 declare <vscale x 16 x half> @llvm.experimental.vector.splice.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>, i32)
 400 declare <vscale x 2 x bfloat> @llvm.experimental.vector.splice.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, i32)
 401 declare <vscale x 4 x bfloat> @llvm.experimental.vector.splice.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, i32)
 402 declare <vscale x 8 x bfloat> @llvm.experimental.vector.splice.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
 403 declare <vscale x 16 x bfloat> @llvm.experimental.vector.splice.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, i32)
 404 declare <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, i32)
 405 declare <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32)
 406 declare <vscale x 8 x float> @llvm.experimental.vector.splice.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>, i32)
 407 declare <vscale x 16 x float> @llvm.experimental.vector.splice.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>, i32)
 408 declare <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)
 409 declare <vscale x 4 x double> @llvm.experimental.vector.splice.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>, i32)
 410
 411 attributes #0 = { "target-features"="+sve,+bf16" }