llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
   2 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin | FileCheck %s --check-prefixes=CHECK,ARGBASED
   3 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin --type-based-intrinsic-cost=true | FileCheck %s --check-prefixes=CHECK,TYPEBASED
   4
   5 define void @unsupported_fp_ops(<vscale x 4 x float> %vec, i32 %extraarg) {
     ; Scalable-vector llvm.pow, and llvm.powi whose exponent is the non-constant
     ; argument %extraarg. Both opt invocations are expected to report an invalid
     ; cost for each of the two calls.
   6 ; CHECK-LABEL: 'unsupported_fp_ops'
   7 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %pow = call <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> %vec, <vscale x 4 x float> %vec)
   8 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %powi = call <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float> %vec, i32 %extraarg)
   9 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  10 ;
  11   %pow = call <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> %vec, <vscale x 4 x float> %vec)
  12   %powi = call <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float> %vec, i32 %extraarg)
  13   ret void
  14 }
  15
  16 define void @powi(<vscale x 4 x float> %vec) {
     ; llvm.powi on a scalable vector with the constant exponent 42. The two opt
     ; invocations disagree here: the default one expects a cost of 28, while the
     ; one passing --type-based-intrinsic-cost=true expects an invalid cost.
  17 ; ARGBASED-LABEL: 'powi'
  18 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %powi = call <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float> %vec, i32 42)
  19 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  20 ;
  21 ; TYPEBASED-LABEL: 'powi'
  22 ; TYPEBASED-NEXT:  Cost Model: Invalid cost for instruction: %powi = call <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float> %vec, i32 42)
  23 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  24 ;
  25   %powi = call <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float> %vec, i32 42)
  26   ret void
  27 }
  28
  29 define void @fshr(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c) {
     ; Non-predicated funnel shift right on <vscale x 1 x i32>; both opt
     ; invocations expect a cost of 7. The intrinsic suffix is spelled nxv1i32 so
     ; that it agrees with the operand type and with the name in the expected
     ; output below.
  30 ; CHECK-LABEL: 'fshr'
  31 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <vscale x 1 x i32> @llvm.fshr.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c)
  32 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  33 ;
  34   call <vscale x 1 x i32> @llvm.fshr.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c)
  35   ret void
  36 }
  37
  38 define void @fshl(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c) {
     ; Non-predicated funnel shift left on <vscale x 1 x i32>; both opt
     ; invocations expect a cost of 7. The intrinsic suffix is spelled nxv1i32 so
     ; that it agrees with the operand type and with the name in the expected
     ; output below.
  39 ; CHECK-LABEL: 'fshl'
  40 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <vscale x 1 x i32> @llvm.fshl.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c)
  41 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  42 ;
  43   call <vscale x 1 x i32> @llvm.fshl.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c)
  44   ret void
  45 }
  46
  47 define void @vp_fshr() {
     ; Costs llvm.vp.fshr for i8, i16, i32 and i64 elements over fixed-length
     ; (v2 through v16) and scalable (nxv1 and up) vector types, with every
     ; operand, the mask and the vector length left undef. Each of the 38 calls
     ; is expected to cost 7 under both opt invocations.
  48 ; CHECK-LABEL: 'vp_fshr'
  49 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x i8> @llvm.vp.fshr.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
  50 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x i8> @llvm.vp.fshr.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
  51 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x i8> @llvm.vp.fshr.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
  52 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x i8> @llvm.vp.fshr.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
  53 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x i8> @llvm.vp.fshr.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
  54 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x i8> @llvm.vp.fshr.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
  55 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x i8> @llvm.vp.fshr.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
  56 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x i8> @llvm.vp.fshr.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
  57 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x i8> @llvm.vp.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
  58 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call <vscale x 32 x i8> @llvm.vp.fshr.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i8> undef, <vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
  59 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %11 = call <vscale x 64 x i8> @llvm.vp.fshr.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i8> undef, <vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
  60 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <2 x i16> @llvm.vp.fshr.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
  61 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <4 x i16> @llvm.vp.fshr.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
  62 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <8 x i16> @llvm.vp.fshr.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
  63 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <16 x i16> @llvm.vp.fshr.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
  64 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 1 x i16> @llvm.vp.fshr.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef, <vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
  65 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 2 x i16> @llvm.vp.fshr.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
  66 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %18 = call <vscale x 4 x i16> @llvm.vp.fshr.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
  67 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %19 = call <vscale x 8 x i16> @llvm.vp.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
  68 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %20 = call <vscale x 16 x i16> @llvm.vp.fshr.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
  69 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %21 = call <vscale x 32 x i16> @llvm.vp.fshr.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i16> undef, <vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
  70 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %22 = call <2 x i32> @llvm.vp.fshr.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
  71 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = call <4 x i32> @llvm.vp.fshr.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
  72 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %24 = call <8 x i32> @llvm.vp.fshr.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
  73 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %25 = call <16 x i32> @llvm.vp.fshr.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
  74 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %26 = call <vscale x 1 x i32> @llvm.vp.fshr.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef, <vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
  75 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %27 = call <vscale x 2 x i32> @llvm.vp.fshr.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
  76 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %28 = call <vscale x 4 x i32> @llvm.vp.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
  77 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %29 = call <vscale x 8 x i32> @llvm.vp.fshr.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
  78 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %30 = call <vscale x 16 x i32> @llvm.vp.fshr.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
  79 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %31 = call <2 x i64> @llvm.vp.fshr.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
  80 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %32 = call <4 x i64> @llvm.vp.fshr.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
  81 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %33 = call <8 x i64> @llvm.vp.fshr.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
  82 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %34 = call <16 x i64> @llvm.vp.fshr.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
  83 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %35 = call <vscale x 1 x i64> @llvm.vp.fshr.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
  84 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %36 = call <vscale x 2 x i64> @llvm.vp.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
  85 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %37 = call <vscale x 4 x i64> @llvm.vp.fshr.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
  86 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %38 = call <vscale x 8 x i64> @llvm.vp.fshr.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
  87 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  88 ;
  89   call <2 x i8> @llvm.vp.fshr.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
  90   call <4 x i8> @llvm.vp.fshr.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
  91   call <8 x i8> @llvm.vp.fshr.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
  92   call <16 x i8> @llvm.vp.fshr.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
  93   call <vscale x 1 x i8> @llvm.vp.fshr.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
  94   call <vscale x 2 x i8> @llvm.vp.fshr.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
  95   call <vscale x 4 x i8> @llvm.vp.fshr.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
  96   call <vscale x 8 x i8> @llvm.vp.fshr.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
  97   call <vscale x 16 x i8> @llvm.vp.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
  98   call <vscale x 32 x i8> @llvm.vp.fshr.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i8> undef, <vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
  99   call <vscale x 64 x i8> @llvm.vp.fshr.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i8> undef, <vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
 100   call <2 x i16> @llvm.vp.fshr.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
 101   call <4 x i16> @llvm.vp.fshr.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
 102   call <8 x i16> @llvm.vp.fshr.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
 103   call <16 x i16> @llvm.vp.fshr.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
 104   call <vscale x 1 x i16> @llvm.vp.fshr.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef, <vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
 105   call <vscale x 2 x i16> @llvm.vp.fshr.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 106   call <vscale x 4 x i16> @llvm.vp.fshr.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 107   call <vscale x 8 x i16> @llvm.vp.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 108   call <vscale x 16 x i16> @llvm.vp.fshr.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 109   call <vscale x 32 x i16> @llvm.vp.fshr.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i16> undef, <vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
 110   call <2 x i32> @llvm.vp.fshr.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
 111   call <4 x i32> @llvm.vp.fshr.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 112   call <8 x i32> @llvm.vp.fshr.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
 113   call <16 x i32> @llvm.vp.fshr.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
 114   call <vscale x 1 x i32> @llvm.vp.fshr.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef, <vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
 115   call <vscale x 2 x i32> @llvm.vp.fshr.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 116   call <vscale x 4 x i32> @llvm.vp.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 117   call <vscale x 8 x i32> @llvm.vp.fshr.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 118   call <vscale x 16 x i32> @llvm.vp.fshr.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 119   call <2 x i64> @llvm.vp.fshr.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
 120   call <4 x i64> @llvm.vp.fshr.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
 121   call <8 x i64> @llvm.vp.fshr.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
 122   call <16 x i64> @llvm.vp.fshr.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
 123   call <vscale x 1 x i64> @llvm.vp.fshr.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
 124   call <vscale x 2 x i64> @llvm.vp.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 125   call <vscale x 4 x i64> @llvm.vp.fshr.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 126   call <vscale x 8 x i64> @llvm.vp.fshr.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 127   ret void
 128 }
 129
 130 define void @vp_fshl() {
     ; Costs llvm.vp.fshl for i8, i16, i32 and i64 elements over fixed-length
     ; (v2 through v16) and scalable (nxv1 and up) vector types, with every
     ; operand, the mask and the vector length left undef. Each of the 38 calls
     ; is expected to cost 7 under both opt invocations.
 131 ; CHECK-LABEL: 'vp_fshl'
 132 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x i8> @llvm.vp.fshl.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
 133 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x i8> @llvm.vp.fshl.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
 134 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x i8> @llvm.vp.fshl.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
 135 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x i8> @llvm.vp.fshl.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 136 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x i8> @llvm.vp.fshl.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
 137 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x i8> @llvm.vp.fshl.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
 138 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x i8> @llvm.vp.fshl.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
 139 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x i8> @llvm.vp.fshl.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 140 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x i8> @llvm.vp.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
 141 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call <vscale x 32 x i8> @llvm.vp.fshl.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i8> undef, <vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
 142 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %11 = call <vscale x 64 x i8> @llvm.vp.fshl.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i8> undef, <vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
 143 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <2 x i16> @llvm.vp.fshl.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
 144 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <4 x i16> @llvm.vp.fshl.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
 145 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <8 x i16> @llvm.vp.fshl.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
 146 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <16 x i16> @llvm.vp.fshl.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
 147 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 1 x i16> @llvm.vp.fshl.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef, <vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
 148 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 2 x i16> @llvm.vp.fshl.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 149 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %18 = call <vscale x 4 x i16> @llvm.vp.fshl.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 150 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %19 = call <vscale x 8 x i16> @llvm.vp.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 151 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %20 = call <vscale x 16 x i16> @llvm.vp.fshl.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 152 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %21 = call <vscale x 32 x i16> @llvm.vp.fshl.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i16> undef, <vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
 153 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %22 = call <2 x i32> @llvm.vp.fshl.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
 154 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = call <4 x i32> @llvm.vp.fshl.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 155 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %24 = call <8 x i32> @llvm.vp.fshl.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
 156 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %25 = call <16 x i32> @llvm.vp.fshl.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
 157 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %26 = call <vscale x 1 x i32> @llvm.vp.fshl.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef, <vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
 158 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %27 = call <vscale x 2 x i32> @llvm.vp.fshl.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 159 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %28 = call <vscale x 4 x i32> @llvm.vp.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 160 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %29 = call <vscale x 8 x i32> @llvm.vp.fshl.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 161 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %30 = call <vscale x 16 x i32> @llvm.vp.fshl.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 162 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %31 = call <2 x i64> @llvm.vp.fshl.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
 163 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %32 = call <4 x i64> @llvm.vp.fshl.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
 164 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %33 = call <8 x i64> @llvm.vp.fshl.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
 165 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %34 = call <16 x i64> @llvm.vp.fshl.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
 166 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %35 = call <vscale x 1 x i64> @llvm.vp.fshl.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
 167 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %36 = call <vscale x 2 x i64> @llvm.vp.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 168 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %37 = call <vscale x 4 x i64> @llvm.vp.fshl.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 169 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %38 = call <vscale x 8 x i64> @llvm.vp.fshl.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 170 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 171 ;
 172   call <2 x i8> @llvm.vp.fshl.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
 173   call <4 x i8> @llvm.vp.fshl.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
 174   call <8 x i8> @llvm.vp.fshl.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
 175   call <16 x i8> @llvm.vp.fshl.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 176   call <vscale x 1 x i8> @llvm.vp.fshl.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i1> undef, i32 undef)
 177   call <vscale x 2 x i8> @llvm.vp.fshl.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
 178   call <vscale x 4 x i8> @llvm.vp.fshl.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
 179   call <vscale x 8 x i8> @llvm.vp.fshl.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 180   call <vscale x 16 x i8> @llvm.vp.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
 181   call <vscale x 32 x i8> @llvm.vp.fshl.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i8> undef, <vscale x 32 x i8> undef, <vscale x 32 x i1> undef, i32 undef)
 182   call <vscale x 64 x i8> @llvm.vp.fshl.nxv64i8(<vscale x 64 x i8> undef, <vscale x 64 x i8> undef, <vscale x 64 x i8> undef, <vscale x 64 x i1> undef, i32 undef)
 183   call <2 x i16> @llvm.vp.fshl.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
 184   call <4 x i16> @llvm.vp.fshl.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
 185   call <8 x i16> @llvm.vp.fshl.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
 186   call <16 x i16> @llvm.vp.fshl.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
 187   call <vscale x 1 x i16> @llvm.vp.fshl.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i16> undef, <vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
 188   call <vscale x 2 x i16> @llvm.vp.fshl.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 189   call <vscale x 4 x i16> @llvm.vp.fshl.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 190   call <vscale x 8 x i16> @llvm.vp.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 191   call <vscale x 16 x i16> @llvm.vp.fshl.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 192   call <vscale x 32 x i16> @llvm.vp.fshl.nxv32i16(<vscale x 32 x i16> undef, <vscale x 32 x i16> undef, <vscale x 32 x i16> undef, <vscale x 32 x i1> undef, i32 undef)
 193   call <2 x i32> @llvm.vp.fshl.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
 194   call <4 x i32> @llvm.vp.fshl.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 195   call <8 x i32> @llvm.vp.fshl.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
 196   call <16 x i32> @llvm.vp.fshl.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
 197   call <vscale x 1 x i32> @llvm.vp.fshl.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef, <vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
 198   call <vscale x 2 x i32> @llvm.vp.fshl.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 199   call <vscale x 4 x i32> @llvm.vp.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 200   call <vscale x 8 x i32> @llvm.vp.fshl.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 201   call <vscale x 16 x i32> @llvm.vp.fshl.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 202   call <2 x i64> @llvm.vp.fshl.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
 203   call <4 x i64> @llvm.vp.fshl.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
 204   call <8 x i64> @llvm.vp.fshl.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
 205   call <16 x i64> @llvm.vp.fshl.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
 206   call <vscale x 1 x i64> @llvm.vp.fshl.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
 207   call <vscale x 2 x i64> @llvm.vp.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 208   call <vscale x 4 x i64> @llvm.vp.fshl.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 209   call <vscale x 8 x i64> @llvm.vp.fshl.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 210   ret void
 211 }
 212
 213 define void @add() {
     ; Cost-model test for llvm.vp.add. Each VP call (even-numbered %t) is
     ; immediately followed by a plain `add` on the same vector type (odd-numbered
     ; %t), and the assertions below expect the same estimated cost for both
     ; members of every pair.
     ; Covered types: fixed-length <2|4|8|16 x i8|i16|i32|i64> and scalable
     ; <vscale x 2|4|8|16 x i8|i16|i32|i64>. Every operand, including the
     ; <N x i1> argument and the trailing i32 argument of the VP calls, is undef.
     ; The assertions use the prefix shared by both RUN lines of this file.
 214 ; CHECK-LABEL: 'add'
 215 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
 216 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t1 = add <2 x i8> undef, undef
 217 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
 218 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t3 = add <4 x i8> undef, undef
 219 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.add.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
 220 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t5 = add <8 x i8> undef, undef
 221 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.add.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 222 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t7 = add <16 x i8> undef, undef
 223 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i16> @llvm.vp.add.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
 224 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t9 = add <2 x i16> undef, undef
 225 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t10 = call <4 x i16> @llvm.vp.add.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
 226 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t11 = add <4 x i16> undef, undef
 227 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t12 = call <8 x i16> @llvm.vp.add.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
 228 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t13 = add <8 x i16> undef, undef
 229 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t14 = call <16 x i16> @llvm.vp.add.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
 230 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t15 = add <16 x i16> undef, undef
 231 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t16 = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
 232 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t17 = add <2 x i32> undef, undef
 233 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t18 = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 234 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t19 = add <4 x i32> undef, undef
 235 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t20 = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
 236 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t21 = add <8 x i32> undef, undef
 237 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t22 = call <16 x i32> @llvm.vp.add.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
 238 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t23 = add <16 x i32> undef, undef
 239 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t24 = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
 240 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t25 = add <2 x i64> undef, undef
 241 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t26 = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
 242 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t27 = add <4 x i64> undef, undef
 243 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t28 = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
 244 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t29 = add <8 x i64> undef, undef
 245 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t30 = call <16 x i64> @llvm.vp.add.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
 246 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t31 = add <16 x i64> undef, undef
 247 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t32 = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
 248 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t33 = add <vscale x 2 x i8> undef, undef
 249 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t34 = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
 250 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t35 = add <vscale x 4 x i8> undef, undef
 251 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t36 = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 252 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t37 = add <vscale x 8 x i8> undef, undef
 253 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t38 = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
 254 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t39 = add <vscale x 16 x i8> undef, undef
 255 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t40 = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 256 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t41 = add <vscale x 2 x i16> undef, undef
 257 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t42 = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 258 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t43 = add <vscale x 4 x i16> undef, undef
 259 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t44 = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 260 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t45 = add <vscale x 8 x i16> undef, undef
 261 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t46 = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 262 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t47 = add <vscale x 16 x i16> undef, undef
 263 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t48 = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 264 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t49 = add <vscale x 2 x i32> undef, undef
 265 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t50 = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 266 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t51 = add <vscale x 4 x i32> undef, undef
 267 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t52 = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 268 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t53 = add <vscale x 8 x i32> undef, undef
 269 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t54 = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 270 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t55 = add <vscale x 16 x i32> undef, undef
 271 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t56 = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 272 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t57 = add <vscale x 2 x i64> undef, undef
 273 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t58 = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 274 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t59 = add <vscale x 4 x i64> undef, undef
 275 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t60 = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 276 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t61 = add <vscale x 8 x i64> undef, undef
 277 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t62 = call <vscale x 16 x i64> @llvm.vp.add.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 278 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t63 = add <vscale x 16 x i64> undef, undef
 279 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 280 ;
     ; Fixed-length vectors: VP call first, then the plain add of the same type.
 281   %t0 = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
 282   %t1 = add <2 x i8> undef, undef
 283   %t2 = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
 284   %t3 = add <4 x i8> undef, undef
 285   %t4 = call <8 x i8> @llvm.vp.add.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
 286   %t5 = add <8 x i8> undef, undef
 287   %t6 = call <16 x i8> @llvm.vp.add.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 288   %t7 = add <16 x i8> undef, undef
 289   %t8 = call <2 x i16> @llvm.vp.add.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
 290   %t9 = add <2 x i16> undef, undef
 291   %t10 = call <4 x i16> @llvm.vp.add.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
 292   %t11 = add <4 x i16> undef, undef
 293   %t12 = call <8 x i16> @llvm.vp.add.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
 294   %t13 = add <8 x i16> undef, undef
 295   %t14 = call <16 x i16> @llvm.vp.add.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
 296   %t15 = add <16 x i16> undef, undef
 297   %t16 = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
 298   %t17 = add <2 x i32> undef, undef
 299   %t18 = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 300   %t19 = add <4 x i32> undef, undef
 301   %t20 = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
 302   %t21 = add <8 x i32> undef, undef
 303   %t22 = call <16 x i32> @llvm.vp.add.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
 304   %t23 = add <16 x i32> undef, undef
 305   %t24 = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
 306   %t25 = add <2 x i64> undef, undef
 307   %t26 = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
 308   %t27 = add <4 x i64> undef, undef
 309   %t28 = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
 310   %t29 = add <8 x i64> undef, undef
 311   %t30 = call <16 x i64> @llvm.vp.add.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
 312   %t31 = add <16 x i64> undef, undef
     ; Scalable vectors: same pairing, on <vscale x N x iM> types.
 313   %t32 = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
 314   %t33 = add <vscale x 2 x i8> undef, undef
 315   %t34 = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
 316   %t35 = add <vscale x 4 x i8> undef, undef
 317   %t36 = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 318   %t37 = add <vscale x 8 x i8> undef, undef
 319   %t38 = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
 320   %t39 = add <vscale x 16 x i8> undef, undef
 321   %t40 = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 322   %t41 = add <vscale x 2 x i16> undef, undef
 323   %t42 = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 324   %t43 = add <vscale x 4 x i16> undef, undef
 325   %t44 = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 326   %t45 = add <vscale x 8 x i16> undef, undef
 327   %t46 = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 328   %t47 = add <vscale x 16 x i16> undef, undef
 329   %t48 = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 330   %t49 = add <vscale x 2 x i32> undef, undef
 331   %t50 = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 332   %t51 = add <vscale x 4 x i32> undef, undef
 333   %t52 = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 334   %t53 = add <vscale x 8 x i32> undef, undef
 335   %t54 = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 336   %t55 = add <vscale x 16 x i32> undef, undef
 337   %t56 = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 338   %t57 = add <vscale x 2 x i64> undef, undef
 339   %t58 = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 340   %t59 = add <vscale x 4 x i64> undef, undef
 341   %t60 = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 342   %t61 = add <vscale x 8 x i64> undef, undef
 343   %t62 = call <vscale x 16 x i64> @llvm.vp.add.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 344   %t63 = add <vscale x 16 x i64> undef, undef
 345   ret void
 346 }
 347
 348 define void @and() {
     ; Cost-model test for llvm.vp.and. Each VP call (even-numbered %t) is
     ; immediately followed by a plain `and` on the same vector type (odd-numbered
     ; %t), and the assertions below expect the same estimated cost for both
     ; members of every pair.
     ; Covered types: fixed-length <2|4|8|16 x i8|i16|i32|i64> and scalable
     ; <vscale x 2|4|8|16 x i8|i16|i32|i64>. Every operand, including the
     ; <N x i1> argument and the trailing i32 argument of the VP calls, is undef.
     ; The assertions use the prefix shared by both RUN lines of this file.
 349 ; CHECK-LABEL: 'and'
 350 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.and.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
 351 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t1 = and <2 x i8> undef, undef
 352 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.and.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
 353 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t3 = and <4 x i8> undef, undef
 354 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.and.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
 355 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t5 = and <8 x i8> undef, undef
 356 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.and.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 357 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t7 = and <16 x i8> undef, undef
 358 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i16> @llvm.vp.and.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
 359 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t9 = and <2 x i16> undef, undef
 360 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t10 = call <4 x i16> @llvm.vp.and.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
 361 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t11 = and <4 x i16> undef, undef
 362 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t12 = call <8 x i16> @llvm.vp.and.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
 363 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t13 = and <8 x i16> undef, undef
 364 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t14 = call <16 x i16> @llvm.vp.and.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
 365 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t15 = and <16 x i16> undef, undef
 366 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t16 = call <2 x i32> @llvm.vp.and.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
 367 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t17 = and <2 x i32> undef, undef
 368 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t18 = call <4 x i32> @llvm.vp.and.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 369 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t19 = and <4 x i32> undef, undef
 370 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t20 = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
 371 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t21 = and <8 x i32> undef, undef
 372 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t22 = call <16 x i32> @llvm.vp.and.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
 373 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t23 = and <16 x i32> undef, undef
 374 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t24 = call <2 x i64> @llvm.vp.and.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
 375 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t25 = and <2 x i64> undef, undef
 376 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t26 = call <4 x i64> @llvm.vp.and.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
 377 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t27 = and <4 x i64> undef, undef
 378 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t28 = call <8 x i64> @llvm.vp.and.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
 379 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t29 = and <8 x i64> undef, undef
 380 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t30 = call <16 x i64> @llvm.vp.and.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
 381 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t31 = and <16 x i64> undef, undef
 382 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t32 = call <vscale x 2 x i8> @llvm.vp.and.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
 383 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t33 = and <vscale x 2 x i8> undef, undef
 384 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t34 = call <vscale x 4 x i8> @llvm.vp.and.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
 385 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t35 = and <vscale x 4 x i8> undef, undef
 386 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t36 = call <vscale x 8 x i8> @llvm.vp.and.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 387 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t37 = and <vscale x 8 x i8> undef, undef
 388 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t38 = call <vscale x 16 x i8> @llvm.vp.and.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
 389 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t39 = and <vscale x 16 x i8> undef, undef
 390 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t40 = call <vscale x 2 x i16> @llvm.vp.and.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 391 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t41 = and <vscale x 2 x i16> undef, undef
 392 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t42 = call <vscale x 4 x i16> @llvm.vp.and.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 393 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t43 = and <vscale x 4 x i16> undef, undef
 394 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t44 = call <vscale x 8 x i16> @llvm.vp.and.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 395 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t45 = and <vscale x 8 x i16> undef, undef
 396 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t46 = call <vscale x 16 x i16> @llvm.vp.and.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 397 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t47 = and <vscale x 16 x i16> undef, undef
 398 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t48 = call <vscale x 2 x i32> @llvm.vp.and.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 399 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t49 = and <vscale x 2 x i32> undef, undef
 400 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t50 = call <vscale x 4 x i32> @llvm.vp.and.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 401 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t51 = and <vscale x 4 x i32> undef, undef
 402 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t52 = call <vscale x 8 x i32> @llvm.vp.and.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 403 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t53 = and <vscale x 8 x i32> undef, undef
 404 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t54 = call <vscale x 16 x i32> @llvm.vp.and.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 405 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t55 = and <vscale x 16 x i32> undef, undef
 406 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t56 = call <vscale x 2 x i64> @llvm.vp.and.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 407 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t57 = and <vscale x 2 x i64> undef, undef
 408 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t58 = call <vscale x 4 x i64> @llvm.vp.and.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 409 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t59 = and <vscale x 4 x i64> undef, undef
 410 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t60 = call <vscale x 8 x i64> @llvm.vp.and.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 411 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t61 = and <vscale x 8 x i64> undef, undef
 412 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t62 = call <vscale x 16 x i64> @llvm.vp.and.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 413 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t63 = and <vscale x 16 x i64> undef, undef
 414 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 415 ;
     ; Fixed-length vectors: VP call first, then the plain and of the same type.
 416   %t0 = call <2 x i8> @llvm.vp.and.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
 417   %t1 = and <2 x i8> undef, undef
 418   %t2 = call <4 x i8> @llvm.vp.and.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
 419   %t3 = and <4 x i8> undef, undef
 420   %t4 = call <8 x i8> @llvm.vp.and.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
 421   %t5 = and <8 x i8> undef, undef
 422   %t6 = call <16 x i8> @llvm.vp.and.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 423   %t7 = and <16 x i8> undef, undef
 424   %t8 = call <2 x i16> @llvm.vp.and.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
 425   %t9 = and <2 x i16> undef, undef
 426   %t10 = call <4 x i16> @llvm.vp.and.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
 427   %t11 = and <4 x i16> undef, undef
 428   %t12 = call <8 x i16> @llvm.vp.and.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
 429   %t13 = and <8 x i16> undef, undef
 430   %t14 = call <16 x i16> @llvm.vp.and.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
 431   %t15 = and <16 x i16> undef, undef
 432   %t16 = call <2 x i32> @llvm.vp.and.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
 433   %t17 = and <2 x i32> undef, undef
 434   %t18 = call <4 x i32> @llvm.vp.and.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 435   %t19 = and <4 x i32> undef, undef
 436   %t20 = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
 437   %t21 = and <8 x i32> undef, undef
 438   %t22 = call <16 x i32> @llvm.vp.and.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
 439   %t23 = and <16 x i32> undef, undef
 440   %t24 = call <2 x i64> @llvm.vp.and.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
 441   %t25 = and <2 x i64> undef, undef
 442   %t26 = call <4 x i64> @llvm.vp.and.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
 443   %t27 = and <4 x i64> undef, undef
 444   %t28 = call <8 x i64> @llvm.vp.and.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
 445   %t29 = and <8 x i64> undef, undef
       %t30 = call <16 x i64> @llvm.vp.and.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
 446   %t31 = and <16 x i64> undef, undef
     ; Scalable vectors: same pairing, on <vscale x N x iM> types.
 447   %t32 = call <vscale x 2 x i8> @llvm.vp.and.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
 448   %t33 = and <vscale x 2 x i8> undef, undef
 449   %t34 = call <vscale x 4 x i8> @llvm.vp.and.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
 450   %t35 = and <vscale x 4 x i8> undef, undef
 451   %t36 = call <vscale x 8 x i8> @llvm.vp.and.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 452   %t37 = and <vscale x 8 x i8> undef, undef
 453   %t38 = call <vscale x 16 x i8> @llvm.vp.and.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
 454   %t39 = and <vscale x 16 x i8> undef, undef
 455   %t40 = call <vscale x 2 x i16> @llvm.vp.and.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 456   %t41 = and <vscale x 2 x i16> undef, undef
 457   %t42 = call <vscale x 4 x i16> @llvm.vp.and.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 458   %t43 = and <vscale x 4 x i16> undef, undef
 459   %t44 = call <vscale x 8 x i16> @llvm.vp.and.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 460   %t45 = and <vscale x 8 x i16> undef, undef
 461   %t46 = call <vscale x 16 x i16> @llvm.vp.and.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 462   %t47 = and <vscale x 16 x i16> undef, undef
 463   %t48 = call <vscale x 2 x i32> @llvm.vp.and.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 464   %t49 = and <vscale x 2 x i32> undef, undef
 465   %t50 = call <vscale x 4 x i32> @llvm.vp.and.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 466   %t51 = and <vscale x 4 x i32> undef, undef
 467   %t52 = call <vscale x 8 x i32> @llvm.vp.and.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 468   %t53 = and <vscale x 8 x i32> undef, undef
 469   %t54 = call <vscale x 16 x i32> @llvm.vp.and.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 470   %t55 = and <vscale x 16 x i32> undef, undef
 471   %t56 = call <vscale x 2 x i64> @llvm.vp.and.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 472   %t57 = and <vscale x 2 x i64> undef, undef
 473   %t58 = call <vscale x 4 x i64> @llvm.vp.and.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 474   %t59 = and <vscale x 4 x i64> undef, undef
 475   %t60 = call <vscale x 8 x i64> @llvm.vp.and.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 476   %t61 = and <vscale x 8 x i64> undef, undef
 477   %t62 = call <vscale x 16 x i64> @llvm.vp.and.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 478   %t63 = and <vscale x 16 x i64> undef, undef
 479   ret void
 480 }
 481
 482
 483 define void @smax() {
     ; Cost-model coverage for the llvm.vp.smax intrinsic.
     ; Calls are issued on fixed-width vectors (<2 x i8> through <16 x i64>) and
     ; on scalable vectors (<vscale x 2 x i8> through <vscale x 16 x i64>), with
     ; both data operands, the mask and the EVL all left undef.
     ; The assertions below use the prefix common to both opt invocations at the
     ; top of the file, so the same costs are expected with and without
     ; --type-based-intrinsic-cost=true.
     ; They are autogenerated by utils/update_analyze_test_checks.py (see the
     ; note on the first line of the file) and repeat the IR text verbatim, so
     ; regenerate them after any change to the calls instead of editing by hand.
     ; NOTE(review) only even-numbered %t names are used here; presumably the odd
     ; slots mirror the non-VP counterparts that other functions in this file
     ; interleave - confirm.
 484 ; CHECK-LABEL: 'smax'
 485 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.smax.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
 486 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.smax.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
 487 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.smax.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
 488 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.smax.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 489 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i16> @llvm.vp.smax.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
 490 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t10 = call <4 x i16> @llvm.vp.smax.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
 491 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t12 = call <8 x i16> @llvm.vp.smax.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
 492 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t14 = call <16 x i16> @llvm.vp.smax.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
 493 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t16 = call <2 x i32> @llvm.vp.smax.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
 494 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t18 = call <4 x i32> @llvm.vp.smax.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 495 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t20 = call <8 x i32> @llvm.vp.smax.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
 496 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t22 = call <16 x i32> @llvm.vp.smax.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
 497 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t24 = call <2 x i64> @llvm.vp.smax.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
 498 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t26 = call <4 x i64> @llvm.vp.smax.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
 499 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t28 = call <8 x i64> @llvm.vp.smax.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
 500 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t30 = call <16 x i64> @llvm.vp.smax.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
 501 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t32 = call <vscale x 2 x i8> @llvm.vp.smax.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
 502 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t34 = call <vscale x 4 x i8> @llvm.vp.smax.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
 503 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t36 = call <vscale x 8 x i8> @llvm.vp.smax.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 504 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t38 = call <vscale x 16 x i8> @llvm.vp.smax.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
 505 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t40 = call <vscale x 2 x i16> @llvm.vp.smax.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 506 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t42 = call <vscale x 4 x i16> @llvm.vp.smax.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 507 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t44 = call <vscale x 8 x i16> @llvm.vp.smax.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 508 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t46 = call <vscale x 16 x i16> @llvm.vp.smax.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 509 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t48 = call <vscale x 2 x i32> @llvm.vp.smax.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 510 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t50 = call <vscale x 4 x i32> @llvm.vp.smax.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 511 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t52 = call <vscale x 8 x i32> @llvm.vp.smax.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 512 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t54 = call <vscale x 16 x i32> @llvm.vp.smax.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 513 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t56 = call <vscale x 2 x i64> @llvm.vp.smax.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 514 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t58 = call <vscale x 4 x i64> @llvm.vp.smax.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 515 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t60 = call <vscale x 8 x i64> @llvm.vp.smax.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 516 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t62 = call <vscale x 16 x i64> @llvm.vp.smax.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 517 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 518 ;
 519   %t0 = call <2 x i8> @llvm.vp.smax.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
 520   %t2 = call <4 x i8> @llvm.vp.smax.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
 521   %t4 = call <8 x i8> @llvm.vp.smax.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
 522   %t6 = call <16 x i8> @llvm.vp.smax.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 523   %t8 = call <2 x i16> @llvm.vp.smax.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
 524   %t10 = call <4 x i16> @llvm.vp.smax.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
 525   %t12 = call <8 x i16> @llvm.vp.smax.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
 526   %t14 = call <16 x i16> @llvm.vp.smax.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
 527   %t16 = call <2 x i32> @llvm.vp.smax.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
 528   %t18 = call <4 x i32> @llvm.vp.smax.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 529   %t20 = call <8 x i32> @llvm.vp.smax.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
 530   %t22 = call <16 x i32> @llvm.vp.smax.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
 531   %t24 = call <2 x i64> @llvm.vp.smax.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
 532   %t26 = call <4 x i64> @llvm.vp.smax.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
 533   %t28 = call <8 x i64> @llvm.vp.smax.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
 534   %t30 = call <16 x i64> @llvm.vp.smax.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
 535   %t32 = call <vscale x 2 x i8> @llvm.vp.smax.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
 536   %t34 = call <vscale x 4 x i8> @llvm.vp.smax.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
 537   %t36 = call <vscale x 8 x i8> @llvm.vp.smax.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 538   %t38 = call <vscale x 16 x i8> @llvm.vp.smax.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
 539   %t40 = call <vscale x 2 x i16> @llvm.vp.smax.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 540   %t42 = call <vscale x 4 x i16> @llvm.vp.smax.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 541   %t44 = call <vscale x 8 x i16> @llvm.vp.smax.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 542   %t46 = call <vscale x 16 x i16> @llvm.vp.smax.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 543   %t48 = call <vscale x 2 x i32> @llvm.vp.smax.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 544   %t50 = call <vscale x 4 x i32> @llvm.vp.smax.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 545   %t52 = call <vscale x 8 x i32> @llvm.vp.smax.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 546   %t54 = call <vscale x 16 x i32> @llvm.vp.smax.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 547   %t56 = call <vscale x 2 x i64> @llvm.vp.smax.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 548   %t58 = call <vscale x 4 x i64> @llvm.vp.smax.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 549   %t60 = call <vscale x 8 x i64> @llvm.vp.smax.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 550   %t62 = call <vscale x 16 x i64> @llvm.vp.smax.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 551   ret void
 552 }
 553
 554
 555 define void @smin() {
     ; Cost-model coverage for the llvm.vp.smin intrinsic.
     ; Calls are issued on fixed-width vectors (<2 x i8> through <16 x i64>) and
     ; on scalable vectors (<vscale x 2 x i8> through <vscale x 16 x i64>), with
     ; both data operands, the mask and the EVL all left undef.
     ; The assertions below use the prefix common to both opt invocations at the
     ; top of the file, so the same costs are expected with and without
     ; --type-based-intrinsic-cost=true.
     ; They are autogenerated by utils/update_analyze_test_checks.py (see the
     ; note on the first line of the file) and repeat the IR text verbatim, so
     ; regenerate them after any change to the calls instead of editing by hand.
     ; NOTE(review) only even-numbered %t names are used here; presumably the odd
     ; slots mirror the non-VP counterparts that other functions in this file
     ; interleave - confirm.
 556 ; CHECK-LABEL: 'smin'
 557 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.smin.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
 558 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.smin.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
 559 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.smin.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
 560 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.smin.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 561 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i16> @llvm.vp.smin.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
 562 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t10 = call <4 x i16> @llvm.vp.smin.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
 563 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t12 = call <8 x i16> @llvm.vp.smin.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
 564 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t14 = call <16 x i16> @llvm.vp.smin.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
 565 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t16 = call <2 x i32> @llvm.vp.smin.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
 566 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t18 = call <4 x i32> @llvm.vp.smin.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 567 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t20 = call <8 x i32> @llvm.vp.smin.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
 568 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t22 = call <16 x i32> @llvm.vp.smin.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
 569 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t24 = call <2 x i64> @llvm.vp.smin.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
 570 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t26 = call <4 x i64> @llvm.vp.smin.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
 571 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t28 = call <8 x i64> @llvm.vp.smin.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
 572 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t30 = call <16 x i64> @llvm.vp.smin.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
 573 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t32 = call <vscale x 2 x i8> @llvm.vp.smin.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
 574 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t34 = call <vscale x 4 x i8> @llvm.vp.smin.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
 575 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t36 = call <vscale x 8 x i8> @llvm.vp.smin.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 576 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t38 = call <vscale x 16 x i8> @llvm.vp.smin.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
 577 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t40 = call <vscale x 2 x i16> @llvm.vp.smin.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 578 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t42 = call <vscale x 4 x i16> @llvm.vp.smin.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 579 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t44 = call <vscale x 8 x i16> @llvm.vp.smin.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 580 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t46 = call <vscale x 16 x i16> @llvm.vp.smin.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 581 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t48 = call <vscale x 2 x i32> @llvm.vp.smin.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 582 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t50 = call <vscale x 4 x i32> @llvm.vp.smin.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 583 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t52 = call <vscale x 8 x i32> @llvm.vp.smin.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 584 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t54 = call <vscale x 16 x i32> @llvm.vp.smin.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 585 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t56 = call <vscale x 2 x i64> @llvm.vp.smin.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 586 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t58 = call <vscale x 4 x i64> @llvm.vp.smin.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 587 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t60 = call <vscale x 8 x i64> @llvm.vp.smin.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 588 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t62 = call <vscale x 16 x i64> @llvm.vp.smin.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 589 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 590 ;
 591   %t0 = call <2 x i8> @llvm.vp.smin.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
 592   %t2 = call <4 x i8> @llvm.vp.smin.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
 593   %t4 = call <8 x i8> @llvm.vp.smin.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
 594   %t6 = call <16 x i8> @llvm.vp.smin.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 595   %t8 = call <2 x i16> @llvm.vp.smin.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
 596   %t10 = call <4 x i16> @llvm.vp.smin.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
 597   %t12 = call <8 x i16> @llvm.vp.smin.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
 598   %t14 = call <16 x i16> @llvm.vp.smin.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
 599   %t16 = call <2 x i32> @llvm.vp.smin.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
 600   %t18 = call <4 x i32> @llvm.vp.smin.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 601   %t20 = call <8 x i32> @llvm.vp.smin.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
 602   %t22 = call <16 x i32> @llvm.vp.smin.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
 603   %t24 = call <2 x i64> @llvm.vp.smin.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
 604   %t26 = call <4 x i64> @llvm.vp.smin.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
 605   %t28 = call <8 x i64> @llvm.vp.smin.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
 606   %t30 = call <16 x i64> @llvm.vp.smin.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
 607   %t32 = call <vscale x 2 x i8> @llvm.vp.smin.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
 608   %t34 = call <vscale x 4 x i8> @llvm.vp.smin.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
 609   %t36 = call <vscale x 8 x i8> @llvm.vp.smin.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 610   %t38 = call <vscale x 16 x i8> @llvm.vp.smin.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
 611   %t40 = call <vscale x 2 x i16> @llvm.vp.smin.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 612   %t42 = call <vscale x 4 x i16> @llvm.vp.smin.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 613   %t44 = call <vscale x 8 x i16> @llvm.vp.smin.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 614   %t46 = call <vscale x 16 x i16> @llvm.vp.smin.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 615   %t48 = call <vscale x 2 x i32> @llvm.vp.smin.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 616   %t50 = call <vscale x 4 x i32> @llvm.vp.smin.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 617   %t52 = call <vscale x 8 x i32> @llvm.vp.smin.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 618   %t54 = call <vscale x 16 x i32> @llvm.vp.smin.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 619   %t56 = call <vscale x 2 x i64> @llvm.vp.smin.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 620   %t58 = call <vscale x 4 x i64> @llvm.vp.smin.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 621   %t60 = call <vscale x 8 x i64> @llvm.vp.smin.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 622   %t62 = call <vscale x 16 x i64> @llvm.vp.smin.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 623   ret void
 624 }
 625
 626 define void @umax() {
     ; Cost-model coverage for the llvm.vp.umax intrinsic.
     ; Calls are issued on fixed-width vectors (<2 x i8> through <16 x i64>) and
     ; on scalable vectors (<vscale x 2 x i8> through <vscale x 16 x i64>), with
     ; both data operands, the mask and the EVL all left undef.
     ; The assertions below use the prefix common to both opt invocations at the
     ; top of the file, so the same costs are expected with and without
     ; --type-based-intrinsic-cost=true.
     ; They are autogenerated by utils/update_analyze_test_checks.py (see the
     ; note on the first line of the file) and repeat the IR text verbatim, so
     ; regenerate them after any change to the calls instead of editing by hand.
     ; NOTE(review) only even-numbered %t names are used here; presumably the odd
     ; slots mirror the non-VP counterparts that other functions in this file
     ; interleave - confirm.
 627 ; CHECK-LABEL: 'umax'
 628 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.umax.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
 629 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.umax.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
 630 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.umax.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
 631 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.umax.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 632 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i16> @llvm.vp.umax.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
 633 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t10 = call <4 x i16> @llvm.vp.umax.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
 634 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t12 = call <8 x i16> @llvm.vp.umax.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
 635 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t14 = call <16 x i16> @llvm.vp.umax.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
 636 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t16 = call <2 x i32> @llvm.vp.umax.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
 637 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t18 = call <4 x i32> @llvm.vp.umax.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 638 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t20 = call <8 x i32> @llvm.vp.umax.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
 639 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t22 = call <16 x i32> @llvm.vp.umax.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
 640 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t24 = call <2 x i64> @llvm.vp.umax.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
 641 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t26 = call <4 x i64> @llvm.vp.umax.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
 642 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t28 = call <8 x i64> @llvm.vp.umax.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
 643 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t30 = call <16 x i64> @llvm.vp.umax.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
 644 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t32 = call <vscale x 2 x i8> @llvm.vp.umax.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
 645 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t34 = call <vscale x 4 x i8> @llvm.vp.umax.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
 646 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t36 = call <vscale x 8 x i8> @llvm.vp.umax.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 647 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t38 = call <vscale x 16 x i8> @llvm.vp.umax.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
 648 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t40 = call <vscale x 2 x i16> @llvm.vp.umax.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 649 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t42 = call <vscale x 4 x i16> @llvm.vp.umax.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 650 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t44 = call <vscale x 8 x i16> @llvm.vp.umax.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 651 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t46 = call <vscale x 16 x i16> @llvm.vp.umax.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 652 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t48 = call <vscale x 2 x i32> @llvm.vp.umax.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 653 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t50 = call <vscale x 4 x i32> @llvm.vp.umax.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 654 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t52 = call <vscale x 8 x i32> @llvm.vp.umax.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 655 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t54 = call <vscale x 16 x i32> @llvm.vp.umax.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 656 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t56 = call <vscale x 2 x i64> @llvm.vp.umax.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 657 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t58 = call <vscale x 4 x i64> @llvm.vp.umax.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 658 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t60 = call <vscale x 8 x i64> @llvm.vp.umax.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 659 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t62 = call <vscale x 16 x i64> @llvm.vp.umax.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 660 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 661 ;
 662   %t0 = call <2 x i8> @llvm.vp.umax.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
 663   %t2 = call <4 x i8> @llvm.vp.umax.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
 664   %t4 = call <8 x i8> @llvm.vp.umax.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
 665   %t6 = call <16 x i8> @llvm.vp.umax.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 666   %t8 = call <2 x i16> @llvm.vp.umax.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
 667   %t10 = call <4 x i16> @llvm.vp.umax.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
 668   %t12 = call <8 x i16> @llvm.vp.umax.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
 669   %t14 = call <16 x i16> @llvm.vp.umax.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
 670   %t16 = call <2 x i32> @llvm.vp.umax.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
 671   %t18 = call <4 x i32> @llvm.vp.umax.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 672   %t20 = call <8 x i32> @llvm.vp.umax.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
 673   %t22 = call <16 x i32> @llvm.vp.umax.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
 674   %t24 = call <2 x i64> @llvm.vp.umax.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
 675   %t26 = call <4 x i64> @llvm.vp.umax.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
 676   %t28 = call <8 x i64> @llvm.vp.umax.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
 677   %t30 = call <16 x i64> @llvm.vp.umax.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
 678   %t32 = call <vscale x 2 x i8> @llvm.vp.umax.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
 679   %t34 = call <vscale x 4 x i8> @llvm.vp.umax.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
 680   %t36 = call <vscale x 8 x i8> @llvm.vp.umax.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 681   %t38 = call <vscale x 16 x i8> @llvm.vp.umax.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
 682   %t40 = call <vscale x 2 x i16> @llvm.vp.umax.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 683   %t42 = call <vscale x 4 x i16> @llvm.vp.umax.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 684   %t44 = call <vscale x 8 x i16> @llvm.vp.umax.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 685   %t46 = call <vscale x 16 x i16> @llvm.vp.umax.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 686   %t48 = call <vscale x 2 x i32> @llvm.vp.umax.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 687   %t50 = call <vscale x 4 x i32> @llvm.vp.umax.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 688   %t52 = call <vscale x 8 x i32> @llvm.vp.umax.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 689   %t54 = call <vscale x 16 x i32> @llvm.vp.umax.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 690   %t56 = call <vscale x 2 x i64> @llvm.vp.umax.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 691   %t58 = call <vscale x 4 x i64> @llvm.vp.umax.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 692   %t60 = call <vscale x 8 x i64> @llvm.vp.umax.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 693   %t62 = call <vscale x 16 x i64> @llvm.vp.umax.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 694   ret void
 695 }
 696
 697 define void @umin() {
     ; Cost-model coverage for the @llvm.vp.umin intrinsic: one call per type
     ; across fixed vectors (2, 4, 8 and 16 lanes) and scalable vectors
     ; (vscale x 2, 4, 8 and 16 lanes) of i8, i16, i32 and i64 elements.
     ; Both value operands, the mask and the EVL are undef in every call.
     ; The assertions below use the prefix shared by both RUN lines, so the
     ; same costs are expected in either configuration.
 698 ; CHECK-LABEL: 'umin'
 699 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.umin.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
 700 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.umin.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
 701 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.umin.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
 702 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.umin.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 703 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i16> @llvm.vp.umin.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
 704 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t10 = call <4 x i16> @llvm.vp.umin.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
 705 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t12 = call <8 x i16> @llvm.vp.umin.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
 706 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t14 = call <16 x i16> @llvm.vp.umin.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
 707 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t16 = call <2 x i32> @llvm.vp.umin.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
 708 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t18 = call <4 x i32> @llvm.vp.umin.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 709 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t20 = call <8 x i32> @llvm.vp.umin.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
 710 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t22 = call <16 x i32> @llvm.vp.umin.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
 711 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t24 = call <2 x i64> @llvm.vp.umin.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
 712 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t26 = call <4 x i64> @llvm.vp.umin.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
 713 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t28 = call <8 x i64> @llvm.vp.umin.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
 714 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t30 = call <16 x i64> @llvm.vp.umin.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
 715 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t32 = call <vscale x 2 x i8> @llvm.vp.umin.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
 716 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t34 = call <vscale x 4 x i8> @llvm.vp.umin.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
 717 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t36 = call <vscale x 8 x i8> @llvm.vp.umin.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 718 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t38 = call <vscale x 16 x i8> @llvm.vp.umin.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
 719 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t40 = call <vscale x 2 x i16> @llvm.vp.umin.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 720 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t42 = call <vscale x 4 x i16> @llvm.vp.umin.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 721 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t44 = call <vscale x 8 x i16> @llvm.vp.umin.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 722 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t46 = call <vscale x 16 x i16> @llvm.vp.umin.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 723 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t48 = call <vscale x 2 x i32> @llvm.vp.umin.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 724 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t50 = call <vscale x 4 x i32> @llvm.vp.umin.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 725 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t52 = call <vscale x 8 x i32> @llvm.vp.umin.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 726 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t54 = call <vscale x 16 x i32> @llvm.vp.umin.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 727 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t56 = call <vscale x 2 x i64> @llvm.vp.umin.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 728 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t58 = call <vscale x 4 x i64> @llvm.vp.umin.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 729 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t60 = call <vscale x 8 x i64> @llvm.vp.umin.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 730 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t62 = call <vscale x 16 x i64> @llvm.vp.umin.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 731 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 732 ;
 733   %t0 = call <2 x i8> @llvm.vp.umin.v2i8(<2 x i8> undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
 734   %t2 = call <4 x i8> @llvm.vp.umin.v4i8(<4 x i8> undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
 735   %t4 = call <8 x i8> @llvm.vp.umin.v8i8(<8 x i8> undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
 736   %t6 = call <16 x i8> @llvm.vp.umin.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
 737   %t8 = call <2 x i16> @llvm.vp.umin.v2i16(<2 x i16> undef, <2 x i16> undef, <2 x i1> undef, i32 undef)
 738   %t10 = call <4 x i16> @llvm.vp.umin.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i1> undef, i32 undef)
 739   %t12 = call <8 x i16> @llvm.vp.umin.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef, i32 undef)
 740   %t14 = call <16 x i16> @llvm.vp.umin.v16i16(<16 x i16> undef, <16 x i16> undef, <16 x i1> undef, i32 undef)
 741   %t16 = call <2 x i32> @llvm.vp.umin.v2i32(<2 x i32> undef, <2 x i32> undef, <2 x i1> undef, i32 undef)
 742   %t18 = call <4 x i32> @llvm.vp.umin.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
 743   %t20 = call <8 x i32> @llvm.vp.umin.v8i32(<8 x i32> undef, <8 x i32> undef, <8 x i1> undef, i32 undef)
 744   %t22 = call <16 x i32> @llvm.vp.umin.v16i32(<16 x i32> undef, <16 x i32> undef, <16 x i1> undef, i32 undef)
 745   %t24 = call <2 x i64> @llvm.vp.umin.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
 746   %t26 = call <4 x i64> @llvm.vp.umin.v4i64(<4 x i64> undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
 747   %t28 = call <8 x i64> @llvm.vp.umin.v8i64(<8 x i64> undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
 748   %t30 = call <16 x i64> @llvm.vp.umin.v16i64(<16 x i64> undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
 749   %t32 = call <vscale x 2 x i8> @llvm.vp.umin.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
 750   %t34 = call <vscale x 4 x i8> @llvm.vp.umin.nxv4i8(<vscale x 4 x i8> undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
 751   %t36 = call <vscale x 8 x i8> @llvm.vp.umin.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
 752   %t38 = call <vscale x 16 x i8> @llvm.vp.umin.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
 753   %t40 = call <vscale x 2 x i16> @llvm.vp.umin.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 754   %t42 = call <vscale x 4 x i16> @llvm.vp.umin.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 755   %t44 = call <vscale x 8 x i16> @llvm.vp.umin.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 756   %t46 = call <vscale x 16 x i16> @llvm.vp.umin.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 757   %t48 = call <vscale x 2 x i32> @llvm.vp.umin.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 758   %t50 = call <vscale x 4 x i32> @llvm.vp.umin.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 759   %t52 = call <vscale x 8 x i32> @llvm.vp.umin.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 760   %t54 = call <vscale x 16 x i32> @llvm.vp.umin.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 761   %t56 = call <vscale x 2 x i64> @llvm.vp.umin.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 762   %t58 = call <vscale x 4 x i64> @llvm.vp.umin.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 763   %t60 = call <vscale x 8 x i64> @llvm.vp.umin.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 764   %t62 = call <vscale x 16 x i64> @llvm.vp.umin.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 765   ret void
 766 }
 767
 768 define void @abs() {
     ; Cost-model coverage for @llvm.vp.abs next to the plain @llvm.abs on the
     ; same type (i8 and i64 elements; 2, 4, 8 and 16 lanes; fixed and scalable).
     ; In the expected output each predicated call has the same cost as the
     ; unpredicated call that follows it. The call results are unnamed, so the
     ; expected output refers to them by the numbered values %1 through %32, and
     ; the `i1 0` flag operand written below appears there as `i1 false`.
 769 ; CHECK-LABEL: 'abs'
 770 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = call <2 x i8> @llvm.vp.abs.v2i8(<2 x i8> undef, i1 false, <2 x i1> undef, i32 undef)
 771 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
 772 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = call <4 x i8> @llvm.vp.abs.v4i8(<4 x i8> undef, i1 false, <4 x i1> undef, i32 undef)
 773 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
 774 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <8 x i8> @llvm.vp.abs.v8i8(<8 x i8> undef, i1 false, <8 x i1> undef, i32 undef)
 775 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %6 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false)
 776 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = call <16 x i8> @llvm.vp.abs.v16i8(<16 x i8> undef, i1 false, <16 x i1> undef, i32 undef)
 777 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 false)
 778 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = call <2 x i64> @llvm.vp.abs.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef)
 779 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
 780 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %11 = call <4 x i64> @llvm.vp.abs.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef)
 781 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %12 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
 782 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %13 = call <8 x i64> @llvm.vp.abs.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef)
 783 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %14 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
 784 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %15 = call <16 x i64> @llvm.vp.abs.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef)
 785 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %16 = call <16 x i64> @llvm.abs.v16i64(<16 x i64> undef, i1 false)
 786 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x i8> @llvm.vp.abs.nxv2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 787 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call <vscale x 2 x i8> @llvm.abs.nxv2i8(<vscale x 2 x i8> undef, i1 false)
 788 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %19 = call <vscale x 4 x i8> @llvm.vp.abs.nxv4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 789 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %20 = call <vscale x 4 x i8> @llvm.abs.nxv4i8(<vscale x 4 x i8> undef, i1 false)
 790 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %21 = call <vscale x 8 x i8> @llvm.vp.abs.nxv8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 791 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = call <vscale x 8 x i8> @llvm.abs.nxv8i8(<vscale x 8 x i8> undef, i1 false)
 792 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %23 = call <vscale x 16 x i8> @llvm.vp.abs.nxv16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 793 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %24 = call <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8> undef, i1 false)
 794 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %25 = call <vscale x 2 x i64> @llvm.vp.abs.nxv2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 795 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %26 = call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> undef, i1 false)
 796 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %27 = call <vscale x 4 x i64> @llvm.vp.abs.nxv4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 797 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %28 = call <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64> undef, i1 false)
 798 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %29 = call <vscale x 8 x i64> @llvm.vp.abs.nxv8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 799 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %30 = call <vscale x 8 x i64> @llvm.abs.nxv8i64(<vscale x 8 x i64> undef, i1 false)
 800 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %31 = call <vscale x 16 x i64> @llvm.vp.abs.nxv16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 801 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %32 = call <vscale x 16 x i64> @llvm.abs.nxv16i64(<vscale x 16 x i64> undef, i1 false)
 802 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 803 ;
 804   call <2 x i8> @llvm.vp.abs.v2i8(<2 x i8> undef, i1 0, <2 x i1> undef, i32 undef)
 805   call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 0)
 806   call <4 x i8> @llvm.vp.abs.v4i8(<4 x i8> undef, i1 0, <4 x i1> undef, i32 undef)
 807   call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 0)
 808   call <8 x i8> @llvm.vp.abs.v8i8(<8 x i8> undef, i1 0, <8 x i1> undef, i32 undef)
 809   call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 0)
 810   call <16 x i8> @llvm.vp.abs.v16i8(<16 x i8> undef, i1 0, <16 x i1> undef, i32 undef)
 811   call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 0)
 812   call <2 x i64> @llvm.vp.abs.v2i64(<2 x i64> undef, i1 0, <2 x i1> undef, i32 undef)
 813   call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 0)
 814   call <4 x i64> @llvm.vp.abs.v4i64(<4 x i64> undef, i1 0, <4 x i1> undef, i32 undef)
 815   call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 0)
 816   call <8 x i64> @llvm.vp.abs.v8i64(<8 x i64> undef, i1 0, <8 x i1> undef, i32 undef)
 817   call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 0)
 818   call <16 x i64> @llvm.vp.abs.v16i64(<16 x i64> undef, i1 0, <16 x i1> undef, i32 undef)
 819   call <16 x i64> @llvm.abs.v16i64(<16 x i64> undef, i1 0)
 820   call <vscale x 2 x i8> @llvm.vp.abs.nxv2i8(<vscale x 2 x i8> undef, i1 0, <vscale x 2 x i1> undef, i32 undef)
 821   call <vscale x 2 x i8> @llvm.abs.nxv2i8(<vscale x 2 x i8> undef, i1 0)
 822   call <vscale x 4 x i8> @llvm.vp.abs.nxv4i8(<vscale x 4 x i8> undef, i1 0, <vscale x 4 x i1> undef, i32 undef)
 823   call <vscale x 4 x i8> @llvm.abs.nxv4i8(<vscale x 4 x i8> undef, i1 0)
 824   call <vscale x 8 x i8> @llvm.vp.abs.nxv8i8(<vscale x 8 x i8> undef, i1 0, <vscale x 8 x i1> undef, i32 undef)
 825   call <vscale x 8 x i8> @llvm.abs.nxv8i8(<vscale x 8 x i8> undef, i1 0)
 826   call <vscale x 16 x i8> @llvm.vp.abs.nxv16i8(<vscale x 16 x i8> undef, i1 0, <vscale x 16 x i1> undef, i32 undef)
 827   call <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8> undef, i1 0)
 828   call <vscale x 2 x i64> @llvm.vp.abs.nxv2i64(<vscale x 2 x i64> undef, i1 0, <vscale x 2 x i1> undef, i32 undef)
 829   call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> undef, i1 0)
 830   call <vscale x 4 x i64> @llvm.vp.abs.nxv4i64(<vscale x 4 x i64> undef, i1 0, <vscale x 4 x i1> undef, i32 undef)
 831   call <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64> undef, i1 0)
 832   call <vscale x 8 x i64> @llvm.vp.abs.nxv8i64(<vscale x 8 x i64> undef, i1 0, <vscale x 8 x i1> undef, i32 undef)
 833   call <vscale x 8 x i64> @llvm.abs.nxv8i64(<vscale x 8 x i64> undef, i1 0)
 834   call <vscale x 16 x i64> @llvm.vp.abs.nxv16i64(<vscale x 16 x i64> undef, i1 0, <vscale x 16 x i1> undef, i32 undef)
 835   call <vscale x 16 x i64> @llvm.abs.nxv16i64(<vscale x 16 x i64> undef, i1 0)
 836   ret void
 837 }
 838
 839 define void @load() {
     ; Cost-model coverage for @llvm.vp.load next to an ordinary `load` of the
     ; same type (i8 and i64 elements; 2, 4, 8 and 16 lanes; fixed and scalable),
     ; all reading from an undef pointer. In the expected output each pair has
     ; equal cost. The expected output also shows a `.p0` suffix on the intrinsic
     ; names and an explicit `align` on the plain loads, neither of which is
     ; written in the IR below. Value names skip %t11 (%t10 is followed by %t12).
 840 ; CHECK-LABEL: 'load'
 841 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr undef, <2 x i1> undef, i32 undef)
 842 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t1 = load <2 x i8>, ptr undef, align 2
 843 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr undef, <4 x i1> undef, i32 undef)
 844 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t3 = load <4 x i8>, ptr undef, align 4
 845 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.load.v8i8.p0(ptr undef, <8 x i1> undef, i32 undef)
 846 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t5 = load <8 x i8>, ptr undef, align 8
 847 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.load.v16i8.p0(ptr undef, <16 x i1> undef, i32 undef)
 848 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t7 = load <16 x i8>, ptr undef, align 16
 849 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr undef, <2 x i1> undef, i32 undef)
 850 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t9 = load <2 x i64>, ptr undef, align 16
 851 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t10 = call <4 x i64> @llvm.vp.load.v4i64.p0(ptr undef, <4 x i1> undef, i32 undef)
 852 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t12 = load <4 x i64>, ptr undef, align 32
 853 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t13 = call <8 x i64> @llvm.vp.load.v8i64.p0(ptr undef, <8 x i1> undef, i32 undef)
 854 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t14 = load <8 x i64>, ptr undef, align 64
 855 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t15 = call <16 x i64> @llvm.vp.load.v16i64.p0(ptr undef, <16 x i1> undef, i32 undef)
 856 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t16 = load <16 x i64>, ptr undef, align 128
 857 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t17 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr undef, <vscale x 2 x i1> undef, i32 undef)
 858 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t18 = load <vscale x 2 x i8>, ptr undef, align 2
 859 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t19 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr undef, <vscale x 4 x i1> undef, i32 undef)
 860 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t20 = load <vscale x 4 x i8>, ptr undef, align 4
 861 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t21 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr undef, <vscale x 8 x i1> undef, i32 undef)
 862 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %t22 = load <vscale x 8 x i8>, ptr undef, align 8
 863 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t23 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr undef, <vscale x 16 x i1> undef, i32 undef)
 864 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t24 = load <vscale x 16 x i8>, ptr undef, align 16
 865 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t25 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr undef, <vscale x 2 x i1> undef, i32 undef)
 866 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t26 = load <vscale x 2 x i64>, ptr undef, align 16
 867 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t27 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr undef, <vscale x 4 x i1> undef, i32 undef)
 868 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t28 = load <vscale x 4 x i64>, ptr undef, align 32
 869 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t29 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr undef, <vscale x 8 x i1> undef, i32 undef)
 870 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t30 = load <vscale x 8 x i64>, ptr undef, align 64
 871 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t31 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64.p0(ptr undef, <vscale x 16 x i1> undef, i32 undef)
 872 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t32 = load <vscale x 16 x i64>, ptr undef, align 128
 873 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 874 ;
 875   %t0 = call <2 x i8> @llvm.vp.load.v2i8(ptr undef, <2 x i1> undef, i32 undef)
 876   %t1 = load <2 x i8>, ptr undef
 877   %t2 = call <4 x i8> @llvm.vp.load.v4i8(ptr undef, <4 x i1> undef, i32 undef)
 878   %t3 = load <4 x i8>, ptr undef
 879   %t4 = call <8 x i8> @llvm.vp.load.v8i8(ptr undef, <8 x i1> undef, i32 undef)
 880   %t5 = load <8 x i8>, ptr undef
 881   %t6 = call <16 x i8> @llvm.vp.load.v16i8(ptr undef, <16 x i1> undef, i32 undef)
 882   %t7 = load <16 x i8>, ptr undef
 883   %t8 = call <2 x i64> @llvm.vp.load.v2i64(ptr undef, <2 x i1> undef, i32 undef)
 884   %t9 = load <2 x i64>, ptr undef
 885   %t10 = call <4 x i64> @llvm.vp.load.v4i64(ptr undef, <4 x i1> undef, i32 undef)
 886   %t12 = load <4 x i64>, ptr undef
 887   %t13 = call <8 x i64> @llvm.vp.load.v8i64(ptr undef, <8 x i1> undef, i32 undef)
 888   %t14 = load <8 x i64>, ptr undef
 889   %t15 = call <16 x i64> @llvm.vp.load.v16i64(ptr undef, <16 x i1> undef, i32 undef)
 890   %t16 = load <16 x i64>, ptr undef
 891   %t17 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8(ptr undef, <vscale x 2 x i1> undef, i32 undef)
 892   %t18 = load <vscale x 2 x i8>, ptr undef
 893   %t19 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8(ptr undef, <vscale x 4 x i1> undef, i32 undef)
 894   %t20 = load <vscale x 4 x i8>, ptr undef
 895   %t21 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8(ptr undef, <vscale x 8 x i1> undef, i32 undef)
 896   %t22 = load <vscale x 8 x i8>, ptr undef
 897   %t23 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8(ptr undef, <vscale x 16 x i1> undef, i32 undef)
 898   %t24 = load <vscale x 16 x i8>, ptr undef
 899   %t25 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64(ptr undef, <vscale x 2 x i1> undef, i32 undef)
 900   %t26 = load <vscale x 2 x i64>, ptr undef
 901   %t27 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64(ptr undef, <vscale x 4 x i1> undef, i32 undef)
 902   %t28 = load <vscale x 4 x i64>, ptr undef
 903   %t29 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64(ptr undef, <vscale x 8 x i1> undef, i32 undef)
 904   %t30 = load <vscale x 8 x i64>, ptr undef
 905   %t31 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64(ptr undef, <vscale x 16 x i1> undef, i32 undef)
 906   %t32 = load <vscale x 16 x i64>, ptr undef
 907   ret void
 908 }
 909
 910 define void @store() {
     ; Cost-model coverage for @llvm.vp.store next to an ordinary `store` of the
     ; same type (i8 and i64 elements; 2, 4, 8 and 16 lanes; fixed and scalable),
     ; all writing an undef value to an undef pointer. In the expected output each
     ; pair has equal cost. The expected output also shows a `.p0` suffix on the
     ; intrinsic names and an explicit `align` on the plain stores, neither of
     ; which is written in the IR below.
 911 ; CHECK-LABEL: 'store'
 912 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr undef, <2 x i1> undef, i32 undef)
 913 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 2
 914 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v4i8.p0(<4 x i8> undef, ptr undef, <4 x i1> undef, i32 undef)
 915 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 4
 916 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v8i8.p0(<8 x i8> undef, ptr undef, <8 x i1> undef, i32 undef)
 917 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef, align 8
 918 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v16i8.p0(<16 x i8> undef, ptr undef, <16 x i1> undef, i32 undef)
 919 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, ptr undef, align 16
 920 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v2i64.p0(<2 x i64> undef, ptr undef, <2 x i1> undef, i32 undef)
 921 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 16
 922 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.vp.store.v4i64.p0(<4 x i64> undef, ptr undef, <4 x i1> undef, i32 undef)
 923 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, ptr undef, align 32
 924 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.vp.store.v8i64.p0(<8 x i64> undef, ptr undef, <8 x i1> undef, i32 undef)
 925 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, ptr undef, align 64
 926 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.vp.store.v16i64.p0(<16 x i64> undef, ptr undef, <16 x i1> undef, i32 undef)
 927 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <16 x i64> undef, ptr undef, align 128
 928 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.nxv2i8.p0(<vscale x 2 x i8> undef, ptr undef, <vscale x 2 x i1> undef, i32 undef)
 929 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <vscale x 2 x i8> undef, ptr undef, align 2
 930 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.nxv4i8.p0(<vscale x 4 x i8> undef, ptr undef, <vscale x 4 x i1> undef, i32 undef)
 931 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <vscale x 4 x i8> undef, ptr undef, align 4
 932 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.nxv8i8.p0(<vscale x 8 x i8> undef, ptr undef, <vscale x 8 x i1> undef, i32 undef)
 933 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <vscale x 8 x i8> undef, ptr undef, align 8
 934 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> undef, ptr undef, <vscale x 16 x i1> undef, i32 undef)
 935 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <vscale x 16 x i8> undef, ptr undef, align 16
 936 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> undef, ptr undef, <vscale x 2 x i1> undef, i32 undef)
 937 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <vscale x 2 x i64> undef, ptr undef, align 16
 938 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.vp.store.nxv4i64.p0(<vscale x 4 x i64> undef, ptr undef, <vscale x 4 x i1> undef, i32 undef)
 939 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <vscale x 4 x i64> undef, ptr undef, align 32
 940 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.vp.store.nxv8i64.p0(<vscale x 8 x i64> undef, ptr undef, <vscale x 8 x i1> undef, i32 undef)
 941 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <vscale x 8 x i64> undef, ptr undef, align 64
 942 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.vp.store.nxv16i64.p0(<vscale x 16 x i64> undef, ptr undef, <vscale x 16 x i1> undef, i32 undef)
 943 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: store <vscale x 16 x i64> undef, ptr undef, align 128
 944 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 945 ;
 946   call void @llvm.vp.store.v2i8(<2 x i8> undef, ptr undef, <2 x i1> undef, i32 undef)
 947   store <2 x i8> undef, ptr undef
 948   call void @llvm.vp.store.v4i8(<4 x i8> undef, ptr undef, <4 x i1> undef, i32 undef)
 949   store <4 x i8> undef, ptr undef
 950   call void @llvm.vp.store.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef, i32 undef)
 951   store <8 x i8> undef, ptr undef
 952   call void @llvm.vp.store.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef, i32 undef)
 953   store <16 x i8> undef, ptr undef
 954   call void @llvm.vp.store.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef, i32 undef)
 955   store <2 x i64> undef, ptr undef
 956   call void @llvm.vp.store.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef, i32 undef)
 957   store <4 x i64> undef, ptr undef
 958   call void @llvm.vp.store.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef, i32 undef)
 959   store <8 x i64> undef, ptr undef
 960   call void @llvm.vp.store.v16i64(<16 x i64> undef, ptr undef, <16 x i1> undef, i32 undef)
 961   store <16 x i64> undef, ptr undef
 962   call void @llvm.vp.store.nxv2i8(<vscale x 2 x i8> undef, ptr undef, <vscale x 2 x i1> undef, i32 undef)
 963   store <vscale x 2 x i8> undef, ptr undef
 964   call void @llvm.vp.store.nxv4i8(<vscale x 4 x i8> undef, ptr undef, <vscale x 4 x i1> undef, i32 undef)
 965   store <vscale x 4 x i8> undef, ptr undef
 966   call void @llvm.vp.store.nxv8i8(<vscale x 8 x i8> undef, ptr undef, <vscale x 8 x i1> undef, i32 undef)
 967   store <vscale x 8 x i8> undef, ptr undef
 968   call void @llvm.vp.store.nxv16i8(<vscale x 16 x i8> undef, ptr undef, <vscale x 16 x i1> undef, i32 undef)
 969   store <vscale x 16 x i8> undef, ptr undef
 970   call void @llvm.vp.store.nxv2i64(<vscale x 2 x i64> undef, ptr undef, <vscale x 2 x i1> undef, i32 undef)
 971   store <vscale x 2 x i64> undef, ptr undef
 972   call void @llvm.vp.store.nxv4i64(<vscale x 4 x i64> undef, ptr undef, <vscale x 4 x i1> undef, i32 undef)
 973   store <vscale x 4 x i64> undef, ptr undef
 974   call void @llvm.vp.store.nxv8i64(<vscale x 8 x i64> undef, ptr undef, <vscale x 8 x i1> undef, i32 undef)
 975   store <vscale x 8 x i64> undef, ptr undef
 976   call void @llvm.vp.store.nxv16i64(<vscale x 16 x i64> undef, ptr undef, <vscale x 16 x i1> undef, i32 undef)
 977   store <vscale x 16 x i64> undef, ptr undef
 978   ret void
 979 }
 980
 981 define void @strided_load() { ; Expected costs for llvm.experimental.vp.strided.load over fixed-length and scalable vectors, checked separately for the argument-based and type-based runs.
 982 ; ARGBASED-LABEL: 'strided_load'
 983 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %ti1_2 = call <2 x i1> @llvm.experimental.vp.strided.load.v2i1.p0.i64(ptr undef, i64 undef, <2 x i1> undef, i32 undef)
 984 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %ti1_4 = call <4 x i1> @llvm.experimental.vp.strided.load.v4i1.p0.i64(ptr undef, i64 undef, <4 x i1> undef, i32 undef)
 985 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %ti1_8 = call <8 x i1> @llvm.experimental.vp.strided.load.v8i1.p0.i64(ptr undef, i64 undef, <8 x i1> undef, i32 undef)
 986 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %ti1_16 = call <16 x i1> @llvm.experimental.vp.strided.load.v16i1.p0.i64(ptr undef, i64 undef, <16 x i1> undef, i32 undef)
 987 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t0 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr undef, i64 undef, <2 x i1> undef, i32 undef)
 988 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t2 = call <4 x i8> @llvm.experimental.vp.strided.load.v4i8.p0.i64(ptr undef, i64 undef, <4 x i1> undef, i32 undef)
 989 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t4 = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i64(ptr undef, i64 undef, <8 x i1> undef, i32 undef)
 990 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t6 = call <16 x i8> @llvm.experimental.vp.strided.load.v16i8.p0.i64(ptr undef, i64 undef, <16 x i1> undef, i32 undef)
 991 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t8.a = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i64(ptr align 8 undef, i64 undef, <2 x i1> undef, i32 undef)
 992 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t10.a = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr align 8 undef, i64 undef, <4 x i1> undef, i32 undef)
 993 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t13.a = call <8 x i64> @llvm.experimental.vp.strided.load.v8i64.p0.i64(ptr align 8 undef, i64 undef, <8 x i1> undef, i32 undef)
 994 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t15.a = call <16 x i64> @llvm.experimental.vp.strided.load.v16i64.p0.i64(ptr align 8 undef, i64 undef, <16 x i1> undef, i32 undef)
 995 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t8 = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i64(ptr undef, i64 undef, <2 x i1> undef, i32 undef)
 996 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t10 = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr undef, i64 undef, <4 x i1> undef, i32 undef)
 997 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t13 = call <8 x i64> @llvm.experimental.vp.strided.load.v8i64.p0.i64(ptr undef, i64 undef, <8 x i1> undef, i32 undef)
 998 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t15 = call <16 x i64> @llvm.experimental.vp.strided.load.v16i64.p0.i64(ptr undef, i64 undef, <16 x i1> undef, i32 undef)
 999 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t17 = call <vscale x 2 x i8> @llvm.experimental.vp.strided.load.nxv2i8.p0.i64(ptr undef, i64 undef, <vscale x 2 x i1> undef, i32 undef)
1000 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t19 = call <vscale x 4 x i8> @llvm.experimental.vp.strided.load.nxv4i8.p0.i64(ptr undef, i64 undef, <vscale x 4 x i1> undef, i32 undef)
1001 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t21 = call <vscale x 8 x i8> @llvm.experimental.vp.strided.load.nxv8i8.p0.i64(ptr undef, i64 undef, <vscale x 8 x i1> undef, i32 undef)
1002 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %t23 = call <vscale x 16 x i8> @llvm.experimental.vp.strided.load.nxv16i8.p0.i64(ptr undef, i64 undef, <vscale x 16 x i1> undef, i32 undef)
1003 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t25 = call <vscale x 2 x i64> @llvm.experimental.vp.strided.load.nxv2i64.p0.i64(ptr undef, i64 undef, <vscale x 2 x i1> undef, i32 undef)
1004 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t27 = call <vscale x 4 x i64> @llvm.experimental.vp.strided.load.nxv4i64.p0.i64(ptr undef, i64 undef, <vscale x 4 x i1> undef, i32 undef)
1005 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t29 = call <vscale x 8 x i64> @llvm.experimental.vp.strided.load.nxv8i64.p0.i64(ptr undef, i64 undef, <vscale x 8 x i1> undef, i32 undef)
1006 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %t31 = call <vscale x 16 x i64> @llvm.experimental.vp.strided.load.nxv16i64.p0.i64(ptr undef, i64 undef, <vscale x 16 x i1> undef, i32 undef)
1007 ; ARGBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
1008 ;
1009 ; TYPEBASED-LABEL: 'strided_load'
1010 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %ti1_2 = call <2 x i1> @llvm.experimental.vp.strided.load.v2i1.p0.i64(ptr undef, i64 undef, <2 x i1> undef, i32 undef)
1011 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %ti1_4 = call <4 x i1> @llvm.experimental.vp.strided.load.v4i1.p0.i64(ptr undef, i64 undef, <4 x i1> undef, i32 undef)
1012 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %ti1_8 = call <8 x i1> @llvm.experimental.vp.strided.load.v8i1.p0.i64(ptr undef, i64 undef, <8 x i1> undef, i32 undef)
1013 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %ti1_16 = call <16 x i1> @llvm.experimental.vp.strided.load.v16i1.p0.i64(ptr undef, i64 undef, <16 x i1> undef, i32 undef)
1014 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t0 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr undef, i64 undef, <2 x i1> undef, i32 undef)
1015 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t2 = call <4 x i8> @llvm.experimental.vp.strided.load.v4i8.p0.i64(ptr undef, i64 undef, <4 x i1> undef, i32 undef)
1016 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t4 = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i64(ptr undef, i64 undef, <8 x i1> undef, i32 undef)
1017 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t6 = call <16 x i8> @llvm.experimental.vp.strided.load.v16i8.p0.i64(ptr undef, i64 undef, <16 x i1> undef, i32 undef)
1018 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t8.a = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i64(ptr align 8 undef, i64 undef, <2 x i1> undef, i32 undef)
1019 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t10.a = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr align 8 undef, i64 undef, <4 x i1> undef, i32 undef)
1020 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t13.a = call <8 x i64> @llvm.experimental.vp.strided.load.v8i64.p0.i64(ptr align 8 undef, i64 undef, <8 x i1> undef, i32 undef)
1021 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t15.a = call <16 x i64> @llvm.experimental.vp.strided.load.v16i64.p0.i64(ptr align 8 undef, i64 undef, <16 x i1> undef, i32 undef)
1022 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t8 = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i64(ptr undef, i64 undef, <2 x i1> undef, i32 undef)
1023 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t10 = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr undef, i64 undef, <4 x i1> undef, i32 undef)
1024 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t13 = call <8 x i64> @llvm.experimental.vp.strided.load.v8i64.p0.i64(ptr undef, i64 undef, <8 x i1> undef, i32 undef)
1025 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t15 = call <16 x i64> @llvm.experimental.vp.strided.load.v16i64.p0.i64(ptr undef, i64 undef, <16 x i1> undef, i32 undef)
1026 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t17 = call <vscale x 2 x i8> @llvm.experimental.vp.strided.load.nxv2i8.p0.i64(ptr undef, i64 undef, <vscale x 2 x i1> undef, i32 undef)
1027 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t19 = call <vscale x 4 x i8> @llvm.experimental.vp.strided.load.nxv4i8.p0.i64(ptr undef, i64 undef, <vscale x 4 x i1> undef, i32 undef)
1028 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t21 = call <vscale x 8 x i8> @llvm.experimental.vp.strided.load.nxv8i8.p0.i64(ptr undef, i64 undef, <vscale x 8 x i1> undef, i32 undef)
1029 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %t23 = call <vscale x 16 x i8> @llvm.experimental.vp.strided.load.nxv16i8.p0.i64(ptr undef, i64 undef, <vscale x 16 x i1> undef, i32 undef)
1030 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t25 = call <vscale x 2 x i64> @llvm.experimental.vp.strided.load.nxv2i64.p0.i64(ptr undef, i64 undef, <vscale x 2 x i1> undef, i32 undef)
1031 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t27 = call <vscale x 4 x i64> @llvm.experimental.vp.strided.load.nxv4i64.p0.i64(ptr undef, i64 undef, <vscale x 4 x i1> undef, i32 undef)
1032 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t29 = call <vscale x 8 x i64> @llvm.experimental.vp.strided.load.nxv8i64.p0.i64(ptr undef, i64 undef, <vscale x 8 x i1> undef, i32 undef)
1033 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %t31 = call <vscale x 16 x i64> @llvm.experimental.vp.strided.load.nxv16i64.p0.i64(ptr undef, i64 undef, <vscale x 16 x i1> undef, i32 undef)
1034 ; TYPEBASED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
1035 ;
1036   %ti1_2 = call <2 x i1> @llvm.experimental.vp.strided.load.v2i1.i64(ptr undef, i64 undef, <2 x i1> undef, i32 undef) ; i1 element results: the only rows whose expected cost differs between the two runs
1037   %ti1_4 = call <4 x i1> @llvm.experimental.vp.strided.load.v4i1.i64(ptr undef, i64 undef, <4 x i1> undef, i32 undef)
1038   %ti1_8 = call <8 x i1> @llvm.experimental.vp.strided.load.v8i1.i64(ptr undef, i64 undef, <8 x i1> undef, i32 undef)
1039   %ti1_16 = call <16 x i1> @llvm.experimental.vp.strided.load.v16i1.i64(ptr undef, i64 undef, <16 x i1> undef, i32 undef)
1040   %t0 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.i64(ptr undef, i64 undef, <2 x i1> undef, i32 undef) ; fixed-length i8 results
1041   %t2 = call <4 x i8> @llvm.experimental.vp.strided.load.v4i8.i64(ptr undef, i64 undef, <4 x i1> undef, i32 undef)
1042   %t4 = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.i64(ptr undef, i64 undef, <8 x i1> undef, i32 undef)
1043   %t6 = call <16 x i8> @llvm.experimental.vp.strided.load.v16i8.i64(ptr undef, i64 undef, <16 x i1> undef, i32 undef)
1044   %t8.a = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.i64(ptr align(8)  undef, i64 undef, <2 x i1> undef, i32 undef) ; fixed-length i64 with align(8) on the pointer: expected costs equal the unannotated i64 calls that follow
1045   %t10.a = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.i64(ptr align(8)  undef, i64 undef, <4 x i1> undef, i32 undef)
1046   %t13.a = call <8 x i64> @llvm.experimental.vp.strided.load.v8i64.i64(ptr align(8)  undef, i64 undef, <8 x i1> undef, i32 undef)
1047   %t15.a = call <16 x i64> @llvm.experimental.vp.strided.load.v16i64.i64(ptr align(8) undef, i64 undef, <16 x i1> undef, i32 undef)
1048   %t8 = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.i64(ptr undef, i64 undef, <2 x i1> undef, i32 undef) ; fixed-length i64 without an alignment attribute
1049   %t10 = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.i64(ptr undef, i64 undef, <4 x i1> undef, i32 undef)
1050   %t13 = call <8 x i64> @llvm.experimental.vp.strided.load.v8i64.i64(ptr undef, i64 undef, <8 x i1> undef, i32 undef)
1051   %t15 = call <16 x i64> @llvm.experimental.vp.strided.load.v16i64.i64(ptr undef, i64 undef, <16 x i1> undef, i32 undef)
1052   %t17 = call <vscale x 2 x i8> @llvm.experimental.vp.strided.load.nxv2i8.i64(ptr undef, i64 undef, <vscale x 2 x i1> undef, i32 undef) ; scalable i8 results
1053   %t19 = call <vscale x 4 x i8> @llvm.experimental.vp.strided.load.nxv4i8.i64(ptr undef, i64 undef, <vscale x 4 x i1> undef, i32 undef)
1054   %t21 = call <vscale x 8 x i8> @llvm.experimental.vp.strided.load.nxv8i8.i64(ptr undef, i64 undef, <vscale x 8 x i1> undef, i32 undef)
1055   %t23 = call <vscale x 16 x i8> @llvm.experimental.vp.strided.load.nxv16i8.i64(ptr undef, i64 undef, <vscale x 16 x i1> undef, i32 undef)
1056   %t25 = call <vscale x 2 x i64> @llvm.experimental.vp.strided.load.nxv2i64.i64(ptr undef, i64 undef, <vscale x 2 x i1> undef, i32 undef) ; scalable i64 results
1057   %t27 = call <vscale x 4 x i64> @llvm.experimental.vp.strided.load.nxv4i64.i64(ptr undef, i64 undef, <vscale x 4 x i1> undef, i32 undef)
1058   %t29 = call <vscale x 8 x i64> @llvm.experimental.vp.strided.load.nxv8i64.i64(ptr undef, i64 undef, <vscale x 8 x i1> undef, i32 undef)
1059   %t31 = call <vscale x 16 x i64> @llvm.experimental.vp.strided.load.nxv16i64.i64(ptr undef, i64 undef, <vscale x 16 x i1> undef, i32 undef)
1060   ret void
1061 }
1062
1063 define void @strided_store() { ; Expected costs for llvm.experimental.vp.strided.store over fixed-length and scalable vectors; one shared set of expectations covers both runs.
1064 ; CHECK-LABEL: 'strided_store'
1065 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8> undef, ptr undef, i64 undef, <2 x i1> undef, i32 undef)
1066 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.experimental.vp.strided.store.v4i8.p0.i64(<4 x i8> undef, ptr undef, i64 undef, <4 x i1> undef, i32 undef)
1067 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vp.strided.store.v8i8.p0.i64(<8 x i8> undef, ptr undef, i64 undef, <8 x i1> undef, i32 undef)
1068 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vp.strided.store.v16i8.p0.i64(<16 x i8> undef, ptr undef, i64 undef, <16 x i1> undef, i32 undef)
1069 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.experimental.vp.strided.store.v2i64.p0.i64(<2 x i64> undef, ptr undef, i64 undef, <2 x i1> undef, i32 undef)
1070 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.experimental.vp.strided.store.v4i64.p0.i64(<4 x i64> undef, ptr undef, i64 undef, <4 x i1> undef, i32 undef)
1071 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vp.strided.store.v8i64.p0.i64(<8 x i64> undef, ptr undef, i64 undef, <8 x i1> undef, i32 undef)
1072 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vp.strided.store.v16i64.p0.i64(<16 x i64> undef, ptr undef, i64 undef, <16 x i1> undef, i32 undef)
1073 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.experimental.vp.strided.store.v2i64.p0.i64(<2 x i64> undef, ptr align 8 undef, i64 undef, <2 x i1> undef, i32 undef)
1074 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.experimental.vp.strided.store.v4i64.p0.i64(<4 x i64> undef, ptr align 8 undef, i64 undef, <4 x i1> undef, i32 undef)
1075 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vp.strided.store.v8i64.p0.i64(<8 x i64> undef, ptr align 8 undef, i64 undef, <8 x i1> undef, i32 undef)
1076 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vp.strided.store.v16i64.p0.i64(<16 x i64> undef, ptr align 8 undef, i64 undef, <16 x i1> undef, i32 undef)
1077 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.experimental.vp.strided.store.nxv2i8.p0.i64(<vscale x 2 x i8> undef, ptr undef, i64 undef, <vscale x 2 x i1> undef, i32 undef)
1078 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vp.strided.store.nxv4i8.p0.i64(<vscale x 4 x i8> undef, ptr undef, i64 undef, <vscale x 4 x i1> undef, i32 undef)
1079 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vp.strided.store.nxv8i8.p0.i64(<vscale x 8 x i8> undef, ptr undef, i64 undef, <vscale x 8 x i1> undef, i32 undef)
1080 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.experimental.vp.strided.store.nxv16i8.p0.i64(<vscale x 16 x i8> undef, ptr undef, i64 undef, <vscale x 16 x i1> undef, i32 undef)
1081 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.experimental.vp.strided.store.nxv2i64.p0.i64(<vscale x 2 x i64> undef, ptr undef, i64 undef, <vscale x 2 x i1> undef, i32 undef)
1082 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vp.strided.store.nxv4i64.p0.i64(<vscale x 4 x i64> undef, ptr undef, i64 undef, <vscale x 4 x i1> undef, i32 undef)
1083 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vp.strided.store.nxv8i64.p0.i64(<vscale x 8 x i64> undef, ptr undef, i64 undef, <vscale x 8 x i1> undef, i32 undef)
1084 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.experimental.vp.strided.store.nxv16i64.p0.i64(<vscale x 16 x i64> undef, ptr undef, i64 undef, <vscale x 16 x i1> undef, i32 undef)
1085 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
1086 ;
1087   call void @llvm.experimental.vp.strided.store.v2i8.i64(<2 x i8> undef, ptr undef, i64 undef, <2 x i1> undef, i32 undef) ; fixed-length i8 values
1088   call void @llvm.experimental.vp.strided.store.v4i8.i64(<4 x i8> undef, ptr undef, i64 undef, <4 x i1> undef, i32 undef)
1089   call void @llvm.experimental.vp.strided.store.v8i8.i64(<8 x i8> undef, ptr undef, i64 undef, <8 x i1> undef, i32 undef)
1090   call void @llvm.experimental.vp.strided.store.v16i8.i64(<16 x i8> undef, ptr undef, i64 undef, <16 x i1> undef, i32 undef)
1091   call void @llvm.experimental.vp.strided.store.v2i64.i64(<2 x i64> undef, ptr undef, i64 undef, <2 x i1> undef, i32 undef) ; fixed-length i64 values without an alignment attribute
1092   call void @llvm.experimental.vp.strided.store.v4i64.i64(<4 x i64> undef, ptr undef, i64 undef, <4 x i1> undef, i32 undef)
1093   call void @llvm.experimental.vp.strided.store.v8i64.i64(<8 x i64> undef, ptr undef, i64 undef, <8 x i1> undef, i32 undef)
1094   call void @llvm.experimental.vp.strided.store.v16i64.i64(<16 x i64> undef, ptr undef, i64 undef, <16 x i1> undef, i32 undef)
1095   call void @llvm.experimental.vp.strided.store.v2i64.i64(<2 x i64> undef, ptr align(8) undef, i64 undef, <2 x i1> undef, i32 undef) ; same i64 stores with align(8) on the pointer: expected costs equal the unannotated calls above
1096   call void @llvm.experimental.vp.strided.store.v4i64.i64(<4 x i64> undef, ptr align(8) undef, i64 undef, <4 x i1> undef, i32 undef)
1097   call void @llvm.experimental.vp.strided.store.v8i64.i64(<8 x i64> undef, ptr align(8) undef, i64 undef, <8 x i1> undef, i32 undef)
1098   call void @llvm.experimental.vp.strided.store.v16i64.i64(<16 x i64> undef, ptr align(8) undef, i64 undef, <16 x i1> undef, i32 undef)
1099   call void @llvm.experimental.vp.strided.store.nxv2i8.i64(<vscale x 2 x i8> undef, ptr undef, i64 undef, <vscale x 2 x i1> undef, i32 undef) ; scalable i8 values
1100   call void @llvm.experimental.vp.strided.store.nxv4i8.i64(<vscale x 4 x i8> undef, ptr undef, i64 undef, <vscale x 4 x i1> undef, i32 undef)
1101   call void @llvm.experimental.vp.strided.store.nxv8i8.i64(<vscale x 8 x i8> undef, ptr undef, i64 undef, <vscale x 8 x i1> undef, i32 undef)
1102   call void @llvm.experimental.vp.strided.store.nxv16i8.i64(<vscale x 16 x i8> undef, ptr undef, i64 undef, <vscale x 16 x i1> undef, i32 undef)
1103   call void @llvm.experimental.vp.strided.store.nxv2i64.i64(<vscale x 2 x i64> undef, ptr undef, i64 undef, <vscale x 2 x i1> undef, i32 undef) ; scalable i64 values
1104   call void @llvm.experimental.vp.strided.store.nxv4i64.i64(<vscale x 4 x i64> undef, ptr undef, i64 undef, <vscale x 4 x i1> undef, i32 undef)
1105   call void @llvm.experimental.vp.strided.store.nxv8i64.i64(<vscale x 8 x i64> undef, ptr undef, i64 undef, <vscale x 8 x i1> undef, i32 undef)
1106   call void @llvm.experimental.vp.strided.store.nxv16i64.i64(<vscale x 16 x i64> undef, ptr undef, i64 undef, <vscale x 16 x i1> undef, i32 undef)
1107   ret void
1108 }
1109
1110
1111
1112 define void @reduce_add() { ; Pairs every llvm.vp.reduce.add call with the llvm.vector.reduce.add call on the same vector type so the two expected costs sit side by side.
1113 ; CHECK-LABEL: 'reduce_add'
1114 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef)
1115 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
1116 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
1117 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
1118 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %5 = call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
1119 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %6 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
1120 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %7 = call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
1121 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
1122 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %9 = call i64 @llvm.vp.reduce.add.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef)
1123 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
1124 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %11 = call i64 @llvm.vp.reduce.add.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
1125 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %12 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
1126 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %13 = call i64 @llvm.vp.reduce.add.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
1127 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %14 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
1128 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %15 = call i64 @llvm.vp.reduce.add.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
1129 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
1130 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %17 = call i8 @llvm.vp.reduce.add.nxv2i8(i8 undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef)
1131 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> undef)
1132 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %19 = call i8 @llvm.vp.reduce.add.nxv4i8(i8 undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
1133 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %20 = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> undef)
1134 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %21 = call i8 @llvm.vp.reduce.add.nxv8i8(i8 undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
1135 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %22 = call i8 @llvm.vector.reduce.add.nxv8i8(<vscale x 8 x i8> undef)
1136 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = call i8 @llvm.vp.reduce.add.nxv16i8(i8 undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
1137 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %24 = call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> undef)
1138 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %25 = call i64 @llvm.vp.reduce.add.nxv2i64(i64 undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
1139 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %26 = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> undef)
1140 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %27 = call i64 @llvm.vp.reduce.add.nxv4i64(i64 undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
1141 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %28 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> undef)
1142 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %29 = call i64 @llvm.vp.reduce.add.nxv8i64(i64 undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
1143 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %30 = call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> undef)
1144 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %31 = call i64 @llvm.vp.reduce.add.nxv16i64(i64 undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
1145 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %32 = call i64 @llvm.vector.reduce.add.nxv16i64(<vscale x 16 x i64> undef)
1146 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
1147 ;
1148   call i8 @llvm.vp.reduce.add.v2i8(i8 undef, <2 x i8> undef, <2 x i1> undef, i32 undef) ; fixed-length i8 pairs
1149   call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
1150   call i8 @llvm.vp.reduce.add.v4i8(i8 undef, <4 x i8> undef, <4 x i1> undef, i32 undef)
1151   call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
1152   call i8 @llvm.vp.reduce.add.v8i8(i8 undef, <8 x i8> undef, <8 x i1> undef, i32 undef)
1153   call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
1154   call i8 @llvm.vp.reduce.add.v16i8(i8 undef, <16 x i8> undef, <16 x i1> undef, i32 undef)
1155   call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
1156   call i64 @llvm.vp.reduce.add.v2i64(i64 undef, <2 x i64> undef, <2 x i1> undef, i32 undef) ; fixed-length i64 pairs
1157   call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
1158   call i64 @llvm.vp.reduce.add.v4i64(i64 undef, <4 x i64> undef, <4 x i1> undef, i32 undef)
1159   call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
1160   call i64 @llvm.vp.reduce.add.v8i64(i64 undef, <8 x i64> undef, <8 x i1> undef, i32 undef)
1161   call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
1162   call i64 @llvm.vp.reduce.add.v16i64(i64 undef, <16 x i64> undef, <16 x i1> undef, i32 undef)
1163   call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
1164   call i8 @llvm.vp.reduce.add.nxv2i8(i8  undef, <vscale x 2 x i8> undef, <vscale x 2 x i1> undef, i32 undef) ; scalable i8 pairs; the VP call uses the same nxv2i8 type as the plain reduction on the next line
1165   call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> undef)
1166   call i8 @llvm.vp.reduce.add.nxv4i8(i8  undef, <vscale x 4 x i8> undef, <vscale x 4 x i1> undef, i32 undef)
1167   call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> undef)
1168   call i8 @llvm.vp.reduce.add.nxv8i8(i8  undef, <vscale x 8 x i8> undef, <vscale x 8 x i1> undef, i32 undef)
1169   call i8 @llvm.vector.reduce.add.nxv8i8(<vscale x 8 x i8> undef)
1170   call i8 @llvm.vp.reduce.add.nxv16i8(i8  undef, <vscale x 16 x i8> undef, <vscale x 16 x i1> undef, i32 undef)
1171   call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> undef)
1172   call i64 @llvm.vp.reduce.add.nxv2i64(i64  undef, <vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef) ; scalable i64 pairs
1173   call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> undef)
1174   call i64 @llvm.vp.reduce.add.nxv4i64(i64  undef, <vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
1175   call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> undef)
1176   call i64 @llvm.vp.reduce.add.nxv8i64(i64 undef, <vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
1177   call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> undef)
1178   call i64 @llvm.vp.reduce.add.nxv16i64(i64 undef, <vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
1179   call i64 @llvm.vector.reduce.add.nxv16i64(<vscale x 16 x i64> undef)
1180   ret void
1181 }
1182
1183 define void @reduce_fadd() {
1184 ; CHECK-LABEL: 'reduce_fadd'
1185 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = call float @llvm.vp.reduce.fadd.v2f32(float undef, <2 x float> undef, <2 x i1> undef, i32 undef)
1186 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = call float @llvm.vector.reduce.fadd.v2f32(float undef, <2 x float> undef)
1187 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %3 = call float @llvm.vp.reduce.fadd.v4f32(float undef, <4 x float> undef, <4 x i1> undef, i32 undef)
1188 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %4 = call float @llvm.vector.reduce.fadd.v4f32(float undef, <4 x float> undef)
1189 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %5 = call float @llvm.vp.reduce.fadd.v8f32(float undef, <8 x float> undef, <8 x i1> undef, i32 undef)
1190 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %6 = call float @llvm.vector.reduce.fadd.v8f32(float undef, <8 x float> undef)
1191 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %7 = call float @llvm.vp.reduce.fadd.v16f32(float undef, <16 x float> undef, <16 x i1> undef, i32 undef)
1192 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %8 = call float @llvm.vector.reduce.fadd.v16f32(float undef, <16 x float> undef)
1193 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = call double @llvm.vp.reduce.fadd.v2f64(double undef, <2 x double> undef, <2 x i1> undef, i32 undef)
1194 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = call double @llvm.vector.reduce.fadd.v2f64(double undef, <2 x double> undef)
1195 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %11 = call double @llvm.vp.reduce.fadd.v4f64(double undef, <4 x double> undef, <4 x i1> undef, i32 undef)
1196 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %12 = call double @llvm.vector.reduce.fadd.v4f64(double undef, <4 x double> undef)
1197 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %13 = call double @llvm.vp.reduce.fadd.v8f64(double undef, <8 x double> undef, <8 x i1> undef, i32 undef)
1198 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %14 = call double @llvm.vector.reduce.fadd.v8f64(double undef, <8 x double> undef)
1199 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %15 = call double @llvm.vp.reduce.fadd.v16f64(double undef, <16 x double> undef, <16 x i1> undef, i32 undef)
1200 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %16 = call double @llvm.vector.reduce.fadd.v16f64(double undef, <16 x double> undef)
1201 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %17 = call float @llvm.vp.reduce.fadd.nxv2f32(float undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
1202 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %18 = call float @llvm.vector.reduce.fadd.nxv2f32(float undef, <vscale x 2 x float> undef)
1203 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %19 = call float @llvm.vp.reduce.fadd.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
1204 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %20 = call float @llvm.vector.reduce.fadd.nxv4f32(float undef, <vscale x 4 x float> undef)
1205 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %21 = call float @llvm.vp.reduce.fadd.nxv8f32(float undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
1206 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %22 = call float @llvm.vector.reduce.fadd.nxv8f32(float undef, <vscale x 8 x float> undef)
1207 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %23 = call float @llvm.vp.reduce.fadd.nxv16f32(float undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
1208 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %24 = call float @llvm.vector.reduce.fadd.nxv16f32(float undef, <vscale x 16 x float> undef)
1209 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %25 = call double @llvm.vp.reduce.fadd.nxv2f64(double undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
1210 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %26 = call double @llvm.vector.reduce.fadd.nxv2f64(double undef, <vscale x 2 x double> undef)
1211 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %27 = call double @llvm.vp.reduce.fadd.nxv4f64(double undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
1212 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %28 = call double @llvm.vector.reduce.fadd.nxv4f64(double undef, <vscale x 4 x double> undef)
1213 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %29 = call double @llvm.vp.reduce.fadd.nxv8f64(double undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
1214 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %30 = call double @llvm.vector.reduce.fadd.nxv8f64(double undef, <vscale x 8 x double> undef)
1215 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %31 = call double @llvm.vp.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
1216 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %32 = call double @llvm.vector.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef)
1217 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
1218 ;
1219   call float @llvm.vp.reduce.fadd.v2f32(float undef, <2 x float> undef, <2 x i1> undef, i32 undef)
1220   call float @llvm.vector.reduce.fadd.v2f32(float undef, <2 x float> undef)
1221   call float @llvm.vp.reduce.fadd.v4f32(float undef, <4 x float> undef, <4 x i1> undef, i32 undef)
1222   call float @llvm.vector.reduce.fadd.v4f32(float undef, <4 x float> undef)
1223   call float @llvm.vp.reduce.fadd.v8f32(float undef, <8 x float> undef, <8 x i1> undef, i32 undef)
1224   call float @llvm.vector.reduce.fadd.v8f32(float undef, <8 x float> undef)
1225   call float @llvm.vp.reduce.fadd.v16f32(float undef, <16 x float> undef, <16 x i1> undef, i32 undef)
1226   call float @llvm.vector.reduce.fadd.v16f32(float undef, <16 x float> undef)
1227   call double @llvm.vp.reduce.fadd.v2f64(double undef, <2 x double> undef, <2 x i1> undef, i32 undef)
1228   call double @llvm.vector.reduce.fadd.v2f64(double undef, <2 x double> undef)
1229   call double @llvm.vp.reduce.fadd.v4f64(double undef, <4 x double> undef, <4 x i1> undef, i32 undef)
1230   call double @llvm.vector.reduce.fadd.v4f64(double undef, <4 x double> undef)
1231   call double @llvm.vp.reduce.fadd.v8f64(double undef, <8 x double> undef, <8 x i1> undef, i32 undef)
1232   call double @llvm.vector.reduce.fadd.v8f64(double undef, <8 x double> undef)
1233   call double @llvm.vp.reduce.fadd.v16f64(double undef, <16 x double> undef, <16 x i1> undef, i32 undef)
1234   call double @llvm.vector.reduce.fadd.v16f64(double undef, <16 x double> undef)
1235   call float @llvm.vp.reduce.fadd.nxv2f32(float undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
1236   call float @llvm.vector.reduce.fadd.nxv2f32(float undef, <vscale x 2 x float> undef)
1237   call float @llvm.vp.reduce.fadd.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
1238   call float @llvm.vector.reduce.fadd.nxv4f32(float undef, <vscale x 4 x float> undef)
1239   call float @llvm.vp.reduce.fadd.nxv8f32(float undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
1240   call float @llvm.vector.reduce.fadd.nxv8f32(float undef, <vscale x 8 x float> undef)
1241   call float @llvm.vp.reduce.fadd.nxv16f32(float undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
1242   call float @llvm.vector.reduce.fadd.nxv16f32(float undef, <vscale x 16 x float> undef)
1243   call double @llvm.vp.reduce.fadd.nxv2f64(double undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
1244   call double @llvm.vector.reduce.fadd.nxv2f64(double undef, <vscale x 2 x double> undef)
1245   call double @llvm.vp.reduce.fadd.nxv4f64(double undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
1246   call double @llvm.vector.reduce.fadd.nxv4f64(double undef, <vscale x 4 x double> undef)
1247   call double @llvm.vp.reduce.fadd.nxv8f64(double undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
1248   call double @llvm.vector.reduce.fadd.nxv8f64(double undef, <vscale x 8 x double> undef)
1249   call double @llvm.vp.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
1250   call double @llvm.vector.reduce.fadd.nxv16f64(double undef, <vscale x 16 x double> undef)
1251   ret void
1252 }
1253
1254 define void @reduce_other() { ; Cost-model coverage for llvm.vp.reduce.{xor,and,or,mul,smin,smax,umin,umax} on <4 x i32> and {mul,fmul,fmin,fminimum,fmax,fmaximum} on scalable 4-element vectors.
1255 ; CHECK-LABEL: 'reduce_other'
1256 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
1257 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
1258 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
1259 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %4 = call i32 @llvm.vp.reduce.mul.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
1260 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %5 = call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
1261 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
1262 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
1263 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %8 = call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
1264 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %9 = call i32 @llvm.vp.reduce.mul.nxv4i32(i32 undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
1265 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %10 = call float @llvm.vp.reduce.fmul.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
1266 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %11 = call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
1267 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %12 = call float @llvm.vp.reduce.fminimum.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
1268 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %13 = call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
1269 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %14 = call float @llvm.vp.reduce.fmaximum.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
1270 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
1271 ; Every operand below (start value, vector, mask and the trailing i32 vector-length argument) is undef. First group: integer reductions on fixed <4 x i32>.
1272   call i32 @llvm.vp.reduce.xor.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
1273   call i32 @llvm.vp.reduce.and.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
1274   call i32 @llvm.vp.reduce.or.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
1275   call i32 @llvm.vp.reduce.mul.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
1276   call i32 @llvm.vp.reduce.smin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
1277   call i32 @llvm.vp.reduce.smax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
1278   call i32 @llvm.vp.reduce.umin.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
1279   call i32 @llvm.vp.reduce.umax.v4i32(i32 undef, <4 x i32> undef, <4 x i1> undef, i32 undef)
1280 ; Scalable integer multiply reduction, expected above to have an invalid cost. The callee uses the nxv4i32 suffix so its name matches its <vscale x 4 x i32> operand and the expected output.
1281   call i32 @llvm.vp.reduce.mul.nxv4i32(i32 undef, <vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
1282 ; Scalable floating-point reductions on <vscale x 4 x float>; fmul is likewise expected to have an invalid cost.
1283   call float @llvm.vp.reduce.fmul.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
1284   call float @llvm.vp.reduce.fmin.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
1285   call float @llvm.vp.reduce.fminimum.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
1286   call float @llvm.vp.reduce.fmax.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
1287   call float @llvm.vp.reduce.fmaximum.nxv4f32(float undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
1288   ret void
1289 }
1290
1291
1292 define void @vp_fadd(){ ; Cost-model coverage for llvm.vp.fadd on fixed-length and scalable float/double vectors, plus one plain fadd for comparison.
1293 ; CHECK-LABEL: 'vp_fadd'
1294 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t0 = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef)
1295 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = call <4 x float> @llvm.vp.fadd.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef)
1296 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef)
1297 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t6 = call <16 x float> @llvm.vp.fadd.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef)
1298 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t8 = call <2 x double> @llvm.vp.fadd.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef)
1299 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t10 = call <4 x double> @llvm.vp.fadd.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef)
1300 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t12 = call <8 x double> @llvm.vp.fadd.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef)
1301 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t14 = call <16 x double> @llvm.vp.fadd.v16f64(<16 x double> undef, <16 x double> undef, <16 x i1> undef, i32 undef)
1302 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t17 = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
1303 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t19 = call <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
1304 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t21 = call <vscale x 8 x float> @llvm.vp.fadd.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
1305 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t23 = call <vscale x 16 x float> @llvm.vp.fadd.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
1306 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t25 = call <vscale x 2 x double> @llvm.vp.fadd.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
1307 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t27 = call <vscale x 4 x double> @llvm.vp.fadd.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
1308 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t29 = call <vscale x 8 x double> @llvm.vp.fadd.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
1309 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %t31 = call <vscale x 16 x double> @llvm.vp.fadd.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
1310 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %t32 = fadd <vscale x 16 x double> undef, undef
1311 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
1312 ; Every operand below (both inputs, the mask and the trailing i32 vector-length argument) is undef. First group: fixed-length float vectors of 2, 4, 8 and 16 elements.
1313   %t0 = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef)
1314   %t2 = call <4 x float> @llvm.vp.fadd.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef)
1315   %t4 = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef)
1316   %t6 = call <16 x float> @llvm.vp.fadd.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef)
1317 ; Fixed-length double vectors of 2, 4, 8 and 16 elements.
1318   %t8 = call <2 x double> @llvm.vp.fadd.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef)
1319   %t10 = call <4 x double> @llvm.vp.fadd.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef)
1320   %t12 = call <8 x double> @llvm.vp.fadd.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef)
1321   %t14 = call <16 x double> @llvm.vp.fadd.v16f64(<16 x double> undef, <16 x double> undef, <16 x i1> undef, i32 undef)
1322 ; Scalable float vectors, <vscale x 2> through <vscale x 16>.
1323   %t17 = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
1324   %t19 = call <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
1325   %t21 = call <vscale x 8 x float> @llvm.vp.fadd.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
1326   %t23 = call <vscale x 16 x float> @llvm.vp.fadd.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
1327 ; Scalable double vectors, <vscale x 2> through <vscale x 16>.
1328   %t25 = call <vscale x 2 x double> @llvm.vp.fadd.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
1329   %t27 = call <vscale x 4 x double> @llvm.vp.fadd.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
1330   %t29 = call <vscale x 8 x double> @llvm.vp.fadd.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
1331   %t31 = call <vscale x 16 x double> @llvm.vp.fadd.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
1332   %t32 = fadd <vscale x 16 x double> undef, undef ; Plain (non-VP) fadd on the same type as %t31; both are expected above to cost 32.
1333
1334   ret void
1335 }
1336
1337 define void @vp_fsub(){ ; Cost-model coverage for llvm.vp.fsub on fixed-length and scalable float/double vectors.
1338 ; CHECK-LABEL: 'vp_fsub'
1339 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t0 = call <2 x float> @llvm.vp.fsub.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef)
1340 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = call <4 x float> @llvm.vp.fsub.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef)
1341 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = call <8 x float> @llvm.vp.fsub.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef)
1342 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t6 = call <16 x float> @llvm.vp.fsub.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef)
1343 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t8 = call <2 x double> @llvm.vp.fsub.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef)
1344 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t10 = call <4 x double> @llvm.vp.fsub.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef)
1345 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t12 = call <8 x double> @llvm.vp.fsub.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef)
1346 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t14 = call <16 x double> @llvm.vp.fsub.v16f64(<16 x double> undef, <16 x double> undef, <16 x i1> undef, i32 undef)
1347 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t17 = call <vscale x 2 x float> @llvm.vp.fsub.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
1348 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t19 = call <vscale x 4 x float> @llvm.vp.fsub.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
1349 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t21 = call <vscale x 8 x float> @llvm.vp.fsub.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
1350 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t23 = call <vscale x 16 x float> @llvm.vp.fsub.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
1351 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t25 = call <vscale x 2 x double> @llvm.vp.fsub.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
1352 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t27 = call <vscale x 4 x double> @llvm.vp.fsub.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
1353 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t29 = call <vscale x 8 x double> @llvm.vp.fsub.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
1354 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %t31 = call <vscale x 16 x double> @llvm.vp.fsub.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
1355 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
1356 ; Every operand below (both inputs, the mask and the trailing i32 vector-length argument) is undef. First group: fixed-length float vectors of 2, 4, 8 and 16 elements.
1357   %t0 = call <2 x float> @llvm.vp.fsub.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef)
1358   %t2 = call <4 x float> @llvm.vp.fsub.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef)
1359   %t4 = call <8 x float> @llvm.vp.fsub.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef)
1360   %t6 = call <16 x float> @llvm.vp.fsub.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef)
1361 ; Fixed-length double vectors of 2, 4, 8 and 16 elements.
1362   %t8 = call <2 x double> @llvm.vp.fsub.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef)
1363   %t10 = call <4 x double> @llvm.vp.fsub.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef)
1364   %t12 = call <8 x double> @llvm.vp.fsub.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef)
1365   %t14 = call <16 x double> @llvm.vp.fsub.v16f64(<16 x double> undef, <16 x double> undef, <16 x i1> undef, i32 undef)
1366 ; Scalable float vectors, <vscale x 2> through <vscale x 16>.
1367   %t17 = call <vscale x 2 x float> @llvm.vp.fsub.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
1368   %t19 = call <vscale x 4 x float> @llvm.vp.fsub.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
1369   %t21 = call <vscale x 8 x float> @llvm.vp.fsub.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
1370   %t23 = call <vscale x 16 x float> @llvm.vp.fsub.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
1371 ; Scalable double vectors, <vscale x 2> through <vscale x 16>.
1372   %t25 = call <vscale x 2 x double> @llvm.vp.fsub.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
1373   %t27 = call <vscale x 4 x double> @llvm.vp.fsub.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
1374   %t29 = call <vscale x 8 x double> @llvm.vp.fsub.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
1375   %t31 = call <vscale x 16 x double> @llvm.vp.fsub.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
1376
1377   ret void
1378 }
1379
1380 define void @vp_fmul(){ ; Cost-model coverage for llvm.vp.fmul on fixed-length and scalable float/double vectors.
1381 ; CHECK-LABEL: 'vp_fmul'
1382 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t0 = call <2 x float> @llvm.vp.fmul.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef)
1383 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = call <4 x float> @llvm.vp.fmul.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef)
1384 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = call <8 x float> @llvm.vp.fmul.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef)
1385 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t6 = call <16 x float> @llvm.vp.fmul.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef)
1386 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t8 = call <2 x double> @llvm.vp.fmul.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef)
1387 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t10 = call <4 x double> @llvm.vp.fmul.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef)
1388 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t12 = call <8 x double> @llvm.vp.fmul.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef)
1389 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t14 = call <16 x double> @llvm.vp.fmul.v16f64(<16 x double> undef, <16 x double> undef, <16 x i1> undef, i32 undef)
1390 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t17 = call <vscale x 2 x float> @llvm.vp.fmul.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
1391 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t19 = call <vscale x 4 x float> @llvm.vp.fmul.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
1392 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t21 = call <vscale x 8 x float> @llvm.vp.fmul.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
1393 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t23 = call <vscale x 16 x float> @llvm.vp.fmul.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
1394 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t25 = call <vscale x 2 x double> @llvm.vp.fmul.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
1395 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t27 = call <vscale x 4 x double> @llvm.vp.fmul.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
1396 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t29 = call <vscale x 8 x double> @llvm.vp.fmul.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
1397 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %t31 = call <vscale x 16 x double> @llvm.vp.fmul.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
1398 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
1399 ; Every operand below (both inputs, the mask and the trailing i32 vector-length argument) is undef. First group: fixed-length float vectors of 2, 4, 8 and 16 elements.
1400   %t0 = call <2 x float> @llvm.vp.fmul.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef)
1401   %t2 = call <4 x float> @llvm.vp.fmul.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef)
1402   %t4 = call <8 x float> @llvm.vp.fmul.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef)
1403   %t6 = call <16 x float> @llvm.vp.fmul.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef)
1404 ; Fixed-length double vectors of 2, 4, 8 and 16 elements.
1405   %t8 = call <2 x double> @llvm.vp.fmul.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef)
1406   %t10 = call <4 x double> @llvm.vp.fmul.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef)
1407   %t12 = call <8 x double> @llvm.vp.fmul.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef)
1408   %t14 = call <16 x double> @llvm.vp.fmul.v16f64(<16 x double> undef, <16 x double> undef, <16 x i1> undef, i32 undef)
1409 ; Scalable float vectors, <vscale x 2> through <vscale x 16>.
1410   %t17 = call <vscale x 2 x float> @llvm.vp.fmul.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
1411   %t19 = call <vscale x 4 x float> @llvm.vp.fmul.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
1412   %t21 = call <vscale x 8 x float> @llvm.vp.fmul.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
1413   %t23 = call <vscale x 16 x float> @llvm.vp.fmul.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
1414 ; Scalable double vectors, <vscale x 2> through <vscale x 16>.
1415   %t25 = call <vscale x 2 x double> @llvm.vp.fmul.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
1416   %t27 = call <vscale x 4 x double> @llvm.vp.fmul.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
1417   %t29 = call <vscale x 8 x double> @llvm.vp.fmul.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
1418   %t31 = call <vscale x 16 x double> @llvm.vp.fmul.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
1419
1420   ret void
1421 }
1422
1423 define void @vp_fdiv(){
1424 ; CHECK-LABEL: 'vp_fdiv'
1425 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t0 = call <2 x float> @llvm.vp.fdiv.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef)
1426 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = call <4 x float> @llvm.vp.fdiv.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef)
1427 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = call <8 x float> @llvm.vp.fdiv.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef)
1428 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t6 = call <16 x float> @llvm.vp.fdiv.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef)
1429 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t8 = call <2 x double> @llvm.vp.fdiv.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef)
1430 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t10 = call <4 x double> @llvm.vp.fdiv.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef)
1431 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t12 = call <8 x double> @llvm.vp.fdiv.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef)
1432 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t14 = call <16 x double> @llvm.vp.fdiv.v16f64(<16 x double> undef, <16 x double> undef, <16 x i1> undef, i32 undef)
1433 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t17 = call <vscale x 2 x float> @llvm.vp.fdiv.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
1434 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t19 = call <vscale x 4 x float> @llvm.vp.fdiv.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
1435 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t21 = call <vscale x 8 x float> @llvm.vp.fdiv.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
1436 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t23 = call <vscale x 16 x float> @llvm.vp.fdiv.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
1437 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t25 = call <vscale x 2 x double> @llvm.vp.fdiv.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
1438 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %t27 = call <vscale x 4 x double> @llvm.vp.fdiv.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
1439 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %t29 = call <vscale x 8 x double> @llvm.vp.fdiv.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
1440 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %t31 = call <vscale x 16 x double> @llvm.vp.fdiv.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
1441 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
1442 ;
1443   %t0 = call <2 x float> @llvm.vp.fdiv.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef)
1444   %t2 = call <4 x float> @llvm.vp.fdiv.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef)
1445   %t4 = call <8 x float> @llvm.vp.fdiv.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef)
1446   %t6 = call <16 x float> @llvm.vp.fdiv.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef)
1447
1448   %t8 = call <2 x double> @llvm.vp.fdiv.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef)
1449   %t10 = call <4 x double> @llvm.vp.fdiv.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef)
1450   %t12 = call <8 x double> @llvm.vp.fdiv.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef)
1451   %t14 = call <16 x double> @llvm.vp.fdiv.v16f64(<16 x double> undef, <16 x double> undef, <16 x i1> undef, i32 undef)
1452
1453   %t17 = call <vscale x 2 x float> @llvm.vp.fdiv.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
1454   %t19 = call <vscale x 4 x float> @llvm.vp.fdiv.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
1455   %t21 = call <vscale x 8 x float> @llvm.vp.fdiv.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
1456   %t23 = call <vscale x 16 x float> @llvm.vp.fdiv.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
1457
1458   %t25 = call <vscale x 2 x double> @llvm.vp.fdiv.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
1459   %t27 = call <vscale x 4 x double> @llvm.vp.fdiv.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
1460   %t29 = call <vscale x 8 x double> @llvm.vp.fdiv.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
1461   %t31 = call <vscale x 16 x double> @llvm.vp.fdiv.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x double> undef, <vscale x 16 x i1> undef, i32 undef)
1462
1463   ret void
1464 }
1465 ; Cost-model coverage for llvm.experimental.vp.splat: one call per element type (i1, i8, i16, i32, i64, bfloat, half, float, double) at 2, 4, 8 and 16 lanes, fixed-length vectors first and then the scalable (vscale x N) forms, with every operand undef. The i1 variants are expected to report an invalid cost. The calls are unnamed, so the expected-output lines refer to them as %1..%72 in textual order; adding, removing or reordering a call renumbers them.
1466 define void @splat() {
1467 ; CHECK-LABEL: 'splat'
1468 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %1 = call <2 x i1> @llvm.experimental.vp.splat.v2i1(i1 undef, <2 x i1> undef, i32 undef)
1469 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %2 = call <4 x i1> @llvm.experimental.vp.splat.v4i1(i1 undef, <4 x i1> undef, i32 undef)
1470 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %3 = call <8 x i1> @llvm.experimental.vp.splat.v8i1(i1 undef, <8 x i1> undef, i32 undef)
1471 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %4 = call <16 x i1> @llvm.experimental.vp.splat.v16i1(i1 undef, <16 x i1> undef, i32 undef)
1472 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <2 x i8> @llvm.experimental.vp.splat.v2i8(i8 undef, <2 x i1> undef, i32 undef)
1473 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <4 x i8> @llvm.experimental.vp.splat.v4i8(i8 undef, <4 x i1> undef, i32 undef)
1474 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <8 x i8> @llvm.experimental.vp.splat.v8i8(i8 undef, <8 x i1> undef, i32 undef)
1475 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <16 x i8> @llvm.experimental.vp.splat.v16i8(i8 undef, <16 x i1> undef, i32 undef)
1476 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <2 x i16> @llvm.experimental.vp.splat.v2i16(i16 undef, <2 x i1> undef, i32 undef)
1477 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <4 x i16> @llvm.experimental.vp.splat.v4i16(i16 undef, <4 x i1> undef, i32 undef)
1478 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <8 x i16> @llvm.experimental.vp.splat.v8i16(i16 undef, <8 x i1> undef, i32 undef)
1479 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %12 = call <16 x i16> @llvm.experimental.vp.splat.v16i16(i16 undef, <16 x i1> undef, i32 undef)
1480 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <2 x i32> @llvm.experimental.vp.splat.v2i32(i32 undef, <2 x i1> undef, i32 undef)
1481 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <4 x i32> @llvm.experimental.vp.splat.v4i32(i32 undef, <4 x i1> undef, i32 undef)
1482 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = call <8 x i32> @llvm.experimental.vp.splat.v8i32(i32 undef, <8 x i1> undef, i32 undef)
1483 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %16 = call <16 x i32> @llvm.experimental.vp.splat.v16i32(i32 undef, <16 x i1> undef, i32 undef)
1484 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <2 x i64> @llvm.experimental.vp.splat.v2i64(i64 undef, <2 x i1> undef, i32 undef)
1485 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %18 = call <4 x i64> @llvm.experimental.vp.splat.v4i64(i64 undef, <4 x i1> undef, i32 undef)
1486 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %19 = call <8 x i64> @llvm.experimental.vp.splat.v8i64(i64 undef, <8 x i1> undef, i32 undef)
1487 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %20 = call <16 x i64> @llvm.experimental.vp.splat.v16i64(i64 undef, <16 x i1> undef, i32 undef)
1488 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %21 = call <2 x bfloat> @llvm.experimental.vp.splat.v2bf16(bfloat undef, <2 x i1> undef, i32 undef)
1489 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = call <4 x bfloat> @llvm.experimental.vp.splat.v4bf16(bfloat undef, <4 x i1> undef, i32 undef)
1490 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %23 = call <8 x bfloat> @llvm.experimental.vp.splat.v8bf16(bfloat undef, <8 x i1> undef, i32 undef)
1491 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %24 = call <16 x bfloat> @llvm.experimental.vp.splat.v16bf16(bfloat undef, <16 x i1> undef, i32 undef)
1492 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = call <2 x half> @llvm.experimental.vp.splat.v2f16(half undef, <2 x i1> undef, i32 undef)
1493 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %26 = call <4 x half> @llvm.experimental.vp.splat.v4f16(half undef, <4 x i1> undef, i32 undef)
1494 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %27 = call <8 x half> @llvm.experimental.vp.splat.v8f16(half undef, <8 x i1> undef, i32 undef)
1495 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = call <16 x half> @llvm.experimental.vp.splat.v16f16(half undef, <16 x i1> undef, i32 undef)
1496 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %29 = call <2 x float> @llvm.experimental.vp.splat.v2f32(float undef, <2 x i1> undef, i32 undef)
1497 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %30 = call <4 x float> @llvm.experimental.vp.splat.v4f32(float undef, <4 x i1> undef, i32 undef)
1498 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %31 = call <8 x float> @llvm.experimental.vp.splat.v8f32(float undef, <8 x i1> undef, i32 undef)
1499 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %32 = call <16 x float> @llvm.experimental.vp.splat.v16f32(float undef, <16 x i1> undef, i32 undef)
1500 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %33 = call <2 x double> @llvm.experimental.vp.splat.v2f64(double undef, <2 x i1> undef, i32 undef)
1501 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %34 = call <4 x double> @llvm.experimental.vp.splat.v4f64(double undef, <4 x i1> undef, i32 undef)
1502 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %35 = call <8 x double> @llvm.experimental.vp.splat.v8f64(double undef, <8 x i1> undef, i32 undef)
1503 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %36 = call <16 x double> @llvm.experimental.vp.splat.v16f64(double undef, <16 x i1> undef, i32 undef)
1504 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %37 = call <vscale x 2 x i1> @llvm.experimental.vp.splat.nxv2i1(i1 undef, <vscale x 2 x i1> undef, i32 undef)
1505 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %38 = call <vscale x 4 x i1> @llvm.experimental.vp.splat.nxv4i1(i1 undef, <vscale x 4 x i1> undef, i32 undef)
1506 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %39 = call <vscale x 8 x i1> @llvm.experimental.vp.splat.nxv8i1(i1 undef, <vscale x 8 x i1> undef, i32 undef)
1507 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %40 = call <vscale x 16 x i1> @llvm.experimental.vp.splat.nxv16i1(i1 undef, <vscale x 16 x i1> undef, i32 undef)
1508 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %41 = call <vscale x 2 x i8> @llvm.experimental.vp.splat.nxv2i8(i8 undef, <vscale x 2 x i1> undef, i32 undef)
1509 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %42 = call <vscale x 4 x i8> @llvm.experimental.vp.splat.nxv4i8(i8 undef, <vscale x 4 x i1> undef, i32 undef)
1510 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %43 = call <vscale x 8 x i8> @llvm.experimental.vp.splat.nxv8i8(i8 undef, <vscale x 8 x i1> undef, i32 undef)
1511 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %44 = call <vscale x 16 x i8> @llvm.experimental.vp.splat.nxv16i8(i8 undef, <vscale x 16 x i1> undef, i32 undef)
1512 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %45 = call <vscale x 2 x i16> @llvm.experimental.vp.splat.nxv2i16(i16 undef, <vscale x 2 x i1> undef, i32 undef)
1513 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %46 = call <vscale x 4 x i16> @llvm.experimental.vp.splat.nxv4i16(i16 undef, <vscale x 4 x i1> undef, i32 undef)
1514 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %47 = call <vscale x 8 x i16> @llvm.experimental.vp.splat.nxv8i16(i16 undef, <vscale x 8 x i1> undef, i32 undef)
1515 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %48 = call <vscale x 16 x i16> @llvm.experimental.vp.splat.nxv16i16(i16 undef, <vscale x 16 x i1> undef, i32 undef)
1516 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %49 = call <vscale x 2 x i32> @llvm.experimental.vp.splat.nxv2i32(i32 undef, <vscale x 2 x i1> undef, i32 undef)
1517 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %50 = call <vscale x 4 x i32> @llvm.experimental.vp.splat.nxv4i32(i32 undef, <vscale x 4 x i1> undef, i32 undef)
1518 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %51 = call <vscale x 8 x i32> @llvm.experimental.vp.splat.nxv8i32(i32 undef, <vscale x 8 x i1> undef, i32 undef)
1519 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %52 = call <vscale x 16 x i32> @llvm.experimental.vp.splat.nxv16i32(i32 undef, <vscale x 16 x i1> undef, i32 undef)
1520 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %53 = call <vscale x 2 x i64> @llvm.experimental.vp.splat.nxv2i64(i64 undef, <vscale x 2 x i1> undef, i32 undef)
1521 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %54 = call <vscale x 4 x i64> @llvm.experimental.vp.splat.nxv4i64(i64 undef, <vscale x 4 x i1> undef, i32 undef)
1522 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %55 = call <vscale x 8 x i64> @llvm.experimental.vp.splat.nxv8i64(i64 undef, <vscale x 8 x i1> undef, i32 undef)
1523 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %56 = call <vscale x 16 x i64> @llvm.experimental.vp.splat.nxv16i64(i64 undef, <vscale x 16 x i1> undef, i32 undef)
1524 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %57 = call <vscale x 2 x bfloat> @llvm.experimental.vp.splat.nxv2bf16(bfloat undef, <vscale x 2 x i1> undef, i32 undef)
1525 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %58 = call <vscale x 4 x bfloat> @llvm.experimental.vp.splat.nxv4bf16(bfloat undef, <vscale x 4 x i1> undef, i32 undef)
1526 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %59 = call <vscale x 8 x bfloat> @llvm.experimental.vp.splat.nxv8bf16(bfloat undef, <vscale x 8 x i1> undef, i32 undef)
1527 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %60 = call <vscale x 16 x bfloat> @llvm.experimental.vp.splat.nxv16bf16(bfloat undef, <vscale x 16 x i1> undef, i32 undef)
1528 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %61 = call <vscale x 2 x half> @llvm.experimental.vp.splat.nxv2f16(half undef, <vscale x 2 x i1> undef, i32 undef)
1529 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %62 = call <vscale x 4 x half> @llvm.experimental.vp.splat.nxv4f16(half undef, <vscale x 4 x i1> undef, i32 undef)
1530 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %63 = call <vscale x 8 x half> @llvm.experimental.vp.splat.nxv8f16(half undef, <vscale x 8 x i1> undef, i32 undef)
1531 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %64 = call <vscale x 16 x half> @llvm.experimental.vp.splat.nxv16f16(half undef, <vscale x 16 x i1> undef, i32 undef)
1532 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %65 = call <vscale x 2 x float> @llvm.experimental.vp.splat.nxv2f32(float undef, <vscale x 2 x i1> undef, i32 undef)
1533 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %66 = call <vscale x 4 x float> @llvm.experimental.vp.splat.nxv4f32(float undef, <vscale x 4 x i1> undef, i32 undef)
1534 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %67 = call <vscale x 8 x float> @llvm.experimental.vp.splat.nxv8f32(float undef, <vscale x 8 x i1> undef, i32 undef)
1535 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %68 = call <vscale x 16 x float> @llvm.experimental.vp.splat.nxv16f32(float undef, <vscale x 16 x i1> undef, i32 undef)
1536 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %69 = call <vscale x 2 x double> @llvm.experimental.vp.splat.nxv2f64(double undef, <vscale x 2 x i1> undef, i32 undef)
1537 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %70 = call <vscale x 4 x double> @llvm.experimental.vp.splat.nxv4f64(double undef, <vscale x 4 x i1> undef, i32 undef)
1538 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %71 = call <vscale x 8 x double> @llvm.experimental.vp.splat.nxv8f64(double undef, <vscale x 8 x i1> undef, i32 undef)
1539 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %72 = call <vscale x 16 x double> @llvm.experimental.vp.splat.nxv16f64(double undef, <vscale x 16 x i1> undef, i32 undef)
1540 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
1541 ;
1542   call <2 x i1> @llvm.experimental.vp.splat.v2i1(i1 undef, <2 x i1> undef, i32 undef)
1543   call <4 x i1> @llvm.experimental.vp.splat.v4i1(i1 undef, <4 x i1> undef, i32 undef)
1544   call <8 x i1> @llvm.experimental.vp.splat.v8i1(i1 undef, <8 x i1> undef, i32 undef)
1545   call <16 x i1> @llvm.experimental.vp.splat.v16i1(i1 undef, <16 x i1> undef, i32 undef)
1546   call <2 x i8> @llvm.experimental.vp.splat.v2i8(i8 undef, <2 x i1> undef, i32 undef)
1547   call <4 x i8> @llvm.experimental.vp.splat.v4i8(i8 undef, <4 x i1> undef, i32 undef)
1548   call <8 x i8> @llvm.experimental.vp.splat.v8i8(i8 undef, <8 x i1> undef, i32 undef)
1549   call <16 x i8> @llvm.experimental.vp.splat.v16i8(i8 undef, <16 x i1> undef, i32 undef)
1550   call <2 x i16> @llvm.experimental.vp.splat.v2i16(i16 undef, <2 x i1> undef, i32 undef)
1551   call <4 x i16> @llvm.experimental.vp.splat.v4i16(i16 undef, <4 x i1> undef, i32 undef)
1552   call <8 x i16> @llvm.experimental.vp.splat.v8i16(i16 undef, <8 x i1> undef, i32 undef)
1553   call <16 x i16> @llvm.experimental.vp.splat.v16i16(i16 undef, <16 x i1> undef, i32 undef)
1554   call <2 x i32> @llvm.experimental.vp.splat.v2i32(i32 undef, <2 x i1> undef, i32 undef)
1555   call <4 x i32> @llvm.experimental.vp.splat.v4i32(i32 undef, <4 x i1> undef, i32 undef)
1556   call <8 x i32> @llvm.experimental.vp.splat.v8i32(i32 undef, <8 x i1> undef, i32 undef)
1557   call <16 x i32> @llvm.experimental.vp.splat.v16i32(i32 undef, <16 x i1> undef, i32 undef)
1558   call <2 x i64> @llvm.experimental.vp.splat.v2i64(i64 undef, <2 x i1> undef, i32 undef)
1559   call <4 x i64> @llvm.experimental.vp.splat.v4i64(i64 undef, <4 x i1> undef, i32 undef)
1560   call <8 x i64> @llvm.experimental.vp.splat.v8i64(i64 undef, <8 x i1> undef, i32 undef)
1561   call <16 x i64> @llvm.experimental.vp.splat.v16i64(i64 undef, <16 x i1> undef, i32 undef)
1562   call <2 x bfloat> @llvm.experimental.vp.splat.v2bf16(bfloat undef, <2 x i1> undef, i32 undef)
1563   call <4 x bfloat> @llvm.experimental.vp.splat.v4bf16(bfloat undef, <4 x i1> undef, i32 undef)
1564   call <8 x bfloat> @llvm.experimental.vp.splat.v8bf16(bfloat undef, <8 x i1> undef, i32 undef)
1565   call <16 x bfloat> @llvm.experimental.vp.splat.v16bf16(bfloat undef, <16 x i1> undef, i32 undef)
1566   call <2 x half> @llvm.experimental.vp.splat.v2f16(half undef, <2 x i1> undef, i32 undef)
1567   call <4 x half> @llvm.experimental.vp.splat.v4f16(half undef, <4 x i1> undef, i32 undef)
1568   call <8 x half> @llvm.experimental.vp.splat.v8f16(half undef, <8 x i1> undef, i32 undef)
1569   call <16 x half> @llvm.experimental.vp.splat.v16f16(half undef, <16 x i1> undef, i32 undef)
1570   call <2 x float> @llvm.experimental.vp.splat.v2f32(float undef, <2 x i1> undef, i32 undef)
1571   call <4 x float> @llvm.experimental.vp.splat.v4f32(float undef, <4 x i1> undef, i32 undef)
1572   call <8 x float> @llvm.experimental.vp.splat.v8f32(float undef, <8 x i1> undef, i32 undef)
1573   call <16 x float> @llvm.experimental.vp.splat.v16f32(float undef, <16 x i1> undef, i32 undef)
1574   call <2 x double> @llvm.experimental.vp.splat.v2f64(double undef, <2 x i1> undef, i32 undef)
1575   call <4 x double> @llvm.experimental.vp.splat.v4f64(double undef, <4 x i1> undef, i32 undef)
1576   call <8 x double> @llvm.experimental.vp.splat.v8f64(double undef, <8 x i1> undef, i32 undef)
1577   call <16 x double> @llvm.experimental.vp.splat.v16f64(double undef, <16 x i1> undef, i32 undef)
1578   call <vscale x 2 x i1> @llvm.experimental.vp.splat.nxv2i1(i1 undef, <vscale x 2 x i1> undef, i32 undef)
1579   call <vscale x 4 x i1> @llvm.experimental.vp.splat.nxv4i1(i1 undef, <vscale x 4 x i1> undef, i32 undef)
1580   call <vscale x 8 x i1> @llvm.experimental.vp.splat.nxv8i1(i1 undef, <vscale x 8 x i1> undef, i32 undef)
1581   call <vscale x 16 x i1> @llvm.experimental.vp.splat.nxv16i1(i1 undef, <vscale x 16 x i1> undef, i32 undef)
1582   call <vscale x 2 x i8> @llvm.experimental.vp.splat.nxv2i8(i8 undef, <vscale x 2 x i1> undef, i32 undef)
1583   call <vscale x 4 x i8> @llvm.experimental.vp.splat.nxv4i8(i8 undef, <vscale x 4 x i1> undef, i32 undef)
1584   call <vscale x 8 x i8> @llvm.experimental.vp.splat.nxv8i8(i8 undef, <vscale x 8 x i1> undef, i32 undef)
1585   call <vscale x 16 x i8> @llvm.experimental.vp.splat.nxv16i8(i8 undef, <vscale x 16 x i1> undef, i32 undef)
1586   call <vscale x 2 x i16> @llvm.experimental.vp.splat.nxv2i16(i16 undef, <vscale x 2 x i1> undef, i32 undef)
1587   call <vscale x 4 x i16> @llvm.experimental.vp.splat.nxv4i16(i16 undef, <vscale x 4 x i1> undef, i32 undef)
1588   call <vscale x 8 x i16> @llvm.experimental.vp.splat.nxv8i16(i16 undef, <vscale x 8 x i1> undef, i32 undef)
1589   call <vscale x 16 x i16> @llvm.experimental.vp.splat.nxv16i16(i16 undef, <vscale x 16 x i1> undef, i32 undef)
1590   call <vscale x 2 x i32> @llvm.experimental.vp.splat.nxv2i32(i32 undef, <vscale x 2 x i1> undef, i32 undef)
1591   call <vscale x 4 x i32> @llvm.experimental.vp.splat.nxv4i32(i32 undef, <vscale x 4 x i1> undef, i32 undef)
1592   call <vscale x 8 x i32> @llvm.experimental.vp.splat.nxv8i32(i32 undef, <vscale x 8 x i1> undef, i32 undef)
1593   call <vscale x 16 x i32> @llvm.experimental.vp.splat.nxv16i32(i32 undef, <vscale x 16 x i1> undef, i32 undef)
1594   call <vscale x 2 x i64> @llvm.experimental.vp.splat.nxv2i64(i64 undef, <vscale x 2 x i1> undef, i32 undef)
1595   call <vscale x 4 x i64> @llvm.experimental.vp.splat.nxv4i64(i64 undef, <vscale x 4 x i1> undef, i32 undef)
1596   call <vscale x 8 x i64> @llvm.experimental.vp.splat.nxv8i64(i64 undef, <vscale x 8 x i1> undef, i32 undef)
1597   call <vscale x 16 x i64> @llvm.experimental.vp.splat.nxv16i64(i64 undef, <vscale x 16 x i1> undef, i32 undef)
1598   call <vscale x 2 x bfloat> @llvm.experimental.vp.splat.nxv2bf16(bfloat undef, <vscale x 2 x i1> undef, i32 undef)
1599   call <vscale x 4 x bfloat> @llvm.experimental.vp.splat.nxv4bf16(bfloat undef, <vscale x 4 x i1> undef, i32 undef)
1600   call <vscale x 8 x bfloat> @llvm.experimental.vp.splat.nxv8bf16(bfloat undef, <vscale x 8 x i1> undef, i32 undef)
1601   call <vscale x 16 x bfloat> @llvm.experimental.vp.splat.nxv16bf16(bfloat undef, <vscale x 16 x i1> undef, i32 undef)
1602   call <vscale x 2 x half> @llvm.experimental.vp.splat.nxv2f16(half undef, <vscale x 2 x i1> undef, i32 undef)
1603   call <vscale x 4 x half> @llvm.experimental.vp.splat.nxv4f16(half undef, <vscale x 4 x i1> undef, i32 undef)
1604   call <vscale x 8 x half> @llvm.experimental.vp.splat.nxv8f16(half undef, <vscale x 8 x i1> undef, i32 undef)
1605   call <vscale x 16 x half> @llvm.experimental.vp.splat.nxv16f16(half undef, <vscale x 16 x i1> undef, i32 undef)
1606   call <vscale x 2 x float> @llvm.experimental.vp.splat.nxv2f32(float undef, <vscale x 2 x i1> undef, i32 undef)
1607   call <vscale x 4 x float> @llvm.experimental.vp.splat.nxv4f32(float undef, <vscale x 4 x i1> undef, i32 undef)
1608   call <vscale x 8 x float> @llvm.experimental.vp.splat.nxv8f32(float undef, <vscale x 8 x i1> undef, i32 undef)
1609   call <vscale x 16 x float> @llvm.experimental.vp.splat.nxv16f32(float undef, <vscale x 16 x i1> undef, i32 undef)
1610   call <vscale x 2 x double> @llvm.experimental.vp.splat.nxv2f64(double undef, <vscale x 2 x i1> undef, i32 undef)
1611   call <vscale x 4 x double> @llvm.experimental.vp.splat.nxv4f64(double undef, <vscale x 4 x i1> undef, i32 undef)
1612   call <vscale x 8 x double> @llvm.experimental.vp.splat.nxv8f64(double undef, <vscale x 8 x i1> undef, i32 undef)
1613   call <vscale x 16 x double> @llvm.experimental.vp.splat.nxv16f64(double undef, <vscale x 16 x i1> undef, i32 undef)
1614   ret void
1615 }
1616 ; Declarations of llvm.vp.add for i8 and i64 elements at 2/4/8/16 lanes, fixed-length then scalable. Each takes two vectors, an i1 mask vector of the same lane count, and an i32 (the explicit vector length in the LangRef VP convention).
1617 declare <2 x i8> @llvm.vp.add.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
1618 declare <4 x i8> @llvm.vp.add.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32)
1619 declare <8 x i8> @llvm.vp.add.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32)
1620 declare <16 x i8> @llvm.vp.add.v16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32)
1621 declare <2 x i64> @llvm.vp.add.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32)
1622 declare <4 x i64> @llvm.vp.add.v4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32)
1623 declare <8 x i64> @llvm.vp.add.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32)
1624 declare <16 x i64> @llvm.vp.add.v16i64(<16 x i64>, <16 x i64>, <16 x i1>, i32)
1625 declare <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
1626 declare <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
1627 declare <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
1628 declare <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
1629 declare <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
1630 declare <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
1631 declare <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
1632 declare <vscale x 16 x i64> @llvm.vp.add.nxv16i64(<vscale x 16 x i64>, <vscale x 16 x i64>, <vscale x 16 x i1>, i32)
1633 ; Declarations of llvm.vp.abs for i8 and i64 elements at 2/4/8/16 lanes, fixed-length then scalable. Each takes the source vector, a scalar i1 flag (is_int_min_poison in the LangRef), an i1 mask vector, and an i32 vector length.
1634 declare <2 x i8> @llvm.vp.abs.v2i8(<2 x i8>, i1, <2 x i1>, i32)
1635 declare <4 x i8> @llvm.vp.abs.v4i8(<4 x i8>, i1, <4 x i1>, i32)
1636 declare <8 x i8> @llvm.vp.abs.v8i8(<8 x i8>, i1, <8 x i1>, i32)
1637 declare <16 x i8> @llvm.vp.abs.v16i8(<16 x i8>, i1, <16 x i1>, i32)
1638 declare <2 x i64> @llvm.vp.abs.v2i64(<2 x i64>, i1, <2 x i1>, i32)
1639 declare <4 x i64> @llvm.vp.abs.v4i64(<4 x i64>, i1, <4 x i1>, i32)
1640 declare <8 x i64> @llvm.vp.abs.v8i64(<8 x i64>, i1, <8 x i1>, i32)
1641 declare <16 x i64> @llvm.vp.abs.v16i64(<16 x i64>, i1, <16 x i1>, i32)
1642 declare <vscale x 2 x i8> @llvm.vp.abs.nxv2i8(<vscale x 2 x i8>, i1, <vscale x 2 x i1>, i32)
1643 declare <vscale x 4 x i8> @llvm.vp.abs.nxv4i8(<vscale x 4 x i8>, i1, <vscale x 4 x i1>, i32)
1644 declare <vscale x 8 x i8> @llvm.vp.abs.nxv8i8(<vscale x 8 x i8>, i1, <vscale x 8 x i1>, i32)
1645 declare <vscale x 16 x i8> @llvm.vp.abs.nxv16i8(<vscale x 16 x i8>, i1, <vscale x 16 x i1>, i32)
1646 declare <vscale x 2 x i64> @llvm.vp.abs.nxv2i64(<vscale x 2 x i64>, i1, <vscale x 2 x i1>, i32)
1647 declare <vscale x 4 x i64> @llvm.vp.abs.nxv4i64(<vscale x 4 x i64>, i1, <vscale x 4 x i1>, i32)
1648 declare <vscale x 8 x i64> @llvm.vp.abs.nxv8i64(<vscale x 8 x i64>, i1, <vscale x 8 x i1>, i32)
1649 declare <vscale x 16 x i64> @llvm.vp.abs.nxv16i64(<vscale x 16 x i64>, i1, <vscale x 16 x i1>, i32)
1650 ; Declarations of the non-predicated llvm.abs for the same i8/i64 shapes (2/4/8/16 lanes, fixed-length then scalable). Each takes only the source vector and the scalar i1 flag; there is no mask or vector-length operand.
1651 declare <2 x i8> @llvm.abs.v2i8(<2 x i8>, i1)
1652 declare <4 x i8> @llvm.abs.v4i8(<4 x i8>, i1)
1653 declare <8 x i8> @llvm.abs.v8i8(<8 x i8>, i1)
1654 declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)
1655 declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1)
1656 declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)
1657 declare <8 x i64> @llvm.abs.v8i64(<8 x i64>, i1)
1658 declare <16 x i64> @llvm.abs.v16i64(<16 x i64>, i1)
1659 declare <vscale x 2 x i8> @llvm.abs.nxv2i8(<vscale x 2 x i8>, i1)
1660 declare <vscale x 4 x i8> @llvm.abs.nxv4i8(<vscale x 4 x i8>, i1)
1661 declare <vscale x 8 x i8> @llvm.abs.nxv8i8(<vscale x 8 x i8>, i1)
1662 declare <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8>, i1)
1663 declare <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64>, i1)
1664 declare <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64>, i1)
1665 declare <vscale x 8 x i64> @llvm.abs.nxv8i64(<vscale x 8 x i64>, i1)
1666 declare <vscale x 16 x i64> @llvm.abs.nxv16i64(<vscale x 16 x i64>, i1)
1667 ; Declarations of llvm.vp.load returning i8 and i64 vectors at 2/4/8/16 lanes, fixed-length then scalable. Each takes a pointer, an i1 mask vector matching the result lane count, and an i32 vector length.
1668 declare <2 x i8> @llvm.vp.load.v2i8(ptr, <2 x i1>, i32)
1669 declare <4 x i8> @llvm.vp.load.v4i8(ptr, <4 x i1>, i32)
1670 declare <8 x i8> @llvm.vp.load.v8i8(ptr, <8 x i1>, i32)
1671 declare <16 x i8> @llvm.vp.load.v16i8(ptr, <16 x i1>, i32)
1672 declare <2 x i64> @llvm.vp.load.v2i64(ptr, <2 x i1>, i32)
1673 declare <4 x i64> @llvm.vp.load.v4i64(ptr, <4 x i1>, i32)
1674 declare <8 x i64> @llvm.vp.load.v8i64(ptr, <8 x i1>, i32)
1675 declare <16 x i64> @llvm.vp.load.v16i64(ptr, <16 x i1>, i32)
1676 declare <vscale x 2 x i8> @llvm.vp.load.nxv2i8(ptr, <vscale x 2 x i1>, i32)
1677 declare <vscale x 4 x i8> @llvm.vp.load.nxv4i8(ptr, <vscale x 4 x i1>, i32)
1678 declare <vscale x 8 x i8> @llvm.vp.load.nxv8i8(ptr, <vscale x 8 x i1>, i32)
1679 declare <vscale x 16 x i8> @llvm.vp.load.nxv16i8(ptr, <vscale x 16 x i1>, i32)
1680 declare <vscale x 2 x i64> @llvm.vp.load.nxv2i64(ptr, <vscale x 2 x i1>, i32)
1681 declare <vscale x 4 x i64> @llvm.vp.load.nxv4i64(ptr, <vscale x 4 x i1>, i32)
1682 declare <vscale x 8 x i64> @llvm.vp.load.nxv8i64(ptr, <vscale x 8 x i1>, i32)
1683 declare <vscale x 16 x i64> @llvm.vp.load.nxv16i64(ptr, <vscale x 16 x i1>, i32)
1684 ; Declarations of llvm.vp.store for i8 and i64 vectors at 2/4/8/16 lanes, fixed-length then scalable. Each returns void and takes the value vector, a pointer, an i1 mask vector, and an i32 vector length.
1685 declare void @llvm.vp.store.v2i8(<2 x i8>, ptr, <2 x i1>, i32)
1686 declare void @llvm.vp.store.v4i8(<4 x i8>, ptr, <4 x i1>, i32)
1687 declare void @llvm.vp.store.v8i8(<8 x i8>, ptr, <8 x i1>, i32)
1688 declare void @llvm.vp.store.v16i8(<16 x i8>, ptr, <16 x i1>, i32)
1689 declare void @llvm.vp.store.v2i64(<2 x i64>, ptr, <2 x i1>, i32)
1690 declare void @llvm.vp.store.v4i64(<4 x i64>, ptr, <4 x i1>, i32)
1691 declare void @llvm.vp.store.v8i64(<8 x i64>, ptr, <8 x i1>, i32)
1692 declare void @llvm.vp.store.v16i64(<16 x i64>, ptr, <16 x i1>, i32)
1693 declare void @llvm.vp.store.nxv2i8(<vscale x 2 x i8>, ptr, <vscale x 2 x i1>, i32)
1694 declare void @llvm.vp.store.nxv4i8(<vscale x 4 x i8>, ptr, <vscale x 4 x i1>, i32)
1695 declare void @llvm.vp.store.nxv8i8(<vscale x 8 x i8>, ptr, <vscale x 8 x i1>, i32)
1696 declare void @llvm.vp.store.nxv16i8(<vscale x 16 x i8>, ptr, <vscale x 16 x i1>, i32)
1697 declare void @llvm.vp.store.nxv2i64(<vscale x 2 x i64>, ptr, <vscale x 2 x i1>, i32)
1698 declare void @llvm.vp.store.nxv4i64(<vscale x 4 x i64>, ptr, <vscale x 4 x i1>, i32)
1699 declare void @llvm.vp.store.nxv8i64(<vscale x 8 x i64>, ptr, <vscale x 8 x i1>, i32)
1700 declare void @llvm.vp.store.nxv16i64(<vscale x 16 x i64>, ptr, <vscale x 16 x i1>, i32)
1701 ; Declarations of llvm.experimental.vp.strided.load returning i8 and i64 vectors at 2/4/8/16 lanes, fixed-length then scalable. Each takes a pointer, an i64 stride (the ".i64" name suffix), an i1 mask vector, and an i32 vector length.
1702 declare <2 x i8> @llvm.experimental.vp.strided.load.v2i8.i64(ptr, i64, <2 x i1>, i32)
1703 declare <4 x i8> @llvm.experimental.vp.strided.load.v4i8.i64(ptr, i64, <4 x i1>, i32)
1704 declare <8 x i8> @llvm.experimental.vp.strided.load.v8i8.i64(ptr, i64, <8 x i1>, i32)
1705 declare <16 x i8> @llvm.experimental.vp.strided.load.v16i8.i64(ptr, i64, <16 x i1>, i32)
1706 declare <2 x i64> @llvm.experimental.vp.strided.load.v2i64.i64(ptr, i64, <2 x i1>, i32)
1707 declare <4 x i64> @llvm.experimental.vp.strided.load.v4i64.i64(ptr, i64, <4 x i1>, i32)
1708 declare <8 x i64> @llvm.experimental.vp.strided.load.v8i64.i64(ptr, i64, <8 x i1>, i32)
1709 declare <16 x i64> @llvm.experimental.vp.strided.load.v16i64.i64(ptr, i64, <16 x i1>, i32)
1710 declare <vscale x 2 x i8> @llvm.experimental.vp.strided.load.nxv2i8.i64(ptr, i64, <vscale x 2 x i1>, i32)
1711 declare <vscale x 4 x i8> @llvm.experimental.vp.strided.load.nxv4i8.i64(ptr, i64, <vscale x 4 x i1>, i32)
1712 declare <vscale x 8 x i8> @llvm.experimental.vp.strided.load.nxv8i8.i64(ptr, i64, <vscale x 8 x i1>, i32)
1713 declare <vscale x 16 x i8> @llvm.experimental.vp.strided.load.nxv16i8.i64(ptr, i64, <vscale x 16 x i1>, i32)
1714 declare <vscale x 2 x i64> @llvm.experimental.vp.strided.load.nxv2i64.i64(ptr, i64, <vscale x 2 x i1>, i32)
1715 declare <vscale x 4 x i64> @llvm.experimental.vp.strided.load.nxv4i64.i64(ptr, i64, <vscale x 4 x i1>, i32)
1716 declare <vscale x 8 x i64> @llvm.experimental.vp.strided.load.nxv8i64.i64(ptr, i64, <vscale x 8 x i1>, i32)
1717 declare <vscale x 16 x i64> @llvm.experimental.vp.strided.load.nxv16i64.i64(ptr, i64, <vscale x 16 x i1>, i32)
1718 ; Declarations of llvm.experimental.vp.strided.store for i8 and i64 vectors at 2/4/8/16 lanes, fixed-length then scalable. Each returns void and takes the value vector, a pointer, an i64 stride, an i1 mask vector, and an i32 vector length.
1719 declare void @llvm.experimental.vp.strided.store.v2i8.i64(<2 x i8>, ptr, i64, <2 x i1>, i32)
1720 declare void @llvm.experimental.vp.strided.store.v4i8.i64(<4 x i8>, ptr, i64, <4 x i1>, i32)
1721 declare void @llvm.experimental.vp.strided.store.v8i8.i64(<8 x i8>, ptr, i64, <8 x i1>, i32)
1722 declare void @llvm.experimental.vp.strided.store.v16i8.i64(<16 x i8>, ptr, i64, <16 x i1>, i32)
1723 declare void @llvm.experimental.vp.strided.store.v2i64.i64(<2 x i64>, ptr, i64, <2 x i1>, i32)
1724 declare void @llvm.experimental.vp.strided.store.v4i64.i64(<4 x i64>, ptr, i64, <4 x i1>, i32)
1725 declare void @llvm.experimental.vp.strided.store.v8i64.i64(<8 x i64>, ptr, i64, <8 x i1>, i32)
1726 declare void @llvm.experimental.vp.strided.store.v16i64.i64(<16 x i64>, ptr, i64, <16 x i1>, i32)
1727 declare void @llvm.experimental.vp.strided.store.nxv2i8.i64(<vscale x 2 x i8>, ptr, i64, <vscale x 2 x i1>, i32)
1728 declare void @llvm.experimental.vp.strided.store.nxv4i8.i64(<vscale x 4 x i8>, ptr, i64, <vscale x 4 x i1>, i32)
1729 declare void @llvm.experimental.vp.strided.store.nxv8i8.i64(<vscale x 8 x i8>, ptr, i64, <vscale x 8 x i1>, i32)
1730 declare void @llvm.experimental.vp.strided.store.nxv16i8.i64(<vscale x 16 x i8>, ptr, i64, <vscale x 16 x i1>, i32)
1731 declare void @llvm.experimental.vp.strided.store.nxv2i64.i64(<vscale x 2 x i64>, ptr, i64, <vscale x 2 x i1>, i32)
1732 declare void @llvm.experimental.vp.strided.store.nxv4i64.i64(<vscale x 4 x i64>, ptr, i64, <vscale x 4 x i1>, i32)
1733 declare void @llvm.experimental.vp.strided.store.nxv8i64.i64(<vscale x 8 x i64>, ptr, i64, <vscale x 8 x i1>, i32)
1734 declare void @llvm.experimental.vp.strided.store.nxv16i64.i64(<vscale x 16 x i64>, ptr, i64, <vscale x 16 x i1>, i32)
1735 ; llvm.vector.reduce.add declarations (non-VP form, single vector operand): fixed and scalable vectors of 2, 4, 8 and 16 elements of i8 and i64, returning one scalar of the element type.
1736 declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>)
1737 declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>)
1738 declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
1739 declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
1740 declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
1741 declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
1742 declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
1743 declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
1744 declare i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8>)
1745 declare i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8>)
1746 declare i8 @llvm.vector.reduce.add.nxv8i8(<vscale x 8 x i8>)
1747 declare i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8>)
1748 declare i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64>)
1749 declare i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64>)
1750 declare i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64>)
1751 declare i64 @llvm.vector.reduce.add.nxv16i64(<vscale x 16 x i64>)
1752 ; llvm.vp.reduce.add declarations: fixed and scalable vectors of 2, 4, 8 and 16 elements of i8 and i64; operands are (scalar of the element type, vector, i1 vector of matching length, i32).
1753 declare i8 @llvm.vp.reduce.add.v2i8(i8, <2 x i8>, <2 x i1>, i32)
1754 declare i8 @llvm.vp.reduce.add.v4i8(i8, <4 x i8>, <4 x i1>, i32)
1755 declare i8 @llvm.vp.reduce.add.v8i8(i8, <8 x i8>, <8 x i1>, i32)
1756 declare i8 @llvm.vp.reduce.add.v16i8(i8, <16 x i8>, <16 x i1>, i32)
1757 declare i64 @llvm.vp.reduce.add.v2i64(i64, <2 x i64>, <2 x i1>, i32)
1758 declare i64 @llvm.vp.reduce.add.v4i64(i64, <4 x i64>, <4 x i1>, i32)
1759 declare i64 @llvm.vp.reduce.add.v8i64(i64, <8 x i64>, <8 x i1>, i32)
1760 declare i64 @llvm.vp.reduce.add.v16i64(i64, <16 x i64>, <16 x i1>, i32)
1761 declare i8 @llvm.vp.reduce.add.nxv2i8(i8, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
1762 declare i8 @llvm.vp.reduce.add.nxv4i8(i8, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
1763 declare i8 @llvm.vp.reduce.add.nxv8i8(i8, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
1764 declare i8 @llvm.vp.reduce.add.nxv16i8(i8, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
1765 declare i64 @llvm.vp.reduce.add.nxv2i64(i64, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
1766 declare i64 @llvm.vp.reduce.add.nxv4i64(i64, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
1767 declare i64 @llvm.vp.reduce.add.nxv8i64(i64, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
1768 declare i64 @llvm.vp.reduce.add.nxv16i64(i64, <vscale x 16 x i64>, <vscale x 16 x i1>, i32)
1769 ; llvm.vector.reduce.fadd declarations (non-VP form): fixed and scalable vectors of 2, 4, 8 and 16 elements of float and double; operands are (scalar of the element type, vector).
1770 declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>)
1771 declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
1772 declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)
1773 declare float @llvm.vector.reduce.fadd.v16f32(float, <16 x float>)
1774 declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)
1775 declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)
1776 declare double @llvm.vector.reduce.fadd.v8f64(double, <8 x double>)
1777 declare double @llvm.vector.reduce.fadd.v16f64(double, <16 x double>)
1778 declare float @llvm.vector.reduce.fadd.nxv2f32(float, <vscale x 2 x float>)
1779 declare float @llvm.vector.reduce.fadd.nxv4f32(float, <vscale x 4 x float>)
1780 declare float @llvm.vector.reduce.fadd.nxv8f32(float, <vscale x 8 x float>)
1781 declare float @llvm.vector.reduce.fadd.nxv16f32(float, <vscale x 16 x float>)
1782 declare double @llvm.vector.reduce.fadd.nxv2f64(double, <vscale x 2 x double>)
1783 declare double @llvm.vector.reduce.fadd.nxv4f64(double, <vscale x 4 x double>)
1784 declare double @llvm.vector.reduce.fadd.nxv8f64(double, <vscale x 8 x double>)
1785 declare double @llvm.vector.reduce.fadd.nxv16f64(double, <vscale x 16 x double>)
1786 ; llvm.vp.reduce.fadd declarations: fixed and scalable vectors of 2, 4, 8 and 16 elements of float and double; operands are (scalar of the element type, vector, i1 vector of matching length, i32).
1787 declare float @llvm.vp.reduce.fadd.v2f32(float, <2 x float>, <2 x i1>, i32)
1788 declare float @llvm.vp.reduce.fadd.v4f32(float, <4 x float>, <4 x i1>, i32)
1789 declare float @llvm.vp.reduce.fadd.v8f32(float, <8 x float>, <8 x i1>, i32)
1790 declare float @llvm.vp.reduce.fadd.v16f32(float, <16 x float>, <16 x i1>, i32)
1791 declare double @llvm.vp.reduce.fadd.v2f64(double, <2 x double>, <2 x i1>, i32)
1792 declare double @llvm.vp.reduce.fadd.v4f64(double, <4 x double>, <4 x i1>, i32)
1793 declare double @llvm.vp.reduce.fadd.v8f64(double, <8 x double>, <8 x i1>, i32)
1794 declare double @llvm.vp.reduce.fadd.v16f64(double, <16 x double>, <16 x i1>, i32)
1795 declare float @llvm.vp.reduce.fadd.nxv2f32(float, <vscale x 2 x float>, <vscale x 2 x i1>, i32)
1796 declare float @llvm.vp.reduce.fadd.nxv4f32(float, <vscale x 4 x float>, <vscale x 4 x i1>, i32)
1797 declare float @llvm.vp.reduce.fadd.nxv8f32(float, <vscale x 8 x float>, <vscale x 8 x i1>, i32)
1798 declare float @llvm.vp.reduce.fadd.nxv16f32(float, <vscale x 16 x float>, <vscale x 16 x i1>, i32)
1799 declare double @llvm.vp.reduce.fadd.nxv2f64(double, <vscale x 2 x double>, <vscale x 2 x i1>, i32)
1800 declare double @llvm.vp.reduce.fadd.nxv4f64(double, <vscale x 4 x double>, <vscale x 4 x i1>, i32)
1801 declare double @llvm.vp.reduce.fadd.nxv8f64(double, <vscale x 8 x double>, <vscale x 8 x i1>, i32)
1802 declare double @llvm.vp.reduce.fadd.nxv16f64(double, <vscale x 16 x double>, <vscale x 16 x i1>, i32)
1803 ; Non-VP funnel-shift declarations. NOTE(review): the names are mangled nxv4i32 but every operand and the result are <vscale x 1 x i32>; check the call sites before renaming either one.
1804 declare <vscale x 1 x i32> @llvm.fshr.nxv4i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c)
1805 declare <vscale x 1 x i32> @llvm.fshl.nxv4i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c)
1806 ; Non-VP floating-point intrinsics on <vscale x 4 x float>; llvm.pow and llvm.powi are called from @unsupported_fp_ops, and llvm.powi also from @powi, at the top of the file.
1807 declare <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
1808 declare <vscale x 4 x float> @llvm.powi.nxv4f32.i32(<vscale x 4 x float>, i32)
1809 declare <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float>)
1810 ; llvm.vp.fshr declarations for i8/i16/i32/i64 elements: fixed vectors of 2, 4, 8 and 16 elements, and scalable vectors from nxv1 up to nxv64i8, nxv32i16, nxv16i32 and nxv8i64; operands are (three vectors, i1 vector of matching length, i32).
1811 declare <2 x i8> @llvm.vp.fshr.v2i8(<2 x i8>, <2 x i8>, <2 x i8>, <2 x i1>, i32)
1812 declare <4 x i8> @llvm.vp.fshr.v4i8(<4 x i8>, <4 x i8>, <4 x i8>, <4 x i1>, i32)
1813 declare <8 x i8> @llvm.vp.fshr.v8i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i1>, i32)
1814 declare <16 x i8> @llvm.vp.fshr.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i1>, i32)
1815 declare <vscale x 1 x i8> @llvm.vp.fshr.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
1816 declare <vscale x 2 x i8> @llvm.vp.fshr.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
1817 declare <vscale x 4 x i8> @llvm.vp.fshr.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
1818 declare <vscale x 8 x i8> @llvm.vp.fshr.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
1819 declare <vscale x 16 x i8> @llvm.vp.fshr.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
1820 declare <vscale x 32 x i8> @llvm.vp.fshr.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32)
1821 declare <vscale x 64 x i8> @llvm.vp.fshr.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32)
1822 declare <2 x i16> @llvm.vp.fshr.v2i16(<2 x i16>, <2 x i16>, <2 x i16>, <2 x i1>, i32)
1823 declare <4 x i16> @llvm.vp.fshr.v4i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i1>, i32)
1824 declare <8 x i16> @llvm.vp.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i1>, i32)
1825 declare <16 x i16> @llvm.vp.fshr.v16i16(<16 x i16>, <16 x i16>, <16 x i16>, <16 x i1>, i32)
1826 declare <vscale x 1 x i16> @llvm.vp.fshr.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
1827 declare <vscale x 2 x i16> @llvm.vp.fshr.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
1828 declare <vscale x 4 x i16> @llvm.vp.fshr.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
1829 declare <vscale x 8 x i16> @llvm.vp.fshr.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32)
1830 declare <vscale x 16 x i16> @llvm.vp.fshr.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32)
1831 declare <vscale x 32 x i16> @llvm.vp.fshr.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32)
1832 declare <2 x i32> @llvm.vp.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i1>, i32)
1833 declare <4 x i32> @llvm.vp.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i1>, i32)
1834 declare <8 x i32> @llvm.vp.fshr.v8i32(<8 x i32>, <8 x i32>, <8 x i32>, <8 x i1>, i32)
1835 declare <16 x i32> @llvm.vp.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>, <16 x i1>, i32)
1836 declare <vscale x 1 x i32> @llvm.vp.fshr.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
1837 declare <vscale x 2 x i32> @llvm.vp.fshr.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
1838 declare <vscale x 4 x i32> @llvm.vp.fshr.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
1839 declare <vscale x 8 x i32> @llvm.vp.fshr.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
1840 declare <vscale x 16 x i32> @llvm.vp.fshr.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32)
1841 declare <2 x i64> @llvm.vp.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i1>, i32)
1842 declare <4 x i64> @llvm.vp.fshr.v4i64(<4 x i64>, <4 x i64>, <4 x i64>, <4 x i1>, i32)
1843 declare <8 x i64> @llvm.vp.fshr.v8i64(<8 x i64>, <8 x i64>, <8 x i64>, <8 x i1>, i32)
1844 declare <16 x i64> @llvm.vp.fshr.v16i64(<16 x i64>, <16 x i64>, <16 x i64>, <16 x i1>, i32)
1845 declare <vscale x 1 x i64> @llvm.vp.fshr.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
1846 declare <vscale x 2 x i64> @llvm.vp.fshr.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
1847 declare <vscale x 4 x i64> @llvm.vp.fshr.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
1848 declare <vscale x 8 x i64> @llvm.vp.fshr.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
1849 ; llvm.vp.fshl declarations for i8/i16/i32/i64 elements: fixed vectors of 2, 4, 8 and 16 elements, and scalable vectors from nxv1 up to nxv64i8, nxv32i16, nxv16i32 and nxv8i64; operands are (three vectors, i1 vector of matching length, i32).
1850 declare <2 x i8> @llvm.vp.fshl.v2i8(<2 x i8>, <2 x i8>, <2 x i8>, <2 x i1>, i32)
1851 declare <4 x i8> @llvm.vp.fshl.v4i8(<4 x i8>, <4 x i8>, <4 x i8>, <4 x i1>, i32)
1852 declare <8 x i8> @llvm.vp.fshl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i1>, i32)
1853 declare <16 x i8> @llvm.vp.fshl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i1>, i32)
1854 declare <vscale x 1 x i8> @llvm.vp.fshl.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
1855 declare <vscale x 2 x i8> @llvm.vp.fshl.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
1856 declare <vscale x 4 x i8> @llvm.vp.fshl.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
1857 declare <vscale x 8 x i8> @llvm.vp.fshl.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
1858 declare <vscale x 16 x i8> @llvm.vp.fshl.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
1859 declare <vscale x 32 x i8> @llvm.vp.fshl.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32)
1860 declare <vscale x 64 x i8> @llvm.vp.fshl.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32)
1861 declare <2 x i16> @llvm.vp.fshl.v2i16(<2 x i16>, <2 x i16>, <2 x i16>, <2 x i1>, i32)
1862 declare <4 x i16> @llvm.vp.fshl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i1>, i32)
1863 declare <8 x i16> @llvm.vp.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i1>, i32)
1864 declare <16 x i16> @llvm.vp.fshl.v16i16(<16 x i16>, <16 x i16>, <16 x i16>, <16 x i1>, i32)
1865 declare <vscale x 1 x i16> @llvm.vp.fshl.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
1866 declare <vscale x 2 x i16> @llvm.vp.fshl.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
1867 declare <vscale x 4 x i16> @llvm.vp.fshl.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
1868 declare <vscale x 8 x i16> @llvm.vp.fshl.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32)
1869 declare <vscale x 16 x i16> @llvm.vp.fshl.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32)
1870 declare <vscale x 32 x i16> @llvm.vp.fshl.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32)
1871 declare <2 x i32> @llvm.vp.fshl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i1>, i32)
1872 declare <4 x i32> @llvm.vp.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i1>, i32)
1873 declare <8 x i32> @llvm.vp.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>, <8 x i1>, i32)
1874 declare <16 x i32> @llvm.vp.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>, <16 x i1>, i32)
1875 declare <vscale x 1 x i32> @llvm.vp.fshl.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
1876 declare <vscale x 2 x i32> @llvm.vp.fshl.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
1877 declare <vscale x 4 x i32> @llvm.vp.fshl.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
1878 declare <vscale x 8 x i32> @llvm.vp.fshl.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
1879 declare <vscale x 16 x i32> @llvm.vp.fshl.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32)
1880 declare <2 x i64> @llvm.vp.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i1>, i32)
1881 declare <4 x i64> @llvm.vp.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>, <4 x i1>, i32)
1882 declare <8 x i64> @llvm.vp.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>, <8 x i1>, i32)
1883 declare <16 x i64> @llvm.vp.fshl.v16i64(<16 x i64>, <16 x i64>, <16 x i64>, <16 x i1>, i32)
1884 declare <vscale x 1 x i64> @llvm.vp.fshl.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
1885 declare <vscale x 2 x i64> @llvm.vp.fshl.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
1886 declare <vscale x 4 x i64> @llvm.vp.fshl.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
1887 declare <vscale x 8 x i64> @llvm.vp.fshl.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)