llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
   2 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d | FileCheck %s --check-prefixes=CHECK,NOZVBB
   3 ; Vector ctpop exists only under ZVBB
   4 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zvbb | FileCheck %s --check-prefixes=CHECK,ZVBB
   5
   6 define void @bswap() {
   7 ; CHECK-LABEL: 'bswap'
   8 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %1 = call i16 @llvm.bswap.i16(i16 undef)
   9 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> undef)
  10 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> undef)
  11 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> undef)
  12 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %5 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> undef)
  13 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = call <vscale x 1 x i16> @llvm.bswap.nxv1i16(<vscale x 1 x i16> undef)
  14 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = call <vscale x 2 x i16> @llvm.bswap.nxv2i16(<vscale x 2 x i16> undef)
  15 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %8 = call <vscale x 4 x i16> @llvm.bswap.nxv4i16(<vscale x 4 x i16> undef)
  16 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %9 = call <vscale x 8 x i16> @llvm.bswap.nxv8i16(<vscale x 8 x i16> undef)
  17 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = call <vscale x 16 x i16> @llvm.bswap.nxv16i16(<vscale x 16 x i16> undef)
  18 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %11 = call i32 @llvm.bswap.i32(i32 undef)
  19 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %12 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> undef)
  20 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %13 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> undef)
  21 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %14 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> undef)
  22 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %15 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> undef)
  23 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %16 = call <vscale x 1 x i32> @llvm.bswap.nxv1i32(<vscale x 1 x i32> undef)
  24 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %17 = call <vscale x 2 x i32> @llvm.bswap.nxv2i32(<vscale x 2 x i32> undef)
  25 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %18 = call <vscale x 4 x i32> @llvm.bswap.nxv4i32(<vscale x 4 x i32> undef)
  26 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %19 = call <vscale x 8 x i32> @llvm.bswap.nxv8i32(<vscale x 8 x i32> undef)
  27 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %20 = call <vscale x 16 x i32> @llvm.bswap.nxv16i32(<vscale x 16 x i32> undef)
  28 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %21 = call i64 @llvm.bswap.i64(i64 undef)
  29 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %22 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> undef)
  30 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %23 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> undef)
  31 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %24 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> undef)
  32 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %25 = call <16 x i64> @llvm.bswap.v16i64(<16 x i64> undef)
  33 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %26 = call <vscale x 1 x i64> @llvm.bswap.nxv1i64(<vscale x 1 x i64> undef)
  34 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %27 = call <vscale x 2 x i64> @llvm.bswap.nxv2i64(<vscale x 2 x i64> undef)
  35 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %28 = call <vscale x 4 x i64> @llvm.bswap.nxv4i64(<vscale x 4 x i64> undef)
  36 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %29 = call <vscale x 8 x i64> @llvm.bswap.nxv8i64(<vscale x 8 x i64> undef)
  37 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %30 = call <vscale x 16 x i64> @llvm.bswap.nxv16i64(<vscale x 16 x i64> undef)
  38 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  39 ;
  40   call i16 @llvm.bswap.i16(i16 undef)
  41   call <2 x i16> @llvm.bswap.v2i16(<2 x i16> undef)
  42   call <4 x i16> @llvm.bswap.v4i16(<4 x i16> undef)
  43   call <8 x i16> @llvm.bswap.v8i16(<8 x i16> undef)
  44   call <16 x i16> @llvm.bswap.v16i16(<16 x i16> undef)
  45   call <vscale x 1 x i16> @llvm.bswap.nxv1i16(<vscale x 1 x i16> undef)
  46   call <vscale x 2 x i16> @llvm.bswap.nxv2i16(<vscale x 2 x i16> undef)
  47   call <vscale x 4 x i16> @llvm.bswap.nxv4i16(<vscale x 4 x i16> undef)
  48   call <vscale x 8 x i16> @llvm.bswap.nxv8i16(<vscale x 8 x i16> undef)
  49   call <vscale x 16 x i16> @llvm.bswap.nxv16i16(<vscale x 16 x i16> undef)
  50   call i32 @llvm.bswap.i32(i32 undef)
  51   call <2 x i32> @llvm.bswap.v2i32(<2 x i32> undef)
  52   call <4 x i32> @llvm.bswap.v4i32(<4 x i32> undef)
  53   call <8 x i32> @llvm.bswap.v8i32(<8 x i32> undef)
  54   call <16 x i32> @llvm.bswap.v16i32(<16 x i32> undef)
  55   call <vscale x 1 x i32> @llvm.bswap.nxv1i32(<vscale x 1 x i32> undef)
  56   call <vscale x 2 x i32> @llvm.bswap.nxv2i32(<vscale x 2 x i32> undef)
  57   call <vscale x 4 x i32> @llvm.bswap.nxv4i32(<vscale x 4 x i32> undef)
  58   call <vscale x 8 x i32> @llvm.bswap.nxv8i32(<vscale x 8 x i32> undef)
  59   call <vscale x 16 x i32> @llvm.bswap.nxv16i32(<vscale x 16 x i32> undef)
  60   call i64 @llvm.bswap.i64(i64 undef)
  61   call <2 x i64> @llvm.bswap.v2i64(<2 x i64> undef)
  62   call <4 x i64> @llvm.bswap.v4i64(<4 x i64> undef)
  63   call <8 x i64> @llvm.bswap.v8i64(<8 x i64> undef)
  64   call <16 x i64> @llvm.bswap.v16i64(<16 x i64> undef)
  65   call <vscale x 1 x i64> @llvm.bswap.nxv1i64(<vscale x 1 x i64> undef)
  66   call <vscale x 2 x i64> @llvm.bswap.nxv2i64(<vscale x 2 x i64> undef)
  67   call <vscale x 4 x i64> @llvm.bswap.nxv4i64(<vscale x 4 x i64> undef)
  68   call <vscale x 8 x i64> @llvm.bswap.nxv8i64(<vscale x 8 x i64> undef)
  69   call <vscale x 16 x i64> @llvm.bswap.nxv16i64(<vscale x 16 x i64> undef)
  70   ret void
  71 }
  72
  73 define void @bitreverse() {
  74 ; CHECK-LABEL: 'bitreverse'
  75 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %1 = call i8 @llvm.bitreverse.i8(i8 undef)
  76 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %2 = call <2 x i8> @llvm.bitreverse.v2i8(<2 x i8> undef)
  77 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %3 = call <4 x i8> @llvm.bitreverse.v4i8(<4 x i8> undef)
  78 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %4 = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> undef)
  79 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %5 = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> undef)
  80 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %6 = call <vscale x 1 x i8> @llvm.bitreverse.nxv1i8(<vscale x 1 x i8> undef)
  81 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %7 = call <vscale x 2 x i8> @llvm.bitreverse.nxv2i8(<vscale x 2 x i8> undef)
  82 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %8 = call <vscale x 4 x i8> @llvm.bitreverse.nxv4i8(<vscale x 4 x i8> undef)
  83 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %9 = call <vscale x 8 x i8> @llvm.bitreverse.nxv8i8(<vscale x 8 x i8> undef)
  84 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %10 = call <vscale x 16 x i8> @llvm.bitreverse.nxv16i8(<vscale x 16 x i8> undef)
  85 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %11 = call i16 @llvm.bitreverse.i16(i16 undef)
  86 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %12 = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> undef)
  87 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %13 = call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> undef)
  88 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %14 = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> undef)
  89 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %15 = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> undef)
  90 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %16 = call <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16> undef)
  91 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %17 = call <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16> undef)
  92 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %18 = call <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16> undef)
  93 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %19 = call <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16> undef)
  94 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %20 = call <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16> undef)
  95 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %21 = call i32 @llvm.bitreverse.i32(i32 undef)
  96 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %22 = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> undef)
  97 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %23 = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> undef)
  98 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %24 = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> undef)
  99 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %25 = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> undef)
 100 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %26 = call <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32> undef)
 101 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %27 = call <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32> undef)
 102 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %28 = call <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32> undef)
 103 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %29 = call <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32> undef)
 104 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %30 = call <vscale x 16 x i32> @llvm.bitreverse.nxv16i32(<vscale x 16 x i32> undef)
 105 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %31 = call i64 @llvm.bitreverse.i64(i64 undef)
 106 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %32 = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> undef)
 107 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %33 = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> undef)
 108 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %34 = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> undef)
 109 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %35 = call <16 x i64> @llvm.bitreverse.v16i64(<16 x i64> undef)
 110 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %36 = call <vscale x 1 x i64> @llvm.bitreverse.nxv1i64(<vscale x 1 x i64> undef)
 111 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %37 = call <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64> undef)
 112 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %38 = call <vscale x 4 x i64> @llvm.bitreverse.nxv4i64(<vscale x 4 x i64> undef)
 113 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %39 = call <vscale x 8 x i64> @llvm.bitreverse.nxv8i64(<vscale x 8 x i64> undef)
 114 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %40 = call <vscale x 16 x i64> @llvm.bitreverse.nxv16i64(<vscale x 16 x i64> undef)
 115 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 116 ;
 117   call i8 @llvm.bitreverse.i8(i8 undef)
 118   call <2 x i8> @llvm.bitreverse.v2i8(<2 x i8> undef)
 119   call <4 x i8> @llvm.bitreverse.v4i8(<4 x i8> undef)
 120   call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> undef)
 121   call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> undef)
 122   call <vscale x 1 x i8> @llvm.bitreverse.nxv1i8(<vscale x 1 x i8> undef)
 123   call <vscale x 2 x i8> @llvm.bitreverse.nxv2i8(<vscale x 2 x i8> undef)
 124   call <vscale x 4 x i8> @llvm.bitreverse.nxv4i8(<vscale x 4 x i8> undef)
 125   call <vscale x 8 x i8> @llvm.bitreverse.nxv8i8(<vscale x 8 x i8> undef)
 126   call <vscale x 16 x i8> @llvm.bitreverse.nxv16i8(<vscale x 16 x i8> undef)
 127   call i16 @llvm.bitreverse.i16(i16 undef)
 128   call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> undef)
 129   call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> undef)
 130   call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> undef)
 131   call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> undef)
 132   call <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16> undef)
 133   call <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16> undef)
 134   call <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16> undef)
 135   call <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16> undef)
 136   call <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16> undef)
 137   call i32 @llvm.bitreverse.i32(i32 undef)
 138   call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> undef)
 139   call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> undef)
 140   call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> undef)
 141   call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> undef)
 142   call <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32> undef)
 143   call <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32> undef)
 144   call <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32> undef)
 145   call <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32> undef)
 146   call <vscale x 16 x i32> @llvm.bitreverse.nxv16i32(<vscale x 16 x i32> undef)
 147   call i64 @llvm.bitreverse.i64(i64 undef)
 148   call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> undef)
 149   call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> undef)
 150   call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> undef)
 151   call <16 x i64> @llvm.bitreverse.v16i64(<16 x i64> undef)
 152   call <vscale x 1 x i64> @llvm.bitreverse.nxv1i64(<vscale x 1 x i64> undef)
 153   call <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64> undef)
 154   call <vscale x 4 x i64> @llvm.bitreverse.nxv4i64(<vscale x 4 x i64> undef)
 155   call <vscale x 8 x i64> @llvm.bitreverse.nxv8i64(<vscale x 8 x i64> undef)
 156   call <vscale x 16 x i64> @llvm.bitreverse.nxv16i64(<vscale x 16 x i64> undef)
 157   ret void
 158 }
 159
 160 define void @ctlz() {
 161 ; NOZVBB-LABEL: 'ctlz'
 162 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %1 = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> undef, i1 false)
 163 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %2 = call <4 x i8> @llvm.ctlz.v4i8(<4 x i8> undef, i1 false)
 164 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %3 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> undef, i1 false)
 165 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %4 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> undef, i1 false)
 166 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %5 = call <vscale x 1 x i8> @llvm.ctlz.nxv1i8(<vscale x 1 x i8> undef, i1 false)
 167 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %6 = call <vscale x 2 x i8> @llvm.ctlz.nxv2i8(<vscale x 2 x i8> undef, i1 false)
 168 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %7 = call <vscale x 4 x i8> @llvm.ctlz.nxv4i8(<vscale x 4 x i8> undef, i1 false)
 169 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %8 = call <vscale x 8 x i8> @llvm.ctlz.nxv8i8(<vscale x 8 x i8> undef, i1 false)
 170 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %9 = call <vscale x 16 x i8> @llvm.ctlz.nxv16i8(<vscale x 16 x i8> undef, i1 false)
 171 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %10 = call <vscale x 32 x i8> @llvm.ctlz.nxv32i8(<vscale x 32 x i8> undef, i1 false)
 172 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %11 = call <vscale x 64 x i8> @llvm.ctlz.nxv64i8(<vscale x 64 x i8> undef, i1 false)
 173 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %12 = call <2 x i16> @llvm.ctlz.v2i16(<2 x i16> undef, i1 false)
 174 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %13 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> undef, i1 false)
 175 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %14 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> undef, i1 false)
 176 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %15 = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> undef, i1 false)
 177 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %16 = call <vscale x 1 x i16> @llvm.ctlz.nxv1i16(<vscale x 1 x i16> undef, i1 false)
 178 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %17 = call <vscale x 2 x i16> @llvm.ctlz.nxv2i16(<vscale x 2 x i16> undef, i1 false)
 179 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %18 = call <vscale x 4 x i16> @llvm.ctlz.nxv4i16(<vscale x 4 x i16> undef, i1 false)
 180 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %19 = call <vscale x 8 x i16> @llvm.ctlz.nxv8i16(<vscale x 8 x i16> undef, i1 false)
 181 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %20 = call <vscale x 16 x i16> @llvm.ctlz.nxv16i16(<vscale x 16 x i16> undef, i1 false)
 182 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %21 = call <vscale x 32 x i16> @llvm.ctlz.nxv32i16(<vscale x 32 x i16> undef, i1 false)
 183 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %22 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> undef, i1 false)
 184 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %23 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> undef, i1 false)
 185 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %24 = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> undef, i1 false)
 186 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %25 = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> undef, i1 false)
 187 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %26 = call <vscale x 1 x i32> @llvm.ctlz.nxv1i32(<vscale x 1 x i32> undef, i1 false)
 188 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %27 = call <vscale x 2 x i32> @llvm.ctlz.nxv2i32(<vscale x 2 x i32> undef, i1 false)
 189 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %28 = call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> undef, i1 false)
 190 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %29 = call <vscale x 8 x i32> @llvm.ctlz.nxv8i32(<vscale x 8 x i32> undef, i1 false)
 191 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %30 = call <vscale x 16 x i32> @llvm.ctlz.nxv16i32(<vscale x 16 x i32> undef, i1 false)
 192 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %31 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> undef, i1 false)
 193 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %32 = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> undef, i1 false)
 194 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %33 = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> undef, i1 false)
 195 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %34 = call <16 x i64> @llvm.ctlz.v16i64(<16 x i64> undef, i1 false)
 196 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %35 = call <vscale x 1 x i64> @llvm.ctlz.nxv1i64(<vscale x 1 x i64> undef, i1 false)
 197 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %36 = call <vscale x 2 x i64> @llvm.ctlz.nxv2i64(<vscale x 2 x i64> undef, i1 false)
 198 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %37 = call <vscale x 4 x i64> @llvm.ctlz.nxv4i64(<vscale x 4 x i64> undef, i1 false)
 199 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %38 = call <vscale x 8 x i64> @llvm.ctlz.nxv8i64(<vscale x 8 x i64> undef, i1 false)
 200 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %39 = call <vscale x 16 x i64> @llvm.ctlz.nxv16i64(<vscale x 16 x i64> undef, i1 false)
 201 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 202 ;
 203 ; ZVBB-LABEL: 'ctlz'
 204 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> undef, i1 false)
 205 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x i8> @llvm.ctlz.v4i8(<4 x i8> undef, i1 false)
 206 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> undef, i1 false)
 207 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> undef, i1 false)
 208 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <vscale x 1 x i8> @llvm.ctlz.nxv1i8(<vscale x 1 x i8> undef, i1 false)
 209 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x i8> @llvm.ctlz.nxv2i8(<vscale x 2 x i8> undef, i1 false)
 210 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x i8> @llvm.ctlz.nxv4i8(<vscale x 4 x i8> undef, i1 false)
 211 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x i8> @llvm.ctlz.nxv8i8(<vscale x 8 x i8> undef, i1 false)
 212 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 16 x i8> @llvm.ctlz.nxv16i8(<vscale x 16 x i8> undef, i1 false)
 213 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = call <vscale x 32 x i8> @llvm.ctlz.nxv32i8(<vscale x 32 x i8> undef, i1 false)
 214 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %11 = call <vscale x 64 x i8> @llvm.ctlz.nxv64i8(<vscale x 64 x i8> undef, i1 false)
 215 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i16> @llvm.ctlz.v2i16(<2 x i16> undef, i1 false)
 216 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> undef, i1 false)
 217 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> undef, i1 false)
 218 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> undef, i1 false)
 219 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i16> @llvm.ctlz.nxv1i16(<vscale x 1 x i16> undef, i1 false)
 220 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i16> @llvm.ctlz.nxv2i16(<vscale x 2 x i16> undef, i1 false)
 221 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i16> @llvm.ctlz.nxv4i16(<vscale x 4 x i16> undef, i1 false)
 222 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %19 = call <vscale x 8 x i16> @llvm.ctlz.nxv8i16(<vscale x 8 x i16> undef, i1 false)
 223 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %20 = call <vscale x 16 x i16> @llvm.ctlz.nxv16i16(<vscale x 16 x i16> undef, i1 false)
 224 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %21 = call <vscale x 32 x i16> @llvm.ctlz.nxv32i16(<vscale x 32 x i16> undef, i1 false)
 225 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> undef, i1 false)
 226 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %23 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> undef, i1 false)
 227 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %24 = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> undef, i1 false)
 228 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %25 = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> undef, i1 false)
 229 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %26 = call <vscale x 1 x i32> @llvm.ctlz.nxv1i32(<vscale x 1 x i32> undef, i1 false)
 230 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %27 = call <vscale x 2 x i32> @llvm.ctlz.nxv2i32(<vscale x 2 x i32> undef, i1 false)
 231 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> undef, i1 false)
 232 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %29 = call <vscale x 8 x i32> @llvm.ctlz.nxv8i32(<vscale x 8 x i32> undef, i1 false)
 233 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %30 = call <vscale x 16 x i32> @llvm.ctlz.nxv16i32(<vscale x 16 x i32> undef, i1 false)
 234 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %31 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> undef, i1 false)
 235 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %32 = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> undef, i1 false)
 236 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %33 = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> undef, i1 false)
 237 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %34 = call <16 x i64> @llvm.ctlz.v16i64(<16 x i64> undef, i1 false)
 238 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %35 = call <vscale x 1 x i64> @llvm.ctlz.nxv1i64(<vscale x 1 x i64> undef, i1 false)
 239 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %36 = call <vscale x 2 x i64> @llvm.ctlz.nxv2i64(<vscale x 2 x i64> undef, i1 false)
 240 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %37 = call <vscale x 4 x i64> @llvm.ctlz.nxv4i64(<vscale x 4 x i64> undef, i1 false)
 241 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %38 = call <vscale x 8 x i64> @llvm.ctlz.nxv8i64(<vscale x 8 x i64> undef, i1 false)
 242 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %39 = call <vscale x 16 x i64> @llvm.ctlz.nxv16i64(<vscale x 16 x i64> undef, i1 false)
 243 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 244 ;
 245   call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> undef, i1 false)
 246   call <4 x i8> @llvm.ctlz.v4i8(<4 x i8> undef, i1 false)
 247   call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> undef, i1 false)
 248   call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> undef, i1 false)
 249   call <vscale x 1 x i8> @llvm.ctlz.nxv1i8(<vscale x 1 x i8> undef, i1 false)
 250   call <vscale x 2 x i8> @llvm.ctlz.nxv2i8(<vscale x 2 x i8> undef, i1 false)
 251   call <vscale x 4 x i8> @llvm.ctlz.nxv4i8(<vscale x 4 x i8> undef, i1 false)
 252   call <vscale x 8 x i8> @llvm.ctlz.nxv8i8(<vscale x 8 x i8> undef, i1 false)
 253   call <vscale x 16 x i8> @llvm.ctlz.nxv16i8(<vscale x 16 x i8> undef, i1 false)
 254   call <vscale x 32 x i8> @llvm.ctlz.nxv32i8(<vscale x 32 x i8> undef, i1 false)
 255   call <vscale x 64 x i8> @llvm.ctlz.nxv64i8(<vscale x 64 x i8> undef, i1 false)
 256   call <2 x i16> @llvm.ctlz.v2i16(<2 x i16> undef, i1 false)
 257   call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> undef, i1 false)
 258   call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> undef, i1 false)
 259   call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> undef, i1 false)
 260   call <vscale x 1 x i16> @llvm.ctlz.nxv1i16(<vscale x 1 x i16> undef, i1 false)
 261   call <vscale x 2 x i16> @llvm.ctlz.nxv2i16(<vscale x 2 x i16> undef, i1 false)
 262   call <vscale x 4 x i16> @llvm.ctlz.nxv4i16(<vscale x 4 x i16> undef, i1 false)
 263   call <vscale x 8 x i16> @llvm.ctlz.nxv8i16(<vscale x 8 x i16> undef, i1 false)
 264   call <vscale x 16 x i16> @llvm.ctlz.nxv16i16(<vscale x 16 x i16> undef, i1 false)
 265   call <vscale x 32 x i16> @llvm.ctlz.nxv32i16(<vscale x 32 x i16> undef, i1 false)
 266   call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> undef, i1 false)
 267   call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> undef, i1 false)
 268   call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> undef, i1 false)
 269   call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> undef, i1 false)
 270   call <vscale x 1 x i32> @llvm.ctlz.nxv1i32(<vscale x 1 x i32> undef, i1 false)
 271   call <vscale x 2 x i32> @llvm.ctlz.nxv2i32(<vscale x 2 x i32> undef, i1 false)
 272   call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> undef, i1 false)
 273   call <vscale x 8 x i32> @llvm.ctlz.nxv8i32(<vscale x 8 x i32> undef, i1 false)
 274   call <vscale x 16 x i32> @llvm.ctlz.nxv16i32(<vscale x 16 x i32> undef, i1 false)
 275   call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> undef, i1 false)
 276   call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> undef, i1 false)
 277   call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> undef, i1 false)
 278   call <16 x i64> @llvm.ctlz.v16i64(<16 x i64> undef, i1 false)
 279   call <vscale x 1 x i64> @llvm.ctlz.nxv1i64(<vscale x 1 x i64> undef, i1 false)
 280   call <vscale x 2 x i64> @llvm.ctlz.nxv2i64(<vscale x 2 x i64> undef, i1 false)
 281   call <vscale x 4 x i64> @llvm.ctlz.nxv4i64(<vscale x 4 x i64> undef, i1 false)
 282   call <vscale x 8 x i64> @llvm.ctlz.nxv8i64(<vscale x 8 x i64> undef, i1 false)
 283   call <vscale x 16 x i64> @llvm.ctlz.nxv16i64(<vscale x 16 x i64> undef, i1 false)
 284   ret void
 285 }
 286
 287 define void @cttz() {
 288 ; NOZVBB-LABEL: 'cttz'
 289 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %1 = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> undef, i1 false)
 290 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %2 = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> undef, i1 false)
 291 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %3 = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> undef, i1 false)
 292 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> undef, i1 false)
 293 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = call <vscale x 1 x i8> @llvm.cttz.nxv1i8(<vscale x 1 x i8> undef, i1 false)
 294 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = call <vscale x 2 x i8> @llvm.cttz.nxv2i8(<vscale x 2 x i8> undef, i1 false)
 295 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %7 = call <vscale x 4 x i8> @llvm.cttz.nxv4i8(<vscale x 4 x i8> undef, i1 false)
 296 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %8 = call <vscale x 8 x i8> @llvm.cttz.nxv8i8(<vscale x 8 x i8> undef, i1 false)
 297 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %9 = call <vscale x 16 x i8> @llvm.cttz.nxv16i8(<vscale x 16 x i8> undef, i1 false)
 298 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %10 = call <vscale x 32 x i8> @llvm.cttz.nxv32i8(<vscale x 32 x i8> undef, i1 false)
 299 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %11 = call <vscale x 64 x i8> @llvm.cttz.nxv64i8(<vscale x 64 x i8> undef, i1 false)
 300 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %12 = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> undef, i1 false)
 301 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %13 = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> undef, i1 false)
 302 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %14 = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> undef, i1 false)
 303 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %15 = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> undef, i1 false)
 304 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %16 = call <vscale x 1 x i16> @llvm.cttz.nxv1i16(<vscale x 1 x i16> undef, i1 false)
 305 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %17 = call <vscale x 2 x i16> @llvm.cttz.nxv2i16(<vscale x 2 x i16> undef, i1 false)
 306 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %18 = call <vscale x 4 x i16> @llvm.cttz.nxv4i16(<vscale x 4 x i16> undef, i1 false)
 307 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %19 = call <vscale x 8 x i16> @llvm.cttz.nxv8i16(<vscale x 8 x i16> undef, i1 false)
 308 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %20 = call <vscale x 16 x i16> @llvm.cttz.nxv16i16(<vscale x 16 x i16> undef, i1 false)
 309 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %21 = call <vscale x 32 x i16> @llvm.cttz.nxv32i16(<vscale x 32 x i16> undef, i1 false)
 310 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %22 = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> undef, i1 false)
 311 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %23 = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> undef, i1 false)
 312 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %24 = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> undef, i1 false)
 313 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %25 = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> undef, i1 false)
 314 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %26 = call <vscale x 1 x i32> @llvm.cttz.nxv1i32(<vscale x 1 x i32> undef, i1 false)
 315 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %27 = call <vscale x 2 x i32> @llvm.cttz.nxv2i32(<vscale x 2 x i32> undef, i1 false)
 316 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %28 = call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> undef, i1 false)
 317 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %29 = call <vscale x 8 x i32> @llvm.cttz.nxv8i32(<vscale x 8 x i32> undef, i1 false)
 318 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %30 = call <vscale x 16 x i32> @llvm.cttz.nxv16i32(<vscale x 16 x i32> undef, i1 false)
 319 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %31 = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> undef, i1 false)
 320 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %32 = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> undef, i1 false)
 321 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %33 = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> undef, i1 false)
 322 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %34 = call <16 x i64> @llvm.cttz.v16i64(<16 x i64> undef, i1 false)
 323 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %35 = call <vscale x 1 x i64> @llvm.cttz.nxv1i64(<vscale x 1 x i64> undef, i1 false)
 324 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %36 = call <vscale x 2 x i64> @llvm.cttz.nxv2i64(<vscale x 2 x i64> undef, i1 false)
 325 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %37 = call <vscale x 4 x i64> @llvm.cttz.nxv4i64(<vscale x 4 x i64> undef, i1 false)
 326 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %38 = call <vscale x 8 x i64> @llvm.cttz.nxv8i64(<vscale x 8 x i64> undef, i1 false)
 327 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %39 = call <vscale x 16 x i64> @llvm.cttz.nxv16i64(<vscale x 16 x i64> undef, i1 false)
 328 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 329 ;
 330 ; ZVBB-LABEL: 'cttz'
 331 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> undef, i1 false)
 332 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> undef, i1 false)
 333 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> undef, i1 false)
 334 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> undef, i1 false)
 335 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <vscale x 1 x i8> @llvm.cttz.nxv1i8(<vscale x 1 x i8> undef, i1 false)
 336 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x i8> @llvm.cttz.nxv2i8(<vscale x 2 x i8> undef, i1 false)
 337 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x i8> @llvm.cttz.nxv4i8(<vscale x 4 x i8> undef, i1 false)
 338 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x i8> @llvm.cttz.nxv8i8(<vscale x 8 x i8> undef, i1 false)
 339 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 16 x i8> @llvm.cttz.nxv16i8(<vscale x 16 x i8> undef, i1 false)
 340 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = call <vscale x 32 x i8> @llvm.cttz.nxv32i8(<vscale x 32 x i8> undef, i1 false)
 341 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %11 = call <vscale x 64 x i8> @llvm.cttz.nxv64i8(<vscale x 64 x i8> undef, i1 false)
 342 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> undef, i1 false)
 343 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> undef, i1 false)
 344 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> undef, i1 false)
 345 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> undef, i1 false)
 346 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i16> @llvm.cttz.nxv1i16(<vscale x 1 x i16> undef, i1 false)
 347 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i16> @llvm.cttz.nxv2i16(<vscale x 2 x i16> undef, i1 false)
 348 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i16> @llvm.cttz.nxv4i16(<vscale x 4 x i16> undef, i1 false)
 349 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %19 = call <vscale x 8 x i16> @llvm.cttz.nxv8i16(<vscale x 8 x i16> undef, i1 false)
 350 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %20 = call <vscale x 16 x i16> @llvm.cttz.nxv16i16(<vscale x 16 x i16> undef, i1 false)
 351 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %21 = call <vscale x 32 x i16> @llvm.cttz.nxv32i16(<vscale x 32 x i16> undef, i1 false)
 352 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> undef, i1 false)
 353 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %23 = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> undef, i1 false)
 354 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %24 = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> undef, i1 false)
 355 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %25 = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> undef, i1 false)
 356 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %26 = call <vscale x 1 x i32> @llvm.cttz.nxv1i32(<vscale x 1 x i32> undef, i1 false)
 357 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %27 = call <vscale x 2 x i32> @llvm.cttz.nxv2i32(<vscale x 2 x i32> undef, i1 false)
 358 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> undef, i1 false)
 359 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %29 = call <vscale x 8 x i32> @llvm.cttz.nxv8i32(<vscale x 8 x i32> undef, i1 false)
 360 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %30 = call <vscale x 16 x i32> @llvm.cttz.nxv16i32(<vscale x 16 x i32> undef, i1 false)
 361 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %31 = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> undef, i1 false)
 362 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %32 = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> undef, i1 false)
 363 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %33 = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> undef, i1 false)
 364 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %34 = call <16 x i64> @llvm.cttz.v16i64(<16 x i64> undef, i1 false)
 365 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %35 = call <vscale x 1 x i64> @llvm.cttz.nxv1i64(<vscale x 1 x i64> undef, i1 false)
 366 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %36 = call <vscale x 2 x i64> @llvm.cttz.nxv2i64(<vscale x 2 x i64> undef, i1 false)
 367 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %37 = call <vscale x 4 x i64> @llvm.cttz.nxv4i64(<vscale x 4 x i64> undef, i1 false)
 368 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %38 = call <vscale x 8 x i64> @llvm.cttz.nxv8i64(<vscale x 8 x i64> undef, i1 false)
 369 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %39 = call <vscale x 16 x i64> @llvm.cttz.nxv16i64(<vscale x 16 x i64> undef, i1 false)
 370 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 371 ;
 372   call <2 x i8> @llvm.cttz.v2i8(<2 x i8> undef, i1 false)
 373   call <4 x i8> @llvm.cttz.v4i8(<4 x i8> undef, i1 false)
 374   call <8 x i8> @llvm.cttz.v8i8(<8 x i8> undef, i1 false)
 375   call <16 x i8> @llvm.cttz.v16i8(<16 x i8> undef, i1 false)
 376   call <vscale x 1 x i8> @llvm.cttz.nxv1i8(<vscale x 1 x i8> undef, i1 false)
 377   call <vscale x 2 x i8> @llvm.cttz.nxv2i8(<vscale x 2 x i8> undef, i1 false)
 378   call <vscale x 4 x i8> @llvm.cttz.nxv4i8(<vscale x 4 x i8> undef, i1 false)
 379   call <vscale x 8 x i8> @llvm.cttz.nxv8i8(<vscale x 8 x i8> undef, i1 false)
 380   call <vscale x 16 x i8> @llvm.cttz.nxv16i8(<vscale x 16 x i8> undef, i1 false)
 381   call <vscale x 32 x i8> @llvm.cttz.nxv32i8(<vscale x 32 x i8> undef, i1 false)
 382   call <vscale x 64 x i8> @llvm.cttz.nxv64i8(<vscale x 64 x i8> undef, i1 false)
 383   call <2 x i16> @llvm.cttz.v2i16(<2 x i16> undef, i1 false)
 384   call <4 x i16> @llvm.cttz.v4i16(<4 x i16> undef, i1 false)
 385   call <8 x i16> @llvm.cttz.v8i16(<8 x i16> undef, i1 false)
 386   call <16 x i16> @llvm.cttz.v16i16(<16 x i16> undef, i1 false)
 387   call <vscale x 1 x i16> @llvm.cttz.nxv1i16(<vscale x 1 x i16> undef, i1 false)
 388   call <vscale x 2 x i16> @llvm.cttz.nxv2i16(<vscale x 2 x i16> undef, i1 false)
 389   call <vscale x 4 x i16> @llvm.cttz.nxv4i16(<vscale x 4 x i16> undef, i1 false)
 390   call <vscale x 8 x i16> @llvm.cttz.nxv8i16(<vscale x 8 x i16> undef, i1 false)
 391   call <vscale x 16 x i16> @llvm.cttz.nxv16i16(<vscale x 16 x i16> undef, i1 false)
 392   call <vscale x 32 x i16> @llvm.cttz.nxv32i16(<vscale x 32 x i16> undef, i1 false)
 393   call <2 x i32> @llvm.cttz.v2i32(<2 x i32> undef, i1 false)
 394   call <4 x i32> @llvm.cttz.v4i32(<4 x i32> undef, i1 false)
 395   call <8 x i32> @llvm.cttz.v8i32(<8 x i32> undef, i1 false)
 396   call <16 x i32> @llvm.cttz.v16i32(<16 x i32> undef, i1 false)
 397   call <vscale x 1 x i32> @llvm.cttz.nxv1i32(<vscale x 1 x i32> undef, i1 false)
 398   call <vscale x 2 x i32> @llvm.cttz.nxv2i32(<vscale x 2 x i32> undef, i1 false)
 399   call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> undef, i1 false)
 400   call <vscale x 8 x i32> @llvm.cttz.nxv8i32(<vscale x 8 x i32> undef, i1 false)
 401   call <vscale x 16 x i32> @llvm.cttz.nxv16i32(<vscale x 16 x i32> undef, i1 false)
 402   call <2 x i64> @llvm.cttz.v2i64(<2 x i64> undef, i1 false)
 403   call <4 x i64> @llvm.cttz.v4i64(<4 x i64> undef, i1 false)
 404   call <8 x i64> @llvm.cttz.v8i64(<8 x i64> undef, i1 false)
 405   call <16 x i64> @llvm.cttz.v16i64(<16 x i64> undef, i1 false)
 406   call <vscale x 1 x i64> @llvm.cttz.nxv1i64(<vscale x 1 x i64> undef, i1 false)
 407   call <vscale x 2 x i64> @llvm.cttz.nxv2i64(<vscale x 2 x i64> undef, i1 false)
 408   call <vscale x 4 x i64> @llvm.cttz.nxv4i64(<vscale x 4 x i64> undef, i1 false)
 409   call <vscale x 8 x i64> @llvm.cttz.nxv8i64(<vscale x 8 x i64> undef, i1 false)
 410   call <vscale x 16 x i64> @llvm.cttz.nxv16i64(<vscale x 16 x i64> undef, i1 false)
 411   ret void
 412 }
 413
 414 define void @ctpop() {
 415 ; NOZVBB-LABEL: 'ctpop'
 416 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = call i8 @llvm.ctpop.i8(i8 undef)
 417 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> undef)
 418 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %3 = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> undef)
 419 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %4 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> undef)
 420 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %5 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> undef)
 421 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %6 = call <vscale x 1 x i8> @llvm.ctpop.nxv1i8(<vscale x 1 x i8> undef)
 422 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %7 = call <vscale x 2 x i8> @llvm.ctpop.nxv2i8(<vscale x 2 x i8> undef)
 423 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %8 = call <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8> undef)
 424 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %9 = call <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8> undef)
 425 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %10 = call <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8> undef)
 426 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %11 = call i16 @llvm.ctpop.i16(i16 undef)
 427 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %12 = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> undef)
 428 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %13 = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> undef)
 429 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %14 = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> undef)
 430 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %15 = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> undef)
 431 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %16 = call <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16> undef)
 432 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %17 = call <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16> undef)
 433 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %18 = call <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16> undef)
 434 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %19 = call <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16> undef)
 435 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %20 = call <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16> undef)
 436 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %21 = call i32 @llvm.ctpop.i32(i32 undef)
 437 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %22 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> undef)
 438 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %23 = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> undef)
 439 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %24 = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> undef)
 440 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %25 = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> undef)
 441 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %26 = call <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32> undef)
 442 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %27 = call <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32> undef)
 443 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %28 = call <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32> undef)
 444 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %29 = call <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32> undef)
 445 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %30 = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> undef)
 446 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %31 = call i64 @llvm.ctpop.i64(i64 undef)
 447 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %32 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> undef)
 448 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %33 = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> undef)
 449 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %34 = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> undef)
 450 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %35 = call <16 x i64> @llvm.ctpop.v16i64(<16 x i64> undef)
 451 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %36 = call <vscale x 1 x i64> @llvm.ctpop.nxv1i64(<vscale x 1 x i64> undef)
 452 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %37 = call <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64> undef)
 453 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %38 = call <vscale x 4 x i64> @llvm.ctpop.nxv4i64(<vscale x 4 x i64> undef)
 454 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %39 = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> undef)
 455 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %40 = call <vscale x 16 x i64> @llvm.ctpop.nxv16i64(<vscale x 16 x i64> undef)
 456 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 457 ;
 458 ; ZVBB-LABEL: 'ctpop'
 459 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = call i8 @llvm.ctpop.i8(i8 undef)
 460 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> undef)
 461 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> undef)
 462 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> undef)
 463 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> undef)
 464 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x i8> @llvm.ctpop.nxv1i8(<vscale x 1 x i8> undef)
 465 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x i8> @llvm.ctpop.nxv2i8(<vscale x 2 x i8> undef)
 466 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8> undef)
 467 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8> undef)
 468 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8> undef)
 469 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %11 = call i16 @llvm.ctpop.i16(i16 undef)
 470 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> undef)
 471 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> undef)
 472 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> undef)
 473 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> undef)
 474 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16> undef)
 475 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16> undef)
 476 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16> undef)
 477 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %19 = call <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16> undef)
 478 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %20 = call <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16> undef)
 479 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %21 = call i32 @llvm.ctpop.i32(i32 undef)
 480 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> undef)
 481 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %23 = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> undef)
 482 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %24 = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> undef)
 483 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %25 = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> undef)
 484 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %26 = call <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32> undef)
 485 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %27 = call <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32> undef)
 486 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %28 = call <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32> undef)
 487 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %29 = call <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32> undef)
 488 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %30 = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> undef)
 489 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %31 = call i64 @llvm.ctpop.i64(i64 undef)
 490 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %32 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> undef)
 491 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %33 = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> undef)
 492 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %34 = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> undef)
 493 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %35 = call <16 x i64> @llvm.ctpop.v16i64(<16 x i64> undef)
 494 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %36 = call <vscale x 1 x i64> @llvm.ctpop.nxv1i64(<vscale x 1 x i64> undef)
 495 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %37 = call <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64> undef)
 496 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %38 = call <vscale x 4 x i64> @llvm.ctpop.nxv4i64(<vscale x 4 x i64> undef)
 497 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %39 = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> undef)
 498 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %40 = call <vscale x 16 x i64> @llvm.ctpop.nxv16i64(<vscale x 16 x i64> undef)
 499 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 500 ;
 501   call i8 @llvm.ctpop.i8(i8 undef)
 502   call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> undef)
 503   call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> undef)
 504   call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> undef)
 505   call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> undef)
 506   call <vscale x 1 x i8> @llvm.ctpop.nxv1i8(<vscale x 1 x i8> undef)
 507   call <vscale x 2 x i8> @llvm.ctpop.nxv2i8(<vscale x 2 x i8> undef)
 508   call <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8> undef)
 509   call <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8> undef)
 510   call <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8> undef)
 511   call i16 @llvm.ctpop.i16(i16 undef)
 512   call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> undef)
 513   call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> undef)
 514   call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> undef)
 515   call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> undef)
 516   call <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16> undef)
 517   call <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16> undef)
 518   call <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16> undef)
 519   call <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16> undef)
 520   call <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16> undef)
 521   call i32 @llvm.ctpop.i32(i32 undef)
 522   call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> undef)
 523   call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> undef)
 524   call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> undef)
 525   call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> undef)
 526   call <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32> undef)
 527   call <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32> undef)
 528   call <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32> undef)
 529   call <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32> undef)
 530   call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> undef)
 531   call i64 @llvm.ctpop.i64(i64 undef)
 532   call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> undef)
 533   call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> undef)
 534   call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> undef)
 535   call <16 x i64> @llvm.ctpop.v16i64(<16 x i64> undef)
 536   call <vscale x 1 x i64> @llvm.ctpop.nxv1i64(<vscale x 1 x i64> undef)
 537   call <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64> undef)
 538   call <vscale x 4 x i64> @llvm.ctpop.nxv4i64(<vscale x 4 x i64> undef)
 539   call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> undef)
 540   call <vscale x 16 x i64> @llvm.ctpop.nxv16i64(<vscale x 16 x i64> undef)
 541   ret void
 542 }
 543
 544 define void @vp_bswap() {
 545 ; CHECK-LABEL: 'vp_bswap'
 546 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = call <2 x i16> @llvm.vp.bswap.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
 547 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call <4 x i16> @llvm.vp.bswap.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
 548 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = call <8 x i16> @llvm.vp.bswap.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
 549 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = call <16 x i16> @llvm.vp.bswap.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
 550 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %5 = call <vscale x 1 x i16> @llvm.vp.bswap.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
 551 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = call <vscale x 2 x i16> @llvm.vp.bswap.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 552 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = call <vscale x 4 x i16> @llvm.vp.bswap.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 553 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %8 = call <vscale x 8 x i16> @llvm.vp.bswap.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 554 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %9 = call <vscale x 16 x i16> @llvm.vp.bswap.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 555 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %10 = call <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
 556 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %11 = call <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
 557 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %12 = call <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
 558 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %13 = call <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
 559 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %14 = call <vscale x 1 x i32> @llvm.vp.bswap.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
 560 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %15 = call <vscale x 2 x i32> @llvm.vp.bswap.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 561 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %16 = call <vscale x 4 x i32> @llvm.vp.bswap.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 562 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %17 = call <vscale x 8 x i32> @llvm.vp.bswap.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 563 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %18 = call <vscale x 16 x i32> @llvm.vp.bswap.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 564 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %19 = call <2 x i64> @llvm.vp.bswap.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
 565 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %20 = call <4 x i64> @llvm.vp.bswap.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
 566 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %21 = call <8 x i64> @llvm.vp.bswap.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
 567 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %22 = call <16 x i64> @llvm.vp.bswap.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
 568 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %23 = call <vscale x 1 x i64> @llvm.vp.bswap.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
 569 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %24 = call <vscale x 2 x i64> @llvm.vp.bswap.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 570 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %25 = call <vscale x 4 x i64> @llvm.vp.bswap.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 571 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %26 = call <vscale x 8 x i64> @llvm.vp.bswap.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 572 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %27 = call <vscale x 16 x i64> @llvm.vp.bswap.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 573 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 574 ;
 575   call <2 x i16> @llvm.vp.bswap.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
 576   call <4 x i16> @llvm.vp.bswap.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
 577   call <8 x i16> @llvm.vp.bswap.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
 578   call <16 x i16> @llvm.vp.bswap.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
 579   call <vscale x 1 x i16> @llvm.vp.bswap.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
 580   call <vscale x 2 x i16> @llvm.vp.bswap.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 581   call <vscale x 4 x i16> @llvm.vp.bswap.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 582   call <vscale x 8 x i16> @llvm.vp.bswap.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 583   call <vscale x 16 x i16> @llvm.vp.bswap.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 584   call <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
 585   call <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
 586   call <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
 587   call <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
 588   call <vscale x 1 x i32> @llvm.vp.bswap.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
 589   call <vscale x 2 x i32> @llvm.vp.bswap.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 590   call <vscale x 4 x i32> @llvm.vp.bswap.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 591   call <vscale x 8 x i32> @llvm.vp.bswap.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 592   call <vscale x 16 x i32> @llvm.vp.bswap.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 593   call <2 x i64> @llvm.vp.bswap.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
 594   call <4 x i64> @llvm.vp.bswap.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
 595   call <8 x i64> @llvm.vp.bswap.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
 596   call <16 x i64> @llvm.vp.bswap.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
 597   call <vscale x 1 x i64> @llvm.vp.bswap.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
 598   call <vscale x 2 x i64> @llvm.vp.bswap.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 599   call <vscale x 4 x i64> @llvm.vp.bswap.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 600   call <vscale x 8 x i64> @llvm.vp.bswap.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 601   call <vscale x 16 x i64> @llvm.vp.bswap.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 602   ret void
 603 }
 604
 605 define void @vp_ctpop() {
 606 ; CHECK-LABEL: 'vp_ctpop'
 607 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %1 = call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
 608 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %2 = call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
 609 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %3 = call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
 610 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %4 = call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
 611 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %5 = call <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
 612 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %6 = call <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 613 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %7 = call <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 614 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %8 = call <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 615 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %9 = call <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 616 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %10 = call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
 617 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %11 = call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
 618 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %12 = call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
 619 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %13 = call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
 620 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %14 = call <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
 621 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %15 = call <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 622 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %16 = call <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 623 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %17 = call <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 624 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %18 = call <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 625 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %19 = call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
 626 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %20 = call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
 627 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %21 = call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
 628 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %22 = call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
 629 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %23 = call <vscale x 1 x i32> @llvm.vp.ctpop.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
 630 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %24 = call <vscale x 2 x i32> @llvm.vp.ctpop.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 631 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %25 = call <vscale x 4 x i32> @llvm.vp.ctpop.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 632 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %26 = call <vscale x 8 x i32> @llvm.vp.ctpop.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 633 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %27 = call <vscale x 16 x i32> @llvm.vp.ctpop.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 634 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %28 = call <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
 635 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %29 = call <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
 636 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %30 = call <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
 637 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %31 = call <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
 638 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %32 = call <vscale x 1 x i64> @llvm.vp.ctpop.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
 639 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %33 = call <vscale x 2 x i64> @llvm.vp.ctpop.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 640 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %34 = call <vscale x 4 x i64> @llvm.vp.ctpop.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 641 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %35 = call <vscale x 8 x i64> @llvm.vp.ctpop.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 642 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %36 = call <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 643 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 644 ;
 645   call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
 646   call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
 647   call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
 648   call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
 649   call <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
 650   call <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 651   call <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 652   call <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 653   call <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 654   call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
 655   call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
 656   call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
 657   call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
 658   call <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
 659   call <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 660   call <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 661   call <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 662   call <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 663   call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
 664   call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
 665   call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
 666   call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
 667   call <vscale x 1 x i32> @llvm.vp.ctpop.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
 668   call <vscale x 2 x i32> @llvm.vp.ctpop.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 669   call <vscale x 4 x i32> @llvm.vp.ctpop.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 670   call <vscale x 8 x i32> @llvm.vp.ctpop.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 671   call <vscale x 16 x i32> @llvm.vp.ctpop.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 672   call <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
 673   call <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
 674   call <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
 675   call <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
 676   call <vscale x 1 x i64> @llvm.vp.ctpop.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
 677   call <vscale x 2 x i64> @llvm.vp.ctpop.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 678   call <vscale x 4 x i64> @llvm.vp.ctpop.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 679   call <vscale x 8 x i64> @llvm.vp.ctpop.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 680   call <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 681   ret void
 682 }
 683
 684 define void @vp_ctlz() {
 685 ; CHECK-LABEL: 'vp_ctlz'
 686 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %1 = call <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8> undef, i1 false, <2 x i1> undef, i32 undef)
 687 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %2 = call <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8> undef, i1 false, <4 x i1> undef, i32 undef)
 688 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %3 = call <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8> undef, i1 false, <8 x i1> undef, i32 undef)
 689 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %4 = call <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8> undef, i1 false, <16 x i1> undef, i32 undef)
 690 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %5 = call <vscale x 1 x i8> @llvm.vp.ctlz.nxv1i8(<vscale x 1 x i8> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 691 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %6 = call <vscale x 2 x i8> @llvm.vp.ctlz.nxv2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 692 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %7 = call <vscale x 4 x i8> @llvm.vp.ctlz.nxv4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 693 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %8 = call <vscale x 8 x i8> @llvm.vp.ctlz.nxv8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 694 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %9 = call <vscale x 16 x i8> @llvm.vp.ctlz.nxv16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 695 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %10 = call <vscale x 32 x i8> @llvm.vp.ctlz.nxv32i8(<vscale x 32 x i8> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
 696 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %11 = call <vscale x 64 x i8> @llvm.vp.ctlz.nxv64i8(<vscale x 64 x i8> undef, i1 false, <vscale x 64 x i1> undef, i32 undef)
 697 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %12 = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
 698 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %13 = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
 699 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %14 = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
 700 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %15 = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
 701 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %16 = call <vscale x 1 x i16> @llvm.vp.ctlz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 702 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %17 = call <vscale x 2 x i16> @llvm.vp.ctlz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 703 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %18 = call <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 704 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %19 = call <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 705 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %20 = call <vscale x 16 x i16> @llvm.vp.ctlz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 706 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %21 = call <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
 707 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %22 = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
 708 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %23 = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
 709 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %24 = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
 710 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %25 = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
 711 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %26 = call <vscale x 1 x i16> @llvm.vp.ctlz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 712 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %27 = call <vscale x 2 x i16> @llvm.vp.ctlz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 713 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %28 = call <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 714 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %29 = call <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 715 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %30 = call <vscale x 16 x i16> @llvm.vp.ctlz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 716 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %31 = call <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
 717 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %32 = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> undef, i1 false, <2 x i1> undef, i32 undef)
 718 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %33 = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> undef, i1 false, <4 x i1> undef, i32 undef)
 719 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %34 = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> undef, i1 false, <8 x i1> undef, i32 undef)
 720 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %35 = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> undef, i1 false, <16 x i1> undef, i32 undef)
 721 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %36 = call <vscale x 1 x i32> @llvm.vp.ctlz.nxv1i32(<vscale x 1 x i32> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 722 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %37 = call <vscale x 2 x i32> @llvm.vp.ctlz.nxv2i32(<vscale x 2 x i32> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 723 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %38 = call <vscale x 4 x i32> @llvm.vp.ctlz.nxv4i32(<vscale x 4 x i32> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 724 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %39 = call <vscale x 8 x i32> @llvm.vp.ctlz.nxv8i32(<vscale x 8 x i32> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 725 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %40 = call <vscale x 16 x i32> @llvm.vp.ctlz.nxv16i32(<vscale x 16 x i32> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 726 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %41 = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef)
 727 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %42 = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef)
 728 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %43 = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef)
 729 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %44 = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef)
 730 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %45 = call <vscale x 1 x i64> @llvm.vp.ctlz.nxv1i64(<vscale x 1 x i64> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 731 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %46 = call <vscale x 2 x i64> @llvm.vp.ctlz.nxv2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 732 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %47 = call <vscale x 4 x i64> @llvm.vp.ctlz.nxv4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 733 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %48 = call <vscale x 8 x i64> @llvm.vp.ctlz.nxv8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 734 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %49 = call <vscale x 16 x i64> @llvm.vp.ctlz.nxv16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 735 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 736 ;
 737   call <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8> undef, i1 false, <2 x i1> undef, i32 undef)
 738   call <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8> undef, i1 false, <4 x i1> undef, i32 undef)
 739   call <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8> undef, i1 false, <8 x i1> undef, i32 undef)
 740   call <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8> undef, i1 false, <16 x i1> undef, i32 undef)
 741   call <vscale x 1 x i8> @llvm.vp.ctlz.nxv1i8(<vscale x 1 x i8> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 742   call <vscale x 2 x i8> @llvm.vp.ctlz.nxv2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 743   call <vscale x 4 x i8> @llvm.vp.ctlz.nxv4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 744   call <vscale x 8 x i8> @llvm.vp.ctlz.nxv8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 745   call <vscale x 16 x i8> @llvm.vp.ctlz.nxv16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 746   call <vscale x 32 x i8> @llvm.vp.ctlz.nxv32i8(<vscale x 32 x i8> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
 747   call <vscale x 64 x i8> @llvm.vp.ctlz.nxv64i8(<vscale x 64 x i8> undef, i1 false, <vscale x 64 x i1> undef, i32 undef)
 748   call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
 749   call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
 750   call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
 751   call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
 752   call <vscale x 1 x i16> @llvm.vp.ctlz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 753   call <vscale x 2 x i16> @llvm.vp.ctlz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 754   call <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 755   call <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 756   call <vscale x 16 x i16> @llvm.vp.ctlz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 757   call <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
 758   call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
 759   call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
 760   call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
 761   call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
 762   call <vscale x 1 x i16> @llvm.vp.ctlz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 763   call <vscale x 2 x i16> @llvm.vp.ctlz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 764   call <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 765   call <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 766   call <vscale x 16 x i16> @llvm.vp.ctlz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 767   call <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
 768   call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> undef, i1 false, <2 x i1> undef, i32 undef)
 769   call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> undef, i1 false, <4 x i1> undef, i32 undef)
 770   call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> undef, i1 false, <8 x i1> undef, i32 undef)
 771   call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> undef, i1 false, <16 x i1> undef, i32 undef)
 772   call <vscale x 1 x i32> @llvm.vp.ctlz.nxv1i32(<vscale x 1 x i32> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 773   call <vscale x 2 x i32> @llvm.vp.ctlz.nxv2i32(<vscale x 2 x i32> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 774   call <vscale x 4 x i32> @llvm.vp.ctlz.nxv4i32(<vscale x 4 x i32> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 775   call <vscale x 8 x i32> @llvm.vp.ctlz.nxv8i32(<vscale x 8 x i32> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 776   call <vscale x 16 x i32> @llvm.vp.ctlz.nxv16i32(<vscale x 16 x i32> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 777   call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef)
 778   call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef)
 779   call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef)
 780   call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef)
 781   call <vscale x 1 x i64> @llvm.vp.ctlz.nxv1i64(<vscale x 1 x i64> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 782   call <vscale x 2 x i64> @llvm.vp.ctlz.nxv2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 783   call <vscale x 4 x i64> @llvm.vp.ctlz.nxv4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 784   call <vscale x 8 x i64> @llvm.vp.ctlz.nxv8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 785   call <vscale x 16 x i64> @llvm.vp.ctlz.nxv16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 786   ret void
 787 }
 788
 789 define void @vp_cttz() {
 790 ; CHECK-LABEL: 'vp_cttz'
 791 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %1 = call <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8> undef, i1 false, <2 x i1> undef, i32 undef)
 792 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %2 = call <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8> undef, i1 false, <4 x i1> undef, i32 undef)
 793 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %3 = call <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8> undef, i1 false, <8 x i1> undef, i32 undef)
 794 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = call <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8> undef, i1 false, <16 x i1> undef, i32 undef)
 795 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = call <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 796 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = call <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 797 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %7 = call <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 798 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %8 = call <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 799 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %9 = call <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 800 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %10 = call <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
 801 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %11 = call <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8> undef, i1 false, <vscale x 64 x i1> undef, i32 undef)
 802 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %12 = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
 803 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %13 = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
 804 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %14 = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
 805 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %15 = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
 806 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %16 = call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 807 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %17 = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 808 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %18 = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 809 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %19 = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 810 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %20 = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 811 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %21 = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
 812 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %22 = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
 813 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %23 = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
 814 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %24 = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
 815 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %25 = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
 816 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %26 = call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 817 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %27 = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 818 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %28 = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 819 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %29 = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 820 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %30 = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 821 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %31 = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
 822 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %32 = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> undef, i1 false, <2 x i1> undef, i32 undef)
 823 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %33 = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> undef, i1 false, <4 x i1> undef, i32 undef)
 824 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %34 = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> undef, i1 false, <8 x i1> undef, i32 undef)
 825 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %35 = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> undef, i1 false, <16 x i1> undef, i32 undef)
 826 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %36 = call <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 827 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %37 = call <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 828 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %38 = call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 829 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %39 = call <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 830 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %40 = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 831 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %41 = call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef)
 832 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %42 = call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef)
 833 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %43 = call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef)
 834 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %44 = call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef)
 835 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %45 = call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 836 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %46 = call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 837 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %47 = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 838 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %48 = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 839 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %49 = call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 840 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 841 ;
 842   call <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8> undef, i1 false, <2 x i1> undef, i32 undef)
 843   call <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8> undef, i1 false, <4 x i1> undef, i32 undef)
 844   call <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8> undef, i1 false, <8 x i1> undef, i32 undef)
 845   call <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8> undef, i1 false, <16 x i1> undef, i32 undef)
 846   call <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 847   call <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 848   call <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 849   call <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 850   call <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 851   call <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
 852   call <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8> undef, i1 false, <vscale x 64 x i1> undef, i32 undef)
 853   call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
 854   call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
 855   call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
 856   call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
 857   call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 858   call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 859   call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 860   call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 861   call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 862   call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
 863   call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
 864   call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
 865   call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
 866   call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
 867   call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 868   call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 869   call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 870   call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 871   call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 872   call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
 873   call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> undef, i1 false, <2 x i1> undef, i32 undef)
 874   call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> undef, i1 false, <4 x i1> undef, i32 undef)
 875   call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> undef, i1 false, <8 x i1> undef, i32 undef)
 876   call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> undef, i1 false, <16 x i1> undef, i32 undef)
 877   call <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 878   call <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 879   call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 880   call <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 881   call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 882   call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef)
 883   call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef)
 884   call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef)
 885   call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef)
 886   call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 887   call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 888   call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 889   call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 890   call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 891   ret void
 892 }
 893
 894 declare i16 @llvm.bswap.i16(i16)
 895 declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>)
 896 declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>)
 897 declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
 898 declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)
 899 declare <vscale x 1 x i16> @llvm.bswap.nxv1i16(<vscale x 1 x i16>)
 900 declare <vscale x 2 x i16> @llvm.bswap.nxv2i16(<vscale x 2 x i16>)
 901 declare <vscale x 4 x i16> @llvm.bswap.nxv4i16(<vscale x 4 x i16>)
 902 declare <vscale x 8 x i16> @llvm.bswap.nxv8i16(<vscale x 8 x i16>)
 903 declare <vscale x 16 x i16> @llvm.bswap.nxv16i16(<vscale x 16 x i16>)
 904 declare i32 @llvm.bswap.i32(i32)
 905 declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>)
 906 declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
 907 declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)
 908 declare <16 x i32> @llvm.bswap.v16i32(<16 x i32>)
 909 declare <vscale x 1 x i32> @llvm.bswap.nxv1i32(<vscale x 1 x i32>)
 910 declare <vscale x 2 x i32> @llvm.bswap.nxv2i32(<vscale x 2 x i32>)
 911 declare <vscale x 4 x i32> @llvm.bswap.nxv4i32(<vscale x 4 x i32>)
 912 declare <vscale x 8 x i32> @llvm.bswap.nxv8i32(<vscale x 8 x i32>)
 913 declare <vscale x 16 x i32> @llvm.bswap.nxv16i32(<vscale x 16 x i32>)
 914 declare i64 @llvm.bswap.i64(i64)
 915 declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
 916 declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)
 917 declare <8 x i64> @llvm.bswap.v8i64(<8 x i64>)
 918 declare <16 x i64> @llvm.bswap.v16i64(<16 x i64>)
 919 declare <vscale x 1 x i64> @llvm.bswap.nxv1i64(<vscale x 1 x i64>)
 920 declare <vscale x 2 x i64> @llvm.bswap.nxv2i64(<vscale x 2 x i64>)
 921 declare <vscale x 4 x i64> @llvm.bswap.nxv4i64(<vscale x 4 x i64>)
 922 declare <vscale x 8 x i64> @llvm.bswap.nxv8i64(<vscale x 8 x i64>)
 923 declare <vscale x 16 x i64> @llvm.bswap.nxv16i64(<vscale x 16 x i64>)
 924
 925 declare i8 @llvm.bitreverse.i8(i8)
 926 declare <2 x i8> @llvm.bitreverse.v2i8(<2 x i8>)
 927 declare <4 x i8> @llvm.bitreverse.v4i8(<4 x i8>)
 928 declare <8 x i8> @llvm.bitreverse.v8i8(<8 x i8>)
 929 declare <16 x i8> @llvm.bitreverse.v16i8(<16 x i8>)
 930 declare <vscale x 1 x i8> @llvm.bitreverse.nxv1i8(<vscale x 1 x i8>)
 931 declare <vscale x 2 x i8> @llvm.bitreverse.nxv2i8(<vscale x 2 x i8>)
 932 declare <vscale x 4 x i8> @llvm.bitreverse.nxv4i8(<vscale x 4 x i8>)
 933 declare <vscale x 8 x i8> @llvm.bitreverse.nxv8i8(<vscale x 8 x i8>)
 934 declare <vscale x 16 x i8> @llvm.bitreverse.nxv16i8(<vscale x 16 x i8>)
 935 declare i16 @llvm.bitreverse.i16(i16)
 936 declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>)
 937 declare <4 x i16> @llvm.bitreverse.v4i16(<4 x i16>)
 938 declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>)
 939 declare <16 x i16> @llvm.bitreverse.v16i16(<16 x i16>)
 940 declare <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16>)
 941 declare <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16>)
 942 declare <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16>)
 943 declare <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16>)
 944 declare <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16>)
 945 declare i32 @llvm.bitreverse.i32(i32)
 946 declare <2 x i32> @llvm.bitreverse.v2i32(<2 x i32>)
 947 declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>)
 948 declare <8 x i32> @llvm.bitreverse.v8i32(<8 x i32>)
 949 declare <16 x i32> @llvm.bitreverse.v16i32(<16 x i32>)
 950 declare <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32>)
 951 declare <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32>)
 952 declare <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32>)
 953 declare <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32>)
 954 declare <vscale x 16 x i32> @llvm.bitreverse.nxv16i32(<vscale x 16 x i32>)
 955 declare i64 @llvm.bitreverse.i64(i64)
 956 declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>)
 957 declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>)
 958 declare <8 x i64> @llvm.bitreverse.v8i64(<8 x i64>)
 959 declare <16 x i64> @llvm.bitreverse.v16i64(<16 x i64>)
 960 declare <vscale x 1 x i64> @llvm.bitreverse.nxv1i64(<vscale x 1 x i64>)
 961 declare <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64>)
 962 declare <vscale x 4 x i64> @llvm.bitreverse.nxv4i64(<vscale x 4 x i64>)
 963 declare <vscale x 8 x i64> @llvm.bitreverse.nxv8i64(<vscale x 8 x i64>)
 964 declare <vscale x 16 x i64> @llvm.bitreverse.nxv16i64(<vscale x 16 x i64>)
 965
 966 declare i8 @llvm.ctpop.i8(i8)
 967 declare <2 x i8> @llvm.ctpop.v2i8(<2 x i8>)
 968 declare <4 x i8> @llvm.ctpop.v4i8(<4 x i8>)
 969 declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>)
 970 declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
 971 declare <vscale x 1 x i8> @llvm.ctpop.nxv1i8(<vscale x 1 x i8>)
 972 declare <vscale x 2 x i8> @llvm.ctpop.nxv2i8(<vscale x 2 x i8>)
 973 declare <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8>)
 974 declare <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8>)
 975 declare <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8>)
 976 declare i16 @llvm.ctpop.i16(i16)
 977 declare <2 x i16> @llvm.ctpop.v2i16(<2 x i16>)
 978 declare <4 x i16> @llvm.ctpop.v4i16(<4 x i16>)
 979 declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
 980 declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
 981 declare <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16>)
 982 declare <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16>)
 983 declare <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16>)
 984 declare <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16>)
 985 declare <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16>)
 986 declare i32 @llvm.ctpop.i32(i32)
 987 declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
 988 declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
 989 declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
 990 declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>)
 991 declare <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32>)
 992 declare <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32>)
 993 declare <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32>)
 994 declare <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32>)
 995 declare <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32>)
 996 declare i64 @llvm.ctpop.i64(i64)
 997 declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
 998 declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
 999 declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>)
1000 declare <16 x i64> @llvm.ctpop.v16i64(<16 x i64>)
1001 declare <vscale x 1 x i64> @llvm.ctpop.nxv1i64(<vscale x 1 x i64>)
1002 declare <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64>)
1003 declare <vscale x 4 x i64> @llvm.ctpop.nxv4i64(<vscale x 4 x i64>)
1004 declare <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64>)
1005 declare <vscale x 16 x i64> @llvm.ctpop.nxv16i64(<vscale x 16 x i64>)
1006
1007 declare <2 x i16> @llvm.vp.bswap.v2i16(<2 x i16>, <2 x i1>, i32)
1008 declare <4 x i16> @llvm.vp.bswap.v4i16(<4 x i16>, <4 x i1>, i32)
1009 declare <8 x i16> @llvm.vp.bswap.v8i16(<8 x i16>, <8 x i1>, i32)
1010 declare <16 x i16> @llvm.vp.bswap.v16i16(<16 x i16>, <16 x i1>, i32)
1011 declare <vscale x 1 x i16> @llvm.vp.bswap.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
1012 declare <vscale x 2 x i16> @llvm.vp.bswap.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
1013 declare <vscale x 4 x i16> @llvm.vp.bswap.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)
1014 declare <vscale x 8 x i16> @llvm.vp.bswap.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
1015 declare <vscale x 16 x i16> @llvm.vp.bswap.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32)
1016 declare <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32>, <2 x i1>, i32)
1017 declare <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32>, <4 x i1>, i32)
1018 declare <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32>, <8 x i1>, i32)
1019 declare <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32>, <16 x i1>, i32)
1020 declare <vscale x 1 x i32> @llvm.vp.bswap.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
1021 declare <vscale x 2 x i32> @llvm.vp.bswap.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
1022 declare <vscale x 4 x i32> @llvm.vp.bswap.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
1023 declare <vscale x 8 x i32> @llvm.vp.bswap.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
1024 declare <vscale x 16 x i32> @llvm.vp.bswap.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
1025 declare <2 x i64> @llvm.vp.bswap.v2i64(<2 x i64>, <2 x i1>, i32)
1026 declare <4 x i64> @llvm.vp.bswap.v4i64(<4 x i64>, <4 x i1>, i32)
1027 declare <8 x i64> @llvm.vp.bswap.v8i64(<8 x i64>, <8 x i1>, i32)
1028 declare <16 x i64> @llvm.vp.bswap.v16i64(<16 x i64>, <16 x i1>, i32)
1029 declare <vscale x 1 x i64> @llvm.vp.bswap.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32)
1030 declare <vscale x 2 x i64> @llvm.vp.bswap.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
1031 declare <vscale x 4 x i64> @llvm.vp.bswap.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32)
1032 declare <vscale x 8 x i64> @llvm.vp.bswap.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32)
1033 declare <vscale x 16 x i64> @llvm.vp.bswap.nxv16i64(<vscale x 16 x i64>, <vscale x 16 x i1>, i32)
1034
1035 declare <2 x i8> @llvm.ctlz.v2i8(<2 x i8>, i1)
1036 declare <4 x i8> @llvm.ctlz.v4i8(<4 x i8>, i1)
1037 declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1)
1038 declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1)
1039 declare <vscale x 1 x i8> @llvm.ctlz.nxv1i8(<vscale x 1 x i8>, i1)
1040 declare <vscale x 2 x i8> @llvm.ctlz.nxv2i8(<vscale x 2 x i8>, i1)
1041 declare <vscale x 4 x i8> @llvm.ctlz.nxv4i8(<vscale x 4 x i8>, i1)
1042 declare <vscale x 8 x i8> @llvm.ctlz.nxv8i8(<vscale x 8 x i8>, i1)
1043 declare <vscale x 16 x i8> @llvm.ctlz.nxv16i8(<vscale x 16 x i8>, i1)
1044 declare <vscale x 32 x i8> @llvm.ctlz.nxv32i8(<vscale x 32 x i8>, i1)
1045 declare <vscale x 64 x i8> @llvm.ctlz.nxv64i8(<vscale x 64 x i8>, i1)
1046 declare <2 x i16> @llvm.ctlz.v2i16(<2 x i16>, i1)
1047 declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1)
1048 declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1)
1049 declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1)
1050 declare <vscale x 1 x i16> @llvm.ctlz.nxv1i16(<vscale x 1 x i16>, i1)
1051 declare <vscale x 2 x i16> @llvm.ctlz.nxv2i16(<vscale x 2 x i16>, i1)
1052 declare <vscale x 4 x i16> @llvm.ctlz.nxv4i16(<vscale x 4 x i16>, i1)
1053 declare <vscale x 8 x i16> @llvm.ctlz.nxv8i16(<vscale x 8 x i16>, i1)
1054 declare <vscale x 16 x i16> @llvm.ctlz.nxv16i16(<vscale x 16 x i16>, i1)
1055 declare <vscale x 32 x i16> @llvm.ctlz.nxv32i16(<vscale x 32 x i16>, i1)
1056 declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1)
1057 declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)
1058 declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1)
1059 declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1)
1060 declare <vscale x 1 x i32> @llvm.ctlz.nxv1i32(<vscale x 1 x i32>, i1)
1061 declare <vscale x 2 x i32> @llvm.ctlz.nxv2i32(<vscale x 2 x i32>, i1)
1062 declare <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32>, i1)
1063 declare <vscale x 8 x i32> @llvm.ctlz.nxv8i32(<vscale x 8 x i32>, i1)
1064 declare <vscale x 16 x i32> @llvm.ctlz.nxv16i32(<vscale x 16 x i32>, i1)
1065 declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)
1066 declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1)
1067 declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1)
1068 declare <16 x i64> @llvm.ctlz.v16i64(<16 x i64>, i1)
1069 declare <vscale x 1 x i64> @llvm.ctlz.nxv1i64(<vscale x 1 x i64>, i1)
1070 declare <vscale x 2 x i64> @llvm.ctlz.nxv2i64(<vscale x 2 x i64>, i1)
1071 declare <vscale x 4 x i64> @llvm.ctlz.nxv4i64(<vscale x 4 x i64>, i1)
1072 declare <vscale x 8 x i64> @llvm.ctlz.nxv8i64(<vscale x 8 x i64>, i1)
1073 declare <vscale x 16 x i64> @llvm.ctlz.nxv16i64(<vscale x 16 x i64>, i1)
1074
1075 declare <2 x i8> @llvm.cttz.v2i8(<2 x i8>, i1)
1076 declare <4 x i8> @llvm.cttz.v4i8(<4 x i8>, i1)
1077 declare <8 x i8> @llvm.cttz.v8i8(<8 x i8>, i1)
1078 declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1)
1079 declare <vscale x 1 x i8> @llvm.cttz.nxv1i8(<vscale x 1 x i8>, i1)
1080 declare <vscale x 2 x i8> @llvm.cttz.nxv2i8(<vscale x 2 x i8>, i1)
1081 declare <vscale x 4 x i8> @llvm.cttz.nxv4i8(<vscale x 4 x i8>, i1)
1082 declare <vscale x 8 x i8> @llvm.cttz.nxv8i8(<vscale x 8 x i8>, i1)
1083 declare <vscale x 16 x i8> @llvm.cttz.nxv16i8(<vscale x 16 x i8>, i1)
1084 declare <vscale x 32 x i8> @llvm.cttz.nxv32i8(<vscale x 32 x i8>, i1)
1085 declare <vscale x 64 x i8> @llvm.cttz.nxv64i8(<vscale x 64 x i8>, i1)
1086 declare <2 x i16> @llvm.cttz.v2i16(<2 x i16>, i1)
1087 declare <4 x i16> @llvm.cttz.v4i16(<4 x i16>, i1)
1088 declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1)
1089 declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>, i1)
1090 declare <vscale x 1 x i16> @llvm.cttz.nxv1i16(<vscale x 1 x i16>, i1)
1091 declare <vscale x 2 x i16> @llvm.cttz.nxv2i16(<vscale x 2 x i16>, i1)
1092 declare <vscale x 4 x i16> @llvm.cttz.nxv4i16(<vscale x 4 x i16>, i1)
1093 declare <vscale x 8 x i16> @llvm.cttz.nxv8i16(<vscale x 8 x i16>, i1)
1094 declare <vscale x 16 x i16> @llvm.cttz.nxv16i16(<vscale x 16 x i16>, i1)
1095 declare <vscale x 32 x i16> @llvm.cttz.nxv32i16(<vscale x 32 x i16>, i1)
1096 declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1)
1097 declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)
1098 declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1)
1099 declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>, i1)
1100 declare <vscale x 1 x i32> @llvm.cttz.nxv1i32(<vscale x 1 x i32>, i1)
1101 declare <vscale x 2 x i32> @llvm.cttz.nxv2i32(<vscale x 2 x i32>, i1)
1102 declare <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32>, i1)
1103 declare <vscale x 8 x i32> @llvm.cttz.nxv8i32(<vscale x 8 x i32>, i1)
1104 declare <vscale x 16 x i32> @llvm.cttz.nxv16i32(<vscale x 16 x i32>, i1)
1105 declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
1106 declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1)
1107 declare <8 x i64> @llvm.cttz.v8i64(<8 x i64>, i1)
1108 declare <16 x i64> @llvm.cttz.v16i64(<16 x i64>, i1)
1109 declare <vscale x 1 x i64> @llvm.cttz.nxv1i64(<vscale x 1 x i64>, i1)
1110 declare <vscale x 2 x i64> @llvm.cttz.nxv2i64(<vscale x 2 x i64>, i1)
1111 declare <vscale x 4 x i64> @llvm.cttz.nxv4i64(<vscale x 4 x i64>, i1)
1112 declare <vscale x 8 x i64> @llvm.cttz.nxv8i64(<vscale x 8 x i64>, i1)
1113 declare <vscale x 16 x i64> @llvm.cttz.nxv16i64(<vscale x 16 x i64>, i1)
1114
1115 declare <2 x i8> @llvm.vp.ctpop.v2i8(<2 x i8>, <2 x i1>, i32)
1116 declare <4 x i8> @llvm.vp.ctpop.v4i8(<4 x i8>, <4 x i1>, i32)
1117 declare <8 x i8> @llvm.vp.ctpop.v8i8(<8 x i8>, <8 x i1>, i32)
1118 declare <16 x i8> @llvm.vp.ctpop.v16i8(<16 x i8>, <16 x i1>, i32)
1119 declare <vscale x 1 x i8> @llvm.vp.ctpop.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i1>, i32)
1120 declare <vscale x 2 x i8> @llvm.vp.ctpop.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32)
1121 declare <vscale x 4 x i8> @llvm.vp.ctpop.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, i32)
1122 declare <vscale x 8 x i8> @llvm.vp.ctpop.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i1>, i32)
1123 declare <vscale x 16 x i8> @llvm.vp.ctpop.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i32)
1124 declare <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16>, <2 x i1>, i32)
1125 declare <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16>, <4 x i1>, i32)
1126 declare <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16>, <8 x i1>, i32)
1127 declare <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16>, <16 x i1>, i32)
1128 declare <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
1129 declare <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
1130 declare <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)
1131 declare <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
1132 declare <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32)
1133 declare <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32>, <2 x i1>, i32)
1134 declare <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32>, <4 x i1>, i32)
1135 declare <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32>, <8 x i1>, i32)
1136 declare <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32>, <16 x i1>, i32)
1137 declare <vscale x 1 x i32> @llvm.vp.ctpop.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
1138 declare <vscale x 2 x i32> @llvm.vp.ctpop.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
1139 declare <vscale x 4 x i32> @llvm.vp.ctpop.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
1140 declare <vscale x 8 x i32> @llvm.vp.ctpop.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
1141 declare <vscale x 16 x i32> @llvm.vp.ctpop.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
1142 declare <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64>, <2 x i1>, i32)
1143 declare <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64>, <4 x i1>, i32)
1144 declare <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64>, <8 x i1>, i32)
1145 declare <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64>, <16 x i1>, i32)
1146 declare <vscale x 1 x i64> @llvm.vp.ctpop.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32)
1147 declare <vscale x 2 x i64> @llvm.vp.ctpop.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
1148 declare <vscale x 4 x i64> @llvm.vp.ctpop.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32)
1149 declare <vscale x 8 x i64> @llvm.vp.ctpop.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32)
1150 declare <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64>, <vscale x 16 x i1>, i32)
1151
1152 declare <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8>, i1 immarg, <2 x i1>, i32)
1153 declare <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8>, i1 immarg, <4 x i1>, i32)
1154 declare <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8>, i1 immarg, <8 x i1>, i32)
1155 declare <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8>, i1 immarg, <16 x i1>, i32)
1156 declare <vscale x 1 x i8> @llvm.vp.ctlz.nxv1i8(<vscale x 1 x i8>, i1 immarg, <vscale x 1 x i1>, i32)
1157 declare <vscale x 2 x i8> @llvm.vp.ctlz.nxv2i8(<vscale x 2 x i8>, i1 immarg, <vscale x 2 x i1>, i32)
1158 declare <vscale x 4 x i8> @llvm.vp.ctlz.nxv4i8(<vscale x 4 x i8>, i1 immarg, <vscale x 4 x i1>, i32)
1159 declare <vscale x 8 x i8> @llvm.vp.ctlz.nxv8i8(<vscale x 8 x i8>, i1 immarg, <vscale x 8 x i1>, i32)
1160 declare <vscale x 16 x i8> @llvm.vp.ctlz.nxv16i8(<vscale x 16 x i8>, i1 immarg, <vscale x 16 x i1>, i32)
1161 declare <vscale x 32 x i8> @llvm.vp.ctlz.nxv32i8(<vscale x 32 x i8>, i1 immarg, <vscale x 32 x i1>, i32)
1162 declare <vscale x 64 x i8> @llvm.vp.ctlz.nxv64i8(<vscale x 64 x i8>, i1 immarg, <vscale x 64 x i1>, i32)
1163 declare <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16>, i1 immarg, <2 x i1>, i32)
1164 declare <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16>, i1 immarg, <4 x i1>, i32)
1165 declare <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16>, i1 immarg, <8 x i1>, i32)
1166 declare <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16>, i1 immarg, <16 x i1>, i32)
1167 declare <vscale x 1 x i16> @llvm.vp.ctlz.nxv1i16(<vscale x 1 x i16>, i1 immarg, <vscale x 1 x i1>, i32)
1168 declare <vscale x 2 x i16> @llvm.vp.ctlz.nxv2i16(<vscale x 2 x i16>, i1 immarg, <vscale x 2 x i1>, i32)
1169 declare <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16>, i1 immarg, <vscale x 4 x i1>, i32)
1170 declare <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16>, i1 immarg, <vscale x 8 x i1>, i32)
1171 declare <vscale x 16 x i16> @llvm.vp.ctlz.nxv16i16(<vscale x 16 x i16>, i1 immarg, <vscale x 16 x i1>, i32)
1172 declare <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16>, i1 immarg, <vscale x 32 x i1>, i32)
1173 declare <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32>, i1 immarg, <2 x i1>, i32)
1174 declare <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32>, i1 immarg, <4 x i1>, i32)
1175 declare <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32>, i1 immarg, <8 x i1>, i32)
1176 declare <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32>, i1 immarg, <16 x i1>, i32)
1177 declare <vscale x 1 x i32> @llvm.vp.ctlz.nxv1i32(<vscale x 1 x i32>, i1 immarg, <vscale x 1 x i1>, i32)
1178 declare <vscale x 2 x i32> @llvm.vp.ctlz.nxv2i32(<vscale x 2 x i32>, i1 immarg, <vscale x 2 x i1>, i32)
1179 declare <vscale x 4 x i32> @llvm.vp.ctlz.nxv4i32(<vscale x 4 x i32>, i1 immarg, <vscale x 4 x i1>, i32)
1180 declare <vscale x 8 x i32> @llvm.vp.ctlz.nxv8i32(<vscale x 8 x i32>, i1 immarg, <vscale x 8 x i1>, i32)
1181 declare <vscale x 16 x i32> @llvm.vp.ctlz.nxv16i32(<vscale x 16 x i32>, i1 immarg, <vscale x 16 x i1>, i32)
1182 declare <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32)
1183 declare <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32)
1184 declare <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32)
1185 declare <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32)
1186 declare <vscale x 1 x i64> @llvm.vp.ctlz.nxv1i64(<vscale x 1 x i64>, i1 immarg, <vscale x 1 x i1>, i32)
1187 declare <vscale x 2 x i64> @llvm.vp.ctlz.nxv2i64(<vscale x 2 x i64>, i1 immarg, <vscale x 2 x i1>, i32)
1188 declare <vscale x 4 x i64> @llvm.vp.ctlz.nxv4i64(<vscale x 4 x i64>, i1 immarg, <vscale x 4 x i1>, i32)
1189 declare <vscale x 8 x i64> @llvm.vp.ctlz.nxv8i64(<vscale x 8 x i64>, i1 immarg, <vscale x 8 x i1>, i32)
1190 declare <vscale x 16 x i64> @llvm.vp.ctlz.nxv16i64(<vscale x 16 x i64>, i1 immarg, <vscale x 16 x i1>, i32)
1191
1192 declare <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8>, i1 immarg, <2 x i1>, i32)
1193 declare <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8>, i1 immarg, <4 x i1>, i32)
1194 declare <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8>, i1 immarg, <8 x i1>, i32)
1195 declare <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8>, i1 immarg, <16 x i1>, i32)
1196 declare <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8>, i1 immarg, <vscale x 1 x i1>, i32)
1197 declare <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8>, i1 immarg, <vscale x 2 x i1>, i32)
1198 declare <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8>, i1 immarg, <vscale x 4 x i1>, i32)
1199 declare <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8>, i1 immarg, <vscale x 8 x i1>, i32)
1200 declare <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8>, i1 immarg, <vscale x 16 x i1>, i32)
1201 declare <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8>, i1 immarg, <vscale x 32 x i1>, i32)
1202 declare <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8>, i1 immarg, <vscale x 64 x i1>, i32)
1203 declare <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16>, i1 immarg, <2 x i1>, i32)
1204 declare <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16>, i1 immarg, <4 x i1>, i32)
1205 declare <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16>, i1 immarg, <8 x i1>, i32)
1206 declare <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16>, i1 immarg, <16 x i1>, i32)
1207 declare <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16>, i1 immarg, <vscale x 1 x i1>, i32)
1208 declare <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16>, i1 immarg, <vscale x 2 x i1>, i32)
1209 declare <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16>, i1 immarg, <vscale x 4 x i1>, i32)
1210 declare <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16>, i1 immarg, <vscale x 8 x i1>, i32)
1211 declare <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16>, i1 immarg, <vscale x 16 x i1>, i32)
1212 declare <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16>, i1 immarg, <vscale x 32 x i1>, i32)
1213 declare <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32>, i1 immarg, <2 x i1>, i32)
1214 declare <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32>, i1 immarg, <4 x i1>, i32)
1215 declare <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32>, i1 immarg, <8 x i1>, i32)
1216 declare <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32>, i1 immarg, <16 x i1>, i32)
1217 declare <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32>, i1 immarg, <vscale x 1 x i1>, i32)
1218 declare <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32>, i1 immarg, <vscale x 2 x i1>, i32)
1219 declare <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32>, i1 immarg, <vscale x 4 x i1>, i32)
1220 declare <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32>, i1 immarg, <vscale x 8 x i1>, i32)
1221 declare <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32>, i1 immarg, <vscale x 16 x i1>, i32)
1222 declare <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32)
1223 declare <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32)
1224 declare <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32)
1225 declare <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32)
1226 declare <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64>, i1 immarg, <vscale x 1 x i1>, i32)
1227 declare <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64>, i1 immarg, <vscale x 2 x i1>, i32)
1228 declare <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64>, i1 immarg, <vscale x 4 x i1>, i32)
1229 declare <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64>, i1 immarg, <vscale x 8 x i1>, i32)
1230 declare <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64>, i1 immarg, <vscale x 16 x i1>, i32)