llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
   2 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d -riscv-v-vector-bits-min=-1 | FileCheck %s --check-prefixes=CHECK,NOZVBB
   3 ; A vector ctpop instruction exists only under ZVBB, so the ctpop costs are checked per configuration (NOZVBB and ZVBB prefixes)
   4 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+experimental-zvbb -riscv-v-vector-bits-min=-1 | FileCheck %s --check-prefixes=CHECK,ZVBB
   5
   6 define void @bswap() {
     ; Cost-model expectations for llvm.bswap on i16, i32 and i64 in scalar,
     ; fixed-length vector (2/4/8/16 elements) and scalable vector
     ; (vscale x 1/2/4/8/16) form. The expectations use the shared CHECK prefix,
     ; so both RUN configurations must report the same costs.
     ; Within one element type every vector width has the same expected cost,
     ; except <vscale x 16 x i64>, which is twice the others (62 vs 31).
   7 ; CHECK-LABEL: 'bswap'
   8 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %1 = call i16 @llvm.bswap.i16(i16 undef)
   9 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> undef)
  10 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> undef)
  11 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> undef)
  12 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %5 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> undef)
  13 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = call <vscale x 1 x i16> @llvm.bswap.nxv1i16(<vscale x 1 x i16> undef)
  14 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = call <vscale x 2 x i16> @llvm.bswap.nxv2i16(<vscale x 2 x i16> undef)
  15 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %8 = call <vscale x 4 x i16> @llvm.bswap.nxv4i16(<vscale x 4 x i16> undef)
  16 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %9 = call <vscale x 8 x i16> @llvm.bswap.nxv8i16(<vscale x 8 x i16> undef)
  17 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %10 = call <vscale x 16 x i16> @llvm.bswap.nxv16i16(<vscale x 16 x i16> undef)
  18 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %11 = call i32 @llvm.bswap.i32(i32 undef)
  19 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %12 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> undef)
  20 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %13 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> undef)
  21 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %14 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> undef)
  22 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %15 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> undef)
  23 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %16 = call <vscale x 1 x i32> @llvm.bswap.nxv1i32(<vscale x 1 x i32> undef)
  24 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %17 = call <vscale x 2 x i32> @llvm.bswap.nxv2i32(<vscale x 2 x i32> undef)
  25 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %18 = call <vscale x 4 x i32> @llvm.bswap.nxv4i32(<vscale x 4 x i32> undef)
  26 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %19 = call <vscale x 8 x i32> @llvm.bswap.nxv8i32(<vscale x 8 x i32> undef)
  27 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %20 = call <vscale x 16 x i32> @llvm.bswap.nxv16i32(<vscale x 16 x i32> undef)
  28 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %21 = call i64 @llvm.bswap.i64(i64 undef)
  29 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %22 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> undef)
  30 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %23 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> undef)
  31 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %24 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> undef)
  32 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %25 = call <16 x i64> @llvm.bswap.v16i64(<16 x i64> undef)
  33 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %26 = call <vscale x 1 x i64> @llvm.bswap.nxv1i64(<vscale x 1 x i64> undef)
  34 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %27 = call <vscale x 2 x i64> @llvm.bswap.nxv2i64(<vscale x 2 x i64> undef)
  35 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %28 = call <vscale x 4 x i64> @llvm.bswap.nxv4i64(<vscale x 4 x i64> undef)
  36 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %29 = call <vscale x 8 x i64> @llvm.bswap.nxv8i64(<vscale x 8 x i64> undef)
  37 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %30 = call <vscale x 16 x i64> @llvm.bswap.nxv16i64(<vscale x 16 x i64> undef)
  38 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
  39 ;
     ; The call results are unnamed and never used; each call is one line of
     ; expected cost-model output above, in the same order.
     ; NOTE(review): the scalable-vector calls spell the suffix "nvx" while the
     ; expected output above prints "nxv"; presumably the intrinsic name is
     ; remangled when the IR is read. Check any matching declarations elsewhere
     ; in the file before renaming these call sites.
  40   call i16 @llvm.bswap.i16(i16 undef)
  41   call <2 x i16> @llvm.bswap.v2i16(<2 x i16> undef)
  42   call <4 x i16> @llvm.bswap.v4i16(<4 x i16> undef)
  43   call <8 x i16> @llvm.bswap.v8i16(<8 x i16> undef)
  44   call <16 x i16> @llvm.bswap.v16i16(<16 x i16> undef)
  45   call <vscale x 1 x i16> @llvm.bswap.nvx1i16(<vscale x 1 x i16> undef)
  46   call <vscale x 2 x i16> @llvm.bswap.nvx2i16(<vscale x 2 x i16> undef)
  47   call <vscale x 4 x i16> @llvm.bswap.nvx4i16(<vscale x 4 x i16> undef)
  48   call <vscale x 8 x i16> @llvm.bswap.nvx8i16(<vscale x 8 x i16> undef)
  49   call <vscale x 16 x i16> @llvm.bswap.nvx16i16(<vscale x 16 x i16> undef)
  50   call i32 @llvm.bswap.i32(i32 undef)
  51   call <2 x i32> @llvm.bswap.v2i32(<2 x i32> undef)
  52   call <4 x i32> @llvm.bswap.v4i32(<4 x i32> undef)
  53   call <8 x i32> @llvm.bswap.v8i32(<8 x i32> undef)
  54   call <16 x i32> @llvm.bswap.v16i32(<16 x i32> undef)
  55   call <vscale x 1 x i32> @llvm.bswap.nvx1i32(<vscale x 1 x i32> undef)
  56   call <vscale x 2 x i32> @llvm.bswap.nvx2i32(<vscale x 2 x i32> undef)
  57   call <vscale x 4 x i32> @llvm.bswap.nvx4i32(<vscale x 4 x i32> undef)
  58   call <vscale x 8 x i32> @llvm.bswap.nvx8i32(<vscale x 8 x i32> undef)
  59   call <vscale x 16 x i32> @llvm.bswap.nvx16i32(<vscale x 16 x i32> undef)
  60   call i64 @llvm.bswap.i64(i64 undef)
  61   call <2 x i64> @llvm.bswap.v2i64(<2 x i64> undef)
  62   call <4 x i64> @llvm.bswap.v4i64(<4 x i64> undef)
  63   call <8 x i64> @llvm.bswap.v8i64(<8 x i64> undef)
  64   call <16 x i64> @llvm.bswap.v16i64(<16 x i64> undef)
  65   call <vscale x 1 x i64> @llvm.bswap.nvx1i64(<vscale x 1 x i64> undef)
  66   call <vscale x 2 x i64> @llvm.bswap.nvx2i64(<vscale x 2 x i64> undef)
  67   call <vscale x 4 x i64> @llvm.bswap.nvx4i64(<vscale x 4 x i64> undef)
  68   call <vscale x 8 x i64> @llvm.bswap.nvx8i64(<vscale x 8 x i64> undef)
  69   call <vscale x 16 x i64> @llvm.bswap.nvx16i64(<vscale x 16 x i64> undef)
  70   ret void
  71 }
  72
  73 define void @bitreverse() {
     ; Cost-model expectations for llvm.bitreverse on i8, i16, i32 and i64 in
     ; scalar, fixed-length vector (2/4/8/16 elements) and scalable vector
     ; (vscale x 1/2/4/8/16) form. The expectations use the shared CHECK prefix,
     ; so both RUN configurations must report the same costs.
     ; Within one element type every vector width has the same expected cost,
     ; except <vscale x 16 x i64>, which is twice the others (104 vs 52).
  74 ; CHECK-LABEL: 'bitreverse'
  75 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %1 = call i8 @llvm.bitreverse.i8(i8 undef)
  76 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %2 = call <2 x i8> @llvm.bitreverse.v2i8(<2 x i8> undef)
  77 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %3 = call <4 x i8> @llvm.bitreverse.v4i8(<4 x i8> undef)
  78 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %4 = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> undef)
  79 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %5 = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> undef)
  80 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %6 = call <vscale x 1 x i8> @llvm.bitreverse.nxv1i8(<vscale x 1 x i8> undef)
  81 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %7 = call <vscale x 2 x i8> @llvm.bitreverse.nxv2i8(<vscale x 2 x i8> undef)
  82 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %8 = call <vscale x 4 x i8> @llvm.bitreverse.nxv4i8(<vscale x 4 x i8> undef)
  83 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %9 = call <vscale x 8 x i8> @llvm.bitreverse.nxv8i8(<vscale x 8 x i8> undef)
  84 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %10 = call <vscale x 16 x i8> @llvm.bitreverse.nxv16i8(<vscale x 16 x i8> undef)
  85 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %11 = call i16 @llvm.bitreverse.i16(i16 undef)
  86 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %12 = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> undef)
  87 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %13 = call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> undef)
  88 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %14 = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> undef)
  89 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %15 = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> undef)
  90 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %16 = call <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16> undef)
  91 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %17 = call <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16> undef)
  92 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %18 = call <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16> undef)
  93 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %19 = call <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16> undef)
  94 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %20 = call <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16> undef)
  95 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %21 = call i32 @llvm.bitreverse.i32(i32 undef)
  96 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %22 = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> undef)
  97 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %23 = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> undef)
  98 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %24 = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> undef)
  99 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %25 = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> undef)
 100 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %26 = call <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32> undef)
 101 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %27 = call <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32> undef)
 102 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %28 = call <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32> undef)
 103 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %29 = call <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32> undef)
 104 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %30 = call <vscale x 16 x i32> @llvm.bitreverse.nxv16i32(<vscale x 16 x i32> undef)
 105 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %31 = call i64 @llvm.bitreverse.i64(i64 undef)
 106 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %32 = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> undef)
 107 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %33 = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> undef)
 108 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %34 = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> undef)
 109 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %35 = call <16 x i64> @llvm.bitreverse.v16i64(<16 x i64> undef)
 110 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %36 = call <vscale x 1 x i64> @llvm.bitreverse.nxv1i64(<vscale x 1 x i64> undef)
 111 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %37 = call <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64> undef)
 112 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %38 = call <vscale x 4 x i64> @llvm.bitreverse.nxv4i64(<vscale x 4 x i64> undef)
 113 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %39 = call <vscale x 8 x i64> @llvm.bitreverse.nxv8i64(<vscale x 8 x i64> undef)
 114 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %40 = call <vscale x 16 x i64> @llvm.bitreverse.nxv16i64(<vscale x 16 x i64> undef)
 115 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 116 ;
     ; The call results are unnamed and never used; each call is one line of
     ; expected cost-model output above, in the same order.
     ; NOTE(review): the scalable-vector calls spell the suffix "nvx" while the
     ; expected output above prints "nxv"; presumably the intrinsic name is
     ; remangled when the IR is read. Check any matching declarations elsewhere
     ; in the file before renaming these call sites.
 117   call i8 @llvm.bitreverse.i8(i8 undef)
 118   call <2 x i8> @llvm.bitreverse.v2i8(<2 x i8> undef)
 119   call <4 x i8> @llvm.bitreverse.v4i8(<4 x i8> undef)
 120   call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> undef)
 121   call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> undef)
 122   call <vscale x 1 x i8> @llvm.bitreverse.nvx1i8(<vscale x 1 x i8> undef)
 123   call <vscale x 2 x i8> @llvm.bitreverse.nvx2i8(<vscale x 2 x i8> undef)
 124   call <vscale x 4 x i8> @llvm.bitreverse.nvx4i8(<vscale x 4 x i8> undef)
 125   call <vscale x 8 x i8> @llvm.bitreverse.nvx8i8(<vscale x 8 x i8> undef)
 126   call <vscale x 16 x i8> @llvm.bitreverse.nvx16i8(<vscale x 16 x i8> undef)
 127   call i16 @llvm.bitreverse.i16(i16 undef)
 128   call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> undef)
 129   call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> undef)
 130   call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> undef)
 131   call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> undef)
 132   call <vscale x 1 x i16> @llvm.bitreverse.nvx1i16(<vscale x 1 x i16> undef)
 133   call <vscale x 2 x i16> @llvm.bitreverse.nvx2i16(<vscale x 2 x i16> undef)
 134   call <vscale x 4 x i16> @llvm.bitreverse.nvx4i16(<vscale x 4 x i16> undef)
 135   call <vscale x 8 x i16> @llvm.bitreverse.nvx8i16(<vscale x 8 x i16> undef)
 136   call <vscale x 16 x i16> @llvm.bitreverse.nvx16i16(<vscale x 16 x i16> undef)
 137   call i32 @llvm.bitreverse.i32(i32 undef)
 138   call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> undef)
 139   call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> undef)
 140   call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> undef)
 141   call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> undef)
 142   call <vscale x 1 x i32> @llvm.bitreverse.nvx1i32(<vscale x 1 x i32> undef)
 143   call <vscale x 2 x i32> @llvm.bitreverse.nvx2i32(<vscale x 2 x i32> undef)
 144   call <vscale x 4 x i32> @llvm.bitreverse.nvx4i32(<vscale x 4 x i32> undef)
 145   call <vscale x 8 x i32> @llvm.bitreverse.nvx8i32(<vscale x 8 x i32> undef)
 146   call <vscale x 16 x i32> @llvm.bitreverse.nvx16i32(<vscale x 16 x i32> undef)
 147   call i64 @llvm.bitreverse.i64(i64 undef)
 148   call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> undef)
 149   call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> undef)
 150   call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> undef)
 151   call <16 x i64> @llvm.bitreverse.v16i64(<16 x i64> undef)
 152   call <vscale x 1 x i64> @llvm.bitreverse.nvx1i64(<vscale x 1 x i64> undef)
 153   call <vscale x 2 x i64> @llvm.bitreverse.nvx2i64(<vscale x 2 x i64> undef)
 154   call <vscale x 4 x i64> @llvm.bitreverse.nvx4i64(<vscale x 4 x i64> undef)
 155   call <vscale x 8 x i64> @llvm.bitreverse.nvx8i64(<vscale x 8 x i64> undef)
 156   call <vscale x 16 x i64> @llvm.bitreverse.nvx16i64(<vscale x 16 x i64> undef)
 157   ret void
 158 }
 159
 160 define void @ctpop() {
     ; Cost-model expectations for llvm.ctpop on i8, i16, i32 and i64 in scalar,
     ; fixed-length vector (2/4/8/16 elements) and scalable vector
     ; (vscale x 1/2/4/8/16) form. Unlike the functions above, the expectations
     ; are split per RUN configuration: the NOZVBB block applies to the run
     ; without +experimental-zvbb, the ZVBB block to the run with it.
     ; Scalar calls are expected to cost 4 in both configurations. Vector calls
     ; are expected to cost 12/19/20/21 (by element width) without zvbb and 1
     ; with it; <vscale x 16 x i64> is twice its siblings in both blocks
     ; (42 vs 21, and 2 vs 1).
 161 ; NOZVBB-LABEL: 'ctpop'
 162 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = call i8 @llvm.ctpop.i8(i8 undef)
 163 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> undef)
 164 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %3 = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> undef)
 165 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %4 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> undef)
 166 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %5 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> undef)
 167 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %6 = call <vscale x 1 x i8> @llvm.ctpop.nxv1i8(<vscale x 1 x i8> undef)
 168 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %7 = call <vscale x 2 x i8> @llvm.ctpop.nxv2i8(<vscale x 2 x i8> undef)
 169 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %8 = call <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8> undef)
 170 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %9 = call <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8> undef)
 171 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %10 = call <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8> undef)
 172 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %11 = call i16 @llvm.ctpop.i16(i16 undef)
 173 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %12 = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> undef)
 174 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %13 = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> undef)
 175 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %14 = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> undef)
 176 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %15 = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> undef)
 177 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %16 = call <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16> undef)
 178 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %17 = call <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16> undef)
 179 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %18 = call <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16> undef)
 180 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %19 = call <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16> undef)
 181 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %20 = call <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16> undef)
 182 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %21 = call i32 @llvm.ctpop.i32(i32 undef)
 183 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %22 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> undef)
 184 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %23 = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> undef)
 185 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %24 = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> undef)
 186 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %25 = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> undef)
 187 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %26 = call <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32> undef)
 188 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %27 = call <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32> undef)
 189 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %28 = call <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32> undef)
 190 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %29 = call <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32> undef)
 191 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %30 = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> undef)
 192 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %31 = call i64 @llvm.ctpop.i64(i64 undef)
 193 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %32 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> undef)
 194 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %33 = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> undef)
 195 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %34 = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> undef)
 196 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %35 = call <16 x i64> @llvm.ctpop.v16i64(<16 x i64> undef)
 197 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %36 = call <vscale x 1 x i64> @llvm.ctpop.nxv1i64(<vscale x 1 x i64> undef)
 198 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %37 = call <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64> undef)
 199 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %38 = call <vscale x 4 x i64> @llvm.ctpop.nxv4i64(<vscale x 4 x i64> undef)
 200 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %39 = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> undef)
 201 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %40 = call <vscale x 16 x i64> @llvm.ctpop.nxv16i64(<vscale x 16 x i64> undef)
 202 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 203 ;
 204 ; ZVBB-LABEL: 'ctpop'
 205 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = call i8 @llvm.ctpop.i8(i8 undef)
 206 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> undef)
 207 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> undef)
 208 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> undef)
 209 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> undef)
 210 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x i8> @llvm.ctpop.nxv1i8(<vscale x 1 x i8> undef)
 211 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x i8> @llvm.ctpop.nxv2i8(<vscale x 2 x i8> undef)
 212 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8> undef)
 213 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8> undef)
 214 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8> undef)
 215 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %11 = call i16 @llvm.ctpop.i16(i16 undef)
 216 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> undef)
 217 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> undef)
 218 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> undef)
 219 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> undef)
 220 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16> undef)
 221 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16> undef)
 222 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16> undef)
 223 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16> undef)
 224 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %20 = call <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16> undef)
 225 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %21 = call i32 @llvm.ctpop.i32(i32 undef)
 226 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> undef)
 227 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %23 = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> undef)
 228 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %24 = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> undef)
 229 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> undef)
 230 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %26 = call <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32> undef)
 231 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %27 = call <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32> undef)
 232 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %28 = call <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32> undef)
 233 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %29 = call <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32> undef)
 234 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %30 = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> undef)
 235 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %31 = call i64 @llvm.ctpop.i64(i64 undef)
 236 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %32 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> undef)
 237 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %33 = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> undef)
 238 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %34 = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> undef)
 239 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %35 = call <16 x i64> @llvm.ctpop.v16i64(<16 x i64> undef)
 240 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %36 = call <vscale x 1 x i64> @llvm.ctpop.nxv1i64(<vscale x 1 x i64> undef)
 241 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %37 = call <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64> undef)
 242 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %38 = call <vscale x 4 x i64> @llvm.ctpop.nxv4i64(<vscale x 4 x i64> undef)
 243 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %39 = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> undef)
 244 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %40 = call <vscale x 16 x i64> @llvm.ctpop.nxv16i64(<vscale x 16 x i64> undef)
 245 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 246 ;
     ; The call results are unnamed and never used; each call is one line of
     ; expected cost-model output in each block above, in the same order.
     ; NOTE(review): the scalable-vector calls spell the suffix "nvx" while the
     ; expected output above prints "nxv"; presumably the intrinsic name is
     ; remangled when the IR is read. Check any matching declarations elsewhere
     ; in the file before renaming these call sites.
 247   call i8 @llvm.ctpop.i8(i8 undef)
 248   call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> undef)
 249   call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> undef)
 250   call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> undef)
 251   call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> undef)
 252   call <vscale x 1 x i8> @llvm.ctpop.nvx1i8(<vscale x 1 x i8> undef)
 253   call <vscale x 2 x i8> @llvm.ctpop.nvx2i8(<vscale x 2 x i8> undef)
 254   call <vscale x 4 x i8> @llvm.ctpop.nvx4i8(<vscale x 4 x i8> undef)
 255   call <vscale x 8 x i8> @llvm.ctpop.nvx8i8(<vscale x 8 x i8> undef)
 256   call <vscale x 16 x i8> @llvm.ctpop.nvx16i8(<vscale x 16 x i8> undef)
 257   call i16 @llvm.ctpop.i16(i16 undef)
 258   call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> undef)
 259   call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> undef)
 260   call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> undef)
 261   call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> undef)
 262   call <vscale x 1 x i16> @llvm.ctpop.nvx1i16(<vscale x 1 x i16> undef)
 263   call <vscale x 2 x i16> @llvm.ctpop.nvx2i16(<vscale x 2 x i16> undef)
 264   call <vscale x 4 x i16> @llvm.ctpop.nvx4i16(<vscale x 4 x i16> undef)
 265   call <vscale x 8 x i16> @llvm.ctpop.nvx8i16(<vscale x 8 x i16> undef)
 266   call <vscale x 16 x i16> @llvm.ctpop.nvx16i16(<vscale x 16 x i16> undef)
 267   call i32 @llvm.ctpop.i32(i32 undef)
 268   call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> undef)
 269   call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> undef)
 270   call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> undef)
 271   call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> undef)
 272   call <vscale x 1 x i32> @llvm.ctpop.nvx1i32(<vscale x 1 x i32> undef)
 273   call <vscale x 2 x i32> @llvm.ctpop.nvx2i32(<vscale x 2 x i32> undef)
 274   call <vscale x 4 x i32> @llvm.ctpop.nvx4i32(<vscale x 4 x i32> undef)
 275   call <vscale x 8 x i32> @llvm.ctpop.nvx8i32(<vscale x 8 x i32> undef)
 276   call <vscale x 16 x i32> @llvm.ctpop.nvx16i32(<vscale x 16 x i32> undef)
 277   call i64 @llvm.ctpop.i64(i64 undef)
 278   call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> undef)
 279   call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> undef)
 280   call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> undef)
 281   call <16 x i64> @llvm.ctpop.v16i64(<16 x i64> undef)
 282   call <vscale x 1 x i64> @llvm.ctpop.nvx1i64(<vscale x 1 x i64> undef)
 283   call <vscale x 2 x i64> @llvm.ctpop.nvx2i64(<vscale x 2 x i64> undef)
 284   call <vscale x 4 x i64> @llvm.ctpop.nvx4i64(<vscale x 4 x i64> undef)
 285   call <vscale x 8 x i64> @llvm.ctpop.nvx8i64(<vscale x 8 x i64> undef)
 286   call <vscale x 16 x i64> @llvm.ctpop.nvx16i64(<vscale x 16 x i64> undef)
 287   ret void
 288 }
 289
 290 define void @vp_bswap() {
     ; Cost-model expectations for the llvm.vp.bswap intrinsic on fixed-width
     ; (<2..16 x iN>) and scalable (<vscale x 1..16 x iN>) vectors of i16, i32
     ; and i64. Every call is given undef for its vector, <N x i1> and i32
     ; operands and its result is unused; only the printed cost is examined.
     ; Expected costs listed below: 3 for every i16 shape, 12 for every i32
     ; shape, 31 for the i64 shapes, and 62 for <vscale x 16 x i64>.
     ; The assertions use the prefix shared by both RUN lines of this file, so
     ; the same costs are expected with and without +experimental-zvbb.
 291 ; CHECK-LABEL: 'vp_bswap'
 292 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = call <2 x i16> @llvm.vp.bswap.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
 293 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call <4 x i16> @llvm.vp.bswap.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
 294 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = call <8 x i16> @llvm.vp.bswap.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
 295 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = call <16 x i16> @llvm.vp.bswap.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
 296 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %5 = call <vscale x 1 x i16> @llvm.vp.bswap.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
 297 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = call <vscale x 2 x i16> @llvm.vp.bswap.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 298 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = call <vscale x 4 x i16> @llvm.vp.bswap.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 299 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %8 = call <vscale x 8 x i16> @llvm.vp.bswap.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 300 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %9 = call <vscale x 16 x i16> @llvm.vp.bswap.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 301 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %10 = call <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
 302 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %11 = call <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
 303 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %12 = call <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
 304 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %13 = call <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
 305 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %14 = call <vscale x 1 x i32> @llvm.vp.bswap.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
 306 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %15 = call <vscale x 2 x i32> @llvm.vp.bswap.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 307 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %16 = call <vscale x 4 x i32> @llvm.vp.bswap.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 308 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %17 = call <vscale x 8 x i32> @llvm.vp.bswap.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 309 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %18 = call <vscale x 16 x i32> @llvm.vp.bswap.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 310 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %19 = call <2 x i64> @llvm.vp.bswap.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
 311 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %20 = call <4 x i64> @llvm.vp.bswap.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
 312 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %21 = call <8 x i64> @llvm.vp.bswap.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
 313 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %22 = call <16 x i64> @llvm.vp.bswap.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
 314 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %23 = call <vscale x 1 x i64> @llvm.vp.bswap.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
 315 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %24 = call <vscale x 2 x i64> @llvm.vp.bswap.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 316 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %25 = call <vscale x 4 x i64> @llvm.vp.bswap.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 317 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %26 = call <vscale x 8 x i64> @llvm.vp.bswap.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 318 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %27 = call <vscale x 16 x i64> @llvm.vp.bswap.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 319 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 320 ;
     ; NOTE(review): the scalable call sites below spell the type suffix "nvx",
     ; while the expected output above prints "nxv". Presumably the IR reader
     ; remangles the intrinsic name on load -- confirm before relying on it.
     ; The calls are unnamed, so the printed %1..%27 follow their order here.
 321   call <2 x i16> @llvm.vp.bswap.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
 322   call <4 x i16> @llvm.vp.bswap.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
 323   call <8 x i16> @llvm.vp.bswap.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
 324   call <16 x i16> @llvm.vp.bswap.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
 325   call <vscale x 1 x i16> @llvm.vp.bswap.nvx1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
 326   call <vscale x 2 x i16> @llvm.vp.bswap.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 327   call <vscale x 4 x i16> @llvm.vp.bswap.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 328   call <vscale x 8 x i16> @llvm.vp.bswap.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 329   call <vscale x 16 x i16> @llvm.vp.bswap.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 330   call <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
 331   call <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
 332   call <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
 333   call <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
 334   call <vscale x 1 x i32> @llvm.vp.bswap.nvx1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
 335   call <vscale x 2 x i32> @llvm.vp.bswap.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 336   call <vscale x 4 x i32> @llvm.vp.bswap.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 337   call <vscale x 8 x i32> @llvm.vp.bswap.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 338   call <vscale x 16 x i32> @llvm.vp.bswap.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 339   call <2 x i64> @llvm.vp.bswap.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
 340   call <4 x i64> @llvm.vp.bswap.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
 341   call <8 x i64> @llvm.vp.bswap.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
 342   call <16 x i64> @llvm.vp.bswap.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
 343   call <vscale x 1 x i64> @llvm.vp.bswap.nvx1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
 344   call <vscale x 2 x i64> @llvm.vp.bswap.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 345   call <vscale x 4 x i64> @llvm.vp.bswap.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 346   call <vscale x 8 x i64> @llvm.vp.bswap.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 347   call <vscale x 16 x i64> @llvm.vp.bswap.nvx16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 348   ret void
 349 }
 350
 351 define void @vp_ctpop() {
     ; Cost-model expectations for the llvm.vp.ctpop intrinsic on fixed-width
     ; (<2..16 x iN>) and scalable (<vscale x 1..16 x iN>) vectors. Every call
     ; is given undef operands and its result is unused; only the printed cost
     ; is examined. Expected costs listed below: 19 for i16, 20 for i32, 21
     ; for i64, and 42 for <vscale x 16 x i64>.
     ; The assertions use the prefix shared by both RUN lines of this file, so
     ; the same costs are expected with and without +experimental-zvbb.
     ; NOTE(review): the i16 group appears twice (%1..%9 and %10..%18) and no
     ; i8 shapes are covered here -- possibly one of the two groups was meant
     ; to use a different element width; confirm with the test's author.
 352 ; CHECK-LABEL: 'vp_ctpop'
 353 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %1 = call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
 354 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %2 = call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
 355 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %3 = call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
 356 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %4 = call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
 357 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %5 = call <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
 358 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %6 = call <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 359 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %7 = call <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 360 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %8 = call <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 361 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %9 = call <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 362 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %10 = call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
 363 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %11 = call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
 364 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %12 = call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
 365 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %13 = call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
 366 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %14 = call <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
 367 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %15 = call <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 368 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %16 = call <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 369 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %17 = call <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 370 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %18 = call <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 371 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %19 = call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
 372 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %20 = call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
 373 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %21 = call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
 374 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %22 = call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
 375 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %23 = call <vscale x 1 x i32> @llvm.vp.ctpop.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
 376 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %24 = call <vscale x 2 x i32> @llvm.vp.ctpop.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 377 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %25 = call <vscale x 4 x i32> @llvm.vp.ctpop.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 378 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %26 = call <vscale x 8 x i32> @llvm.vp.ctpop.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 379 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %27 = call <vscale x 16 x i32> @llvm.vp.ctpop.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 380 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %28 = call <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
 381 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %29 = call <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
 382 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %30 = call <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
 383 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %31 = call <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
 384 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %32 = call <vscale x 1 x i64> @llvm.vp.ctpop.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
 385 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %33 = call <vscale x 2 x i64> @llvm.vp.ctpop.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 386 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %34 = call <vscale x 4 x i64> @llvm.vp.ctpop.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 387 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %35 = call <vscale x 8 x i64> @llvm.vp.ctpop.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 388 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %36 = call <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 389 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 390 ;
     ; NOTE(review): the scalable call sites below spell the type suffix "nvx",
     ; while the expected output above prints "nxv". Presumably the IR reader
     ; remangles the intrinsic name on load -- confirm before relying on it.
     ; The calls are unnamed, so the printed %1..%36 follow their order here.
 391   call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
 392   call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
 393   call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
 394   call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
 395   call <vscale x 1 x i16> @llvm.vp.ctpop.nvx1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
 396   call <vscale x 2 x i16> @llvm.vp.ctpop.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 397   call <vscale x 4 x i16> @llvm.vp.ctpop.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 398   call <vscale x 8 x i16> @llvm.vp.ctpop.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 399   call <vscale x 16 x i16> @llvm.vp.ctpop.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 400   call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
 401   call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
 402   call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
 403   call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
 404   call <vscale x 1 x i16> @llvm.vp.ctpop.nvx1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
 405   call <vscale x 2 x i16> @llvm.vp.ctpop.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
 406   call <vscale x 4 x i16> @llvm.vp.ctpop.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
 407   call <vscale x 8 x i16> @llvm.vp.ctpop.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
 408   call <vscale x 16 x i16> @llvm.vp.ctpop.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
 409   call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
 410   call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
 411   call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
 412   call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
 413   call <vscale x 1 x i32> @llvm.vp.ctpop.nvx1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
 414   call <vscale x 2 x i32> @llvm.vp.ctpop.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
 415   call <vscale x 4 x i32> @llvm.vp.ctpop.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
 416   call <vscale x 8 x i32> @llvm.vp.ctpop.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
 417   call <vscale x 16 x i32> @llvm.vp.ctpop.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
 418   call <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
 419   call <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
 420   call <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
 421   call <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
 422   call <vscale x 1 x i64> @llvm.vp.ctpop.nvx1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
 423   call <vscale x 2 x i64> @llvm.vp.ctpop.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
 424   call <vscale x 4 x i64> @llvm.vp.ctpop.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
 425   call <vscale x 8 x i64> @llvm.vp.ctpop.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
 426   call <vscale x 16 x i64> @llvm.vp.ctpop.nvx16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
 427   ret void
 428 }
 429
 430 define void @vp_ctlz() {
     ; Cost-model expectations for the llvm.vp.ctlz intrinsic on fixed-width
     ; and scalable vectors of i8, i16, i32 and i64. Every call is given an
     ; undef vector, the constant i1 false as its second operand, and undef
     ; <N x i1> and i32 operands; results are unused and only the printed cost
     ; is examined. Expected costs listed below: 19 for i8 (up to
     ; <vscale x 64 x i8>), 28 for i16 (up to <vscale x 32 x i16>), 31 for
     ; i32, 35 for i64, and 70 for <vscale x 16 x i64>.
     ; The assertions use the prefix shared by both RUN lines of this file, so
     ; the same costs are expected with and without +experimental-zvbb.
     ; NOTE(review): the i16 group appears twice (%12..%21 and %22..%31) --
     ; possibly an unintended duplicate; confirm with the test's author.
 431 ; CHECK-LABEL: 'vp_ctlz'
 432 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %1 = call <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8> undef, i1 false, <2 x i1> undef, i32 undef)
 433 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %2 = call <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8> undef, i1 false, <4 x i1> undef, i32 undef)
 434 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %3 = call <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8> undef, i1 false, <8 x i1> undef, i32 undef)
 435 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %4 = call <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8> undef, i1 false, <16 x i1> undef, i32 undef)
 436 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %5 = call <vscale x 1 x i8> @llvm.vp.ctlz.nxv1i8(<vscale x 1 x i8> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 437 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %6 = call <vscale x 2 x i8> @llvm.vp.ctlz.nxv2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 438 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %7 = call <vscale x 4 x i8> @llvm.vp.ctlz.nxv4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 439 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %8 = call <vscale x 8 x i8> @llvm.vp.ctlz.nxv8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 440 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %9 = call <vscale x 16 x i8> @llvm.vp.ctlz.nxv16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 441 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %10 = call <vscale x 32 x i8> @llvm.vp.ctlz.nxv32i8(<vscale x 32 x i8> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
 442 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %11 = call <vscale x 64 x i8> @llvm.vp.ctlz.nxv64i8(<vscale x 64 x i8> undef, i1 false, <vscale x 64 x i1> undef, i32 undef)
 443 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %12 = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
 444 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %13 = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
 445 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %14 = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
 446 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %15 = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
 447 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %16 = call <vscale x 1 x i16> @llvm.vp.ctlz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 448 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %17 = call <vscale x 2 x i16> @llvm.vp.ctlz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 449 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %18 = call <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 450 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %19 = call <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 451 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %20 = call <vscale x 16 x i16> @llvm.vp.ctlz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 452 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %21 = call <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
 453 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %22 = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
 454 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %23 = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
 455 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %24 = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
 456 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %25 = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
 457 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %26 = call <vscale x 1 x i16> @llvm.vp.ctlz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 458 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %27 = call <vscale x 2 x i16> @llvm.vp.ctlz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 459 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %28 = call <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 460 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %29 = call <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 461 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %30 = call <vscale x 16 x i16> @llvm.vp.ctlz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 462 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %31 = call <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
 463 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %32 = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> undef, i1 false, <2 x i1> undef, i32 undef)
 464 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %33 = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> undef, i1 false, <4 x i1> undef, i32 undef)
 465 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %34 = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> undef, i1 false, <8 x i1> undef, i32 undef)
 466 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %35 = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> undef, i1 false, <16 x i1> undef, i32 undef)
 467 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %36 = call <vscale x 1 x i32> @llvm.vp.ctlz.nxv1i32(<vscale x 1 x i32> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 468 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %37 = call <vscale x 2 x i32> @llvm.vp.ctlz.nxv2i32(<vscale x 2 x i32> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 469 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %38 = call <vscale x 4 x i32> @llvm.vp.ctlz.nxv4i32(<vscale x 4 x i32> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 470 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %39 = call <vscale x 8 x i32> @llvm.vp.ctlz.nxv8i32(<vscale x 8 x i32> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 471 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %40 = call <vscale x 16 x i32> @llvm.vp.ctlz.nxv16i32(<vscale x 16 x i32> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 472 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %41 = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef)
 473 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %42 = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef)
 474 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %43 = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef)
 475 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %44 = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef)
 476 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %45 = call <vscale x 1 x i64> @llvm.vp.ctlz.nxv1i64(<vscale x 1 x i64> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 477 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %46 = call <vscale x 2 x i64> @llvm.vp.ctlz.nxv2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 478 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %47 = call <vscale x 4 x i64> @llvm.vp.ctlz.nxv4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 479 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %48 = call <vscale x 8 x i64> @llvm.vp.ctlz.nxv8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 480 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %49 = call <vscale x 16 x i64> @llvm.vp.ctlz.nxv16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 481 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 482 ;
     ; NOTE(review): the scalable call sites below spell the type suffix "nvx",
     ; while the expected output above prints "nxv". Presumably the IR reader
     ; remangles the intrinsic name on load -- confirm before relying on it.
     ; The calls are unnamed, so the printed %1..%49 follow their order here.
 483   call <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8> undef, i1 false, <2 x i1> undef, i32 undef)
 484   call <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8> undef, i1 false, <4 x i1> undef, i32 undef)
 485   call <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8> undef, i1 false, <8 x i1> undef, i32 undef)
 486   call <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8> undef, i1 false, <16 x i1> undef, i32 undef)
 487   call <vscale x 1 x i8> @llvm.vp.ctlz.nvx1i8(<vscale x 1 x i8> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 488   call <vscale x 2 x i8> @llvm.vp.ctlz.nvx2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 489   call <vscale x 4 x i8> @llvm.vp.ctlz.nvx4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 490   call <vscale x 8 x i8> @llvm.vp.ctlz.nvx8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 491   call <vscale x 16 x i8> @llvm.vp.ctlz.nvx16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 492   call <vscale x 32 x i8> @llvm.vp.ctlz.nvx32i8(<vscale x 32 x i8> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
 493   call <vscale x 64 x i8> @llvm.vp.ctlz.nvx64i8(<vscale x 64 x i8> undef, i1 false, <vscale x 64 x i1> undef, i32 undef)
 494   call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
 495   call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
 496   call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
 497   call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
 498   call <vscale x 1 x i16> @llvm.vp.ctlz.nvx1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 499   call <vscale x 2 x i16> @llvm.vp.ctlz.nvx2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 500   call <vscale x 4 x i16> @llvm.vp.ctlz.nvx4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 501   call <vscale x 8 x i16> @llvm.vp.ctlz.nvx8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 502   call <vscale x 16 x i16> @llvm.vp.ctlz.nvx16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 503   call <vscale x 32 x i16> @llvm.vp.ctlz.nvx32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
 504   call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
 505   call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
 506   call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
 507   call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
 508   call <vscale x 1 x i16> @llvm.vp.ctlz.nvx1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 509   call <vscale x 2 x i16> @llvm.vp.ctlz.nvx2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 510   call <vscale x 4 x i16> @llvm.vp.ctlz.nvx4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 511   call <vscale x 8 x i16> @llvm.vp.ctlz.nvx8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 512   call <vscale x 16 x i16> @llvm.vp.ctlz.nvx16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 513   call <vscale x 32 x i16> @llvm.vp.ctlz.nvx32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
 514   call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> undef, i1 false, <2 x i1> undef, i32 undef)
 515   call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> undef, i1 false, <4 x i1> undef, i32 undef)
 516   call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> undef, i1 false, <8 x i1> undef, i32 undef)
 517   call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> undef, i1 false, <16 x i1> undef, i32 undef)
 518   call <vscale x 1 x i32> @llvm.vp.ctlz.nvx1i32(<vscale x 1 x i32> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 519   call <vscale x 2 x i32> @llvm.vp.ctlz.nvx2i32(<vscale x 2 x i32> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 520   call <vscale x 4 x i32> @llvm.vp.ctlz.nvx4i32(<vscale x 4 x i32> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 521   call <vscale x 8 x i32> @llvm.vp.ctlz.nvx8i32(<vscale x 8 x i32> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 522   call <vscale x 16 x i32> @llvm.vp.ctlz.nvx16i32(<vscale x 16 x i32> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 523   call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef)
 524   call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef)
 525   call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef)
 526   call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef)
 527   call <vscale x 1 x i64> @llvm.vp.ctlz.nvx1i64(<vscale x 1 x i64> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 528   call <vscale x 2 x i64> @llvm.vp.ctlz.nvx2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 529   call <vscale x 4 x i64> @llvm.vp.ctlz.nvx4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 530   call <vscale x 8 x i64> @llvm.vp.ctlz.nvx8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 531   call <vscale x 16 x i64> @llvm.vp.ctlz.nvx16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 532   ret void
 533 }
 534
     ; vp_cttz: cost-model expectations for calls to the llvm.vp.cttz intrinsic.
     ;
     ; The body is a flat list of 49 calls on fixed-length (<N x iM>) and scalable
     ; (<vscale x N x iM>) vectors of i8, i16, i32 and i64. Every call passes undef
     ; for the vector, the mask and the trailing i32 operand, and `i1 false` for
     ; the immarg flag. The CHECK-NEXT lines pin one estimated cost per call, in
     ; call order: 16 for i8, 23 for i16, 24 for i32, 25 for i64, and 50 for the
     ; single <vscale x 16 x i64> case. Only the plain CHECK prefix is used, which
     ; both opt invocations at the top of the file enable, so the same costs are
     ; expected with and without +experimental-zvbb.
     ;
     ; NOTE(review): the ten i16 cases appear twice (%12-%21 and %22-%31); the
     ; second copy looks redundant. Confirm before removing it, since dropping it
     ; renumbers every later %N in the assertions.
     ; NOTE(review): the scalable calls spell the type suffix `nvx` while the
     ; expected output spells it `nxv`; presumably opt prints the canonical
     ; intrinsic name. Confirm before renaming; the matching declarations near the
     ; end of the file use the same `nvx` spelling.
     ; NOTE(review): the doubled cost for <vscale x 16 x i64> presumably comes from
     ; that type being split during legalization; confirm against the cost model.
     ;
     ; The assertions are autogenerated (see the note on the first line of the
     ; file); regenerate them with utils/update_analyze_test_checks.py instead of
     ; editing them by hand.
 535 define void @vp_cttz() {
 536 ; CHECK-LABEL: 'vp_cttz'
 537 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %1 = call <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8> undef, i1 false, <2 x i1> undef, i32 undef)
 538 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %2 = call <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8> undef, i1 false, <4 x i1> undef, i32 undef)
 539 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %3 = call <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8> undef, i1 false, <8 x i1> undef, i32 undef)
 540 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = call <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8> undef, i1 false, <16 x i1> undef, i32 undef)
 541 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = call <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 542 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = call <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 543 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %7 = call <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 544 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %8 = call <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 545 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %9 = call <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 546 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %10 = call <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
 547 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %11 = call <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8> undef, i1 false, <vscale x 64 x i1> undef, i32 undef)
 548 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %12 = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
 549 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %13 = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
 550 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %14 = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
 551 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %15 = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
 552 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %16 = call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 553 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %17 = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 554 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %18 = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 555 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %19 = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 556 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %20 = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 557 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %21 = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
 558 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %22 = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
 559 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %23 = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
 560 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %24 = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
 561 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %25 = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
 562 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %26 = call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 563 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %27 = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 564 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %28 = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 565 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %29 = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 566 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %30 = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 567 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %31 = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
 568 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %32 = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> undef, i1 false, <2 x i1> undef, i32 undef)
 569 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %33 = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> undef, i1 false, <4 x i1> undef, i32 undef)
 570 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %34 = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> undef, i1 false, <8 x i1> undef, i32 undef)
 571 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %35 = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> undef, i1 false, <16 x i1> undef, i32 undef)
 572 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %36 = call <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 573 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %37 = call <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 574 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %38 = call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 575 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %39 = call <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 576 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %40 = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 577 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %41 = call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef)
 578 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %42 = call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef)
 579 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %43 = call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef)
 580 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %44 = call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef)
 581 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %45 = call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 582 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %46 = call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 583 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %47 = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 584 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %48 = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 585 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %49 = call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 586 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 587 ;
       ; i8 elements: results %1-%11.
 588   call <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8> undef, i1 false, <2 x i1> undef, i32 undef)
 589   call <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8> undef, i1 false, <4 x i1> undef, i32 undef)
 590   call <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8> undef, i1 false, <8 x i1> undef, i32 undef)
 591   call <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8> undef, i1 false, <16 x i1> undef, i32 undef)
 592   call <vscale x 1 x i8> @llvm.vp.cttz.nvx1i8(<vscale x 1 x i8> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 593   call <vscale x 2 x i8> @llvm.vp.cttz.nvx2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 594   call <vscale x 4 x i8> @llvm.vp.cttz.nvx4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 595   call <vscale x 8 x i8> @llvm.vp.cttz.nvx8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 596   call <vscale x 16 x i8> @llvm.vp.cttz.nvx16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 597   call <vscale x 32 x i8> @llvm.vp.cttz.nvx32i8(<vscale x 32 x i8> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
 598   call <vscale x 64 x i8> @llvm.vp.cttz.nvx64i8(<vscale x 64 x i8> undef, i1 false, <vscale x 64 x i1> undef, i32 undef)
       ; i16 elements: results %12-%21.
 599   call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
 600   call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
 601   call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
 602   call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
 603   call <vscale x 1 x i16> @llvm.vp.cttz.nvx1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 604   call <vscale x 2 x i16> @llvm.vp.cttz.nvx2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 605   call <vscale x 4 x i16> @llvm.vp.cttz.nvx4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 606   call <vscale x 8 x i16> @llvm.vp.cttz.nvx8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 607   call <vscale x 16 x i16> @llvm.vp.cttz.nvx16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 608   call <vscale x 32 x i16> @llvm.vp.cttz.nvx32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
       ; i16 elements again: results %22-%31, the same ten calls as the group above.
 609   call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
 610   call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
 611   call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
 612   call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
 613   call <vscale x 1 x i16> @llvm.vp.cttz.nvx1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 614   call <vscale x 2 x i16> @llvm.vp.cttz.nvx2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 615   call <vscale x 4 x i16> @llvm.vp.cttz.nvx4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 616   call <vscale x 8 x i16> @llvm.vp.cttz.nvx8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 617   call <vscale x 16 x i16> @llvm.vp.cttz.nvx16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 618   call <vscale x 32 x i16> @llvm.vp.cttz.nvx32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
       ; i32 elements: results %32-%40.
 619   call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> undef, i1 false, <2 x i1> undef, i32 undef)
 620   call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> undef, i1 false, <4 x i1> undef, i32 undef)
 621   call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> undef, i1 false, <8 x i1> undef, i32 undef)
 622   call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> undef, i1 false, <16 x i1> undef, i32 undef)
 623   call <vscale x 1 x i32> @llvm.vp.cttz.nvx1i32(<vscale x 1 x i32> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 624   call <vscale x 2 x i32> @llvm.vp.cttz.nvx2i32(<vscale x 2 x i32> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 625   call <vscale x 4 x i32> @llvm.vp.cttz.nvx4i32(<vscale x 4 x i32> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 626   call <vscale x 8 x i32> @llvm.vp.cttz.nvx8i32(<vscale x 8 x i32> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 627   call <vscale x 16 x i32> @llvm.vp.cttz.nvx16i32(<vscale x 16 x i32> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
       ; i64 elements: results %41-%49.
 628   call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef)
 629   call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef)
 630   call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef)
 631   call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef)
 632   call <vscale x 1 x i64> @llvm.vp.cttz.nvx1i64(<vscale x 1 x i64> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
 633   call <vscale x 2 x i64> @llvm.vp.cttz.nvx2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
 634   call <vscale x 4 x i64> @llvm.vp.cttz.nvx4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
 635   call <vscale x 8 x i64> @llvm.vp.cttz.nvx8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
 636   call <vscale x 16 x i64> @llvm.vp.cttz.nvx16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
 637   ret void
 638 }
 639
     ; Declarations for the llvm.bswap intrinsics: scalar i16, i32 and i64, plus
     ; fixed-length (2, 4, 8, 16 elements) and scalable (vscale x 1, 2, 4, 8, 16)
     ; vectors of each element type. Each takes one operand and returns the same
     ; type.
     ; NOTE(review): the scalable variants are spelled `nvx...` here, whereas the
     ; expected output in the 'bswap' assertions prints `nxv...`; presumably the
     ; names are canonicalized when the module is read. Confirm before renaming,
     ; and keep the spelling in sync with the call sites.
 640 declare i16 @llvm.bswap.i16(i16)
 641 declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>)
 642 declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>)
 643 declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
 644 declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)
 645 declare <vscale x 1 x i16> @llvm.bswap.nvx1i16(<vscale x 1 x i16>)
 646 declare <vscale x 2 x i16> @llvm.bswap.nvx2i16(<vscale x 2 x i16>)
 647 declare <vscale x 4 x i16> @llvm.bswap.nvx4i16(<vscale x 4 x i16>)
 648 declare <vscale x 8 x i16> @llvm.bswap.nvx8i16(<vscale x 8 x i16>)
 649 declare <vscale x 16 x i16> @llvm.bswap.nvx16i16(<vscale x 16 x i16>)
 650 declare i32 @llvm.bswap.i32(i32)
 651 declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>)
 652 declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
 653 declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)
 654 declare <16 x i32> @llvm.bswap.v16i32(<16 x i32>)
 655 declare <vscale x 1 x i32> @llvm.bswap.nvx1i32(<vscale x 1 x i32>)
 656 declare <vscale x 2 x i32> @llvm.bswap.nvx2i32(<vscale x 2 x i32>)
 657 declare <vscale x 4 x i32> @llvm.bswap.nvx4i32(<vscale x 4 x i32>)
 658 declare <vscale x 8 x i32> @llvm.bswap.nvx8i32(<vscale x 8 x i32>)
 659 declare <vscale x 16 x i32> @llvm.bswap.nvx16i32(<vscale x 16 x i32>)
 660 declare i64 @llvm.bswap.i64(i64)
 661 declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
 662 declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)
 663 declare <8 x i64> @llvm.bswap.v8i64(<8 x i64>)
 664 declare <16 x i64> @llvm.bswap.v16i64(<16 x i64>)
 665 declare <vscale x 1 x i64> @llvm.bswap.nvx1i64(<vscale x 1 x i64>)
 666 declare <vscale x 2 x i64> @llvm.bswap.nvx2i64(<vscale x 2 x i64>)
 667 declare <vscale x 4 x i64> @llvm.bswap.nvx4i64(<vscale x 4 x i64>)
 668 declare <vscale x 8 x i64> @llvm.bswap.nvx8i64(<vscale x 8 x i64>)
 669 declare <vscale x 16 x i64> @llvm.bswap.nvx16i64(<vscale x 16 x i64>)
 670
     ; Declarations for the llvm.bitreverse intrinsics: scalar i8, i16, i32 and
     ; i64, plus fixed-length (2, 4, 8, 16 elements) and scalable
     ; (vscale x 1, 2, 4, 8, 16) vectors of each element type. Each takes one
     ; operand and returns the same type.
     ; NOTE(review): the scalable variants use the `nvx...` suffix spelling rather
     ; than `nxv...`; keep it in sync with the call sites if it is ever renamed.
 671 declare i8 @llvm.bitreverse.i8(i8)
 672 declare <2 x i8> @llvm.bitreverse.v2i8(<2 x i8>)
 673 declare <4 x i8> @llvm.bitreverse.v4i8(<4 x i8>)
 674 declare <8 x i8> @llvm.bitreverse.v8i8(<8 x i8>)
 675 declare <16 x i8> @llvm.bitreverse.v16i8(<16 x i8>)
 676 declare <vscale x 1 x i8> @llvm.bitreverse.nvx1i8(<vscale x 1 x i8>)
 677 declare <vscale x 2 x i8> @llvm.bitreverse.nvx2i8(<vscale x 2 x i8>)
 678 declare <vscale x 4 x i8> @llvm.bitreverse.nvx4i8(<vscale x 4 x i8>)
 679 declare <vscale x 8 x i8> @llvm.bitreverse.nvx8i8(<vscale x 8 x i8>)
 680 declare <vscale x 16 x i8> @llvm.bitreverse.nvx16i8(<vscale x 16 x i8>)
 681 declare i16 @llvm.bitreverse.i16(i16)
 682 declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>)
 683 declare <4 x i16> @llvm.bitreverse.v4i16(<4 x i16>)
 684 declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>)
 685 declare <16 x i16> @llvm.bitreverse.v16i16(<16 x i16>)
 686 declare <vscale x 1 x i16> @llvm.bitreverse.nvx1i16(<vscale x 1 x i16>)
 687 declare <vscale x 2 x i16> @llvm.bitreverse.nvx2i16(<vscale x 2 x i16>)
 688 declare <vscale x 4 x i16> @llvm.bitreverse.nvx4i16(<vscale x 4 x i16>)
 689 declare <vscale x 8 x i16> @llvm.bitreverse.nvx8i16(<vscale x 8 x i16>)
 690 declare <vscale x 16 x i16> @llvm.bitreverse.nvx16i16(<vscale x 16 x i16>)
 691 declare i32 @llvm.bitreverse.i32(i32)
 692 declare <2 x i32> @llvm.bitreverse.v2i32(<2 x i32>)
 693 declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>)
 694 declare <8 x i32> @llvm.bitreverse.v8i32(<8 x i32>)
 695 declare <16 x i32> @llvm.bitreverse.v16i32(<16 x i32>)
 696 declare <vscale x 1 x i32> @llvm.bitreverse.nvx1i32(<vscale x 1 x i32>)
 697 declare <vscale x 2 x i32> @llvm.bitreverse.nvx2i32(<vscale x 2 x i32>)
 698 declare <vscale x 4 x i32> @llvm.bitreverse.nvx4i32(<vscale x 4 x i32>)
 699 declare <vscale x 8 x i32> @llvm.bitreverse.nvx8i32(<vscale x 8 x i32>)
 700 declare <vscale x 16 x i32> @llvm.bitreverse.nvx16i32(<vscale x 16 x i32>)
 701 declare i64 @llvm.bitreverse.i64(i64)
 702 declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>)
 703 declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>)
 704 declare <8 x i64> @llvm.bitreverse.v8i64(<8 x i64>)
 705 declare <16 x i64> @llvm.bitreverse.v16i64(<16 x i64>)
 706 declare <vscale x 1 x i64> @llvm.bitreverse.nvx1i64(<vscale x 1 x i64>)
 707 declare <vscale x 2 x i64> @llvm.bitreverse.nvx2i64(<vscale x 2 x i64>)
 708 declare <vscale x 4 x i64> @llvm.bitreverse.nvx4i64(<vscale x 4 x i64>)
 709 declare <vscale x 8 x i64> @llvm.bitreverse.nvx8i64(<vscale x 8 x i64>)
 710 declare <vscale x 16 x i64> @llvm.bitreverse.nvx16i64(<vscale x 16 x i64>)
 711
     ; Declarations for the llvm.ctpop intrinsics: scalar i8, i16, i32 and i64,
     ; plus fixed-length (2, 4, 8, 16 elements) and scalable
     ; (vscale x 1, 2, 4, 8, 16) vectors of each element type. Each takes one
     ; operand and returns the same type.
     ; NOTE(review): the scalable variants use the `nvx...` suffix spelling rather
     ; than `nxv...`; keep it in sync with the call sites if it is ever renamed.
 712 declare i8 @llvm.ctpop.i8(i8)
 713 declare <2 x i8> @llvm.ctpop.v2i8(<2 x i8>)
 714 declare <4 x i8> @llvm.ctpop.v4i8(<4 x i8>)
 715 declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>)
 716 declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
 717 declare <vscale x 1 x i8> @llvm.ctpop.nvx1i8(<vscale x 1 x i8>)
 718 declare <vscale x 2 x i8> @llvm.ctpop.nvx2i8(<vscale x 2 x i8>)
 719 declare <vscale x 4 x i8> @llvm.ctpop.nvx4i8(<vscale x 4 x i8>)
 720 declare <vscale x 8 x i8> @llvm.ctpop.nvx8i8(<vscale x 8 x i8>)
 721 declare <vscale x 16 x i8> @llvm.ctpop.nvx16i8(<vscale x 16 x i8>)
 722 declare i16 @llvm.ctpop.i16(i16)
 723 declare <2 x i16> @llvm.ctpop.v2i16(<2 x i16>)
 724 declare <4 x i16> @llvm.ctpop.v4i16(<4 x i16>)
 725 declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
 726 declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
 727 declare <vscale x 1 x i16> @llvm.ctpop.nvx1i16(<vscale x 1 x i16>)
 728 declare <vscale x 2 x i16> @llvm.ctpop.nvx2i16(<vscale x 2 x i16>)
 729 declare <vscale x 4 x i16> @llvm.ctpop.nvx4i16(<vscale x 4 x i16>)
 730 declare <vscale x 8 x i16> @llvm.ctpop.nvx8i16(<vscale x 8 x i16>)
 731 declare <vscale x 16 x i16> @llvm.ctpop.nvx16i16(<vscale x 16 x i16>)
 732 declare i32 @llvm.ctpop.i32(i32)
 733 declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
 734 declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
 735 declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
 736 declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>)
 737 declare <vscale x 1 x i32> @llvm.ctpop.nvx1i32(<vscale x 1 x i32>)
 738 declare <vscale x 2 x i32> @llvm.ctpop.nvx2i32(<vscale x 2 x i32>)
 739 declare <vscale x 4 x i32> @llvm.ctpop.nvx4i32(<vscale x 4 x i32>)
 740 declare <vscale x 8 x i32> @llvm.ctpop.nvx8i32(<vscale x 8 x i32>)
 741 declare <vscale x 16 x i32> @llvm.ctpop.nvx16i32(<vscale x 16 x i32>)
 742 declare i64 @llvm.ctpop.i64(i64)
 743 declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
 744 declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
 745 declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>)
 746 declare <16 x i64> @llvm.ctpop.v16i64(<16 x i64>)
 747 declare <vscale x 1 x i64> @llvm.ctpop.nvx1i64(<vscale x 1 x i64>)
 748 declare <vscale x 2 x i64> @llvm.ctpop.nvx2i64(<vscale x 2 x i64>)
 749 declare <vscale x 4 x i64> @llvm.ctpop.nvx4i64(<vscale x 4 x i64>)
 750 declare <vscale x 8 x i64> @llvm.ctpop.nvx8i64(<vscale x 8 x i64>)
 751 declare <vscale x 16 x i64> @llvm.ctpop.nvx16i64(<vscale x 16 x i64>)
 752
     ; Declarations for the llvm.vp.bswap intrinsics on fixed-length (2, 4, 8, 16
     ; elements) and scalable (vscale x 1, 2, 4, 8, 16) vectors of i16, i32 and
     ; i64. Each takes the source vector, an i1 mask vector with the same element
     ; count, and a trailing i32 (the explicit vector length per the LangRef), and
     ; returns the source vector type.
     ; NOTE(review): the scalable variants use the `nvx...` suffix spelling rather
     ; than `nxv...`; keep it in sync with the call sites if it is ever renamed.
 753 declare <2 x i16> @llvm.vp.bswap.v2i16(<2 x i16>, <2 x i1>, i32)
 754 declare <4 x i16> @llvm.vp.bswap.v4i16(<4 x i16>, <4 x i1>, i32)
 755 declare <8 x i16> @llvm.vp.bswap.v8i16(<8 x i16>, <8 x i1>, i32)
 756 declare <16 x i16> @llvm.vp.bswap.v16i16(<16 x i16>, <16 x i1>, i32)
 757 declare <vscale x 1 x i16> @llvm.vp.bswap.nvx1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
 758 declare <vscale x 2 x i16> @llvm.vp.bswap.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
 759 declare <vscale x 4 x i16> @llvm.vp.bswap.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)
 760 declare <vscale x 8 x i16> @llvm.vp.bswap.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
 761 declare <vscale x 16 x i16> @llvm.vp.bswap.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32)
 762 declare <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32>, <2 x i1>, i32)
 763 declare <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32>, <4 x i1>, i32)
 764 declare <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32>, <8 x i1>, i32)
 765 declare <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32>, <16 x i1>, i32)
 766 declare <vscale x 1 x i32> @llvm.vp.bswap.nvx1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
 767 declare <vscale x 2 x i32> @llvm.vp.bswap.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
 768 declare <vscale x 4 x i32> @llvm.vp.bswap.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
 769 declare <vscale x 8 x i32> @llvm.vp.bswap.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
 770 declare <vscale x 16 x i32> @llvm.vp.bswap.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
 771 declare <2 x i64> @llvm.vp.bswap.v2i64(<2 x i64>, <2 x i1>, i32)
 772 declare <4 x i64> @llvm.vp.bswap.v4i64(<4 x i64>, <4 x i1>, i32)
 773 declare <8 x i64> @llvm.vp.bswap.v8i64(<8 x i64>, <8 x i1>, i32)
 774 declare <16 x i64> @llvm.vp.bswap.v16i64(<16 x i64>, <16 x i1>, i32)
 775 declare <vscale x 1 x i64> @llvm.vp.bswap.nvx1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32)
 776 declare <vscale x 2 x i64> @llvm.vp.bswap.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
 777 declare <vscale x 4 x i64> @llvm.vp.bswap.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32)
 778 declare <vscale x 8 x i64> @llvm.vp.bswap.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32)
 779 declare <vscale x 16 x i64> @llvm.vp.bswap.nvx16i64(<vscale x 16 x i64>, <vscale x 16 x i1>, i32)
 780
     ; Declarations for the llvm.vp.ctpop intrinsics on fixed-length (2, 4, 8, 16
     ; elements) and scalable (vscale x 1, 2, 4, 8, 16) vectors of i8, i16, i32
     ; and i64. Each takes the source vector, an i1 mask vector with the same
     ; element count, and a trailing i32 (the explicit vector length per the
     ; LangRef), and returns the source vector type.
     ; NOTE(review): the scalable variants use the `nvx...` suffix spelling rather
     ; than `nxv...`; keep it in sync with the call sites if it is ever renamed.
 781 declare <2 x i8> @llvm.vp.ctpop.v2i8(<2 x i8>, <2 x i1>, i32)
 782 declare <4 x i8> @llvm.vp.ctpop.v4i8(<4 x i8>, <4 x i1>, i32)
 783 declare <8 x i8> @llvm.vp.ctpop.v8i8(<8 x i8>, <8 x i1>, i32)
 784 declare <16 x i8> @llvm.vp.ctpop.v16i8(<16 x i8>, <16 x i1>, i32)
 785 declare <vscale x 1 x i8> @llvm.vp.ctpop.nvx1i8(<vscale x 1 x i8>, <vscale x 1 x i1>, i32)
 786 declare <vscale x 2 x i8> @llvm.vp.ctpop.nvx2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32)
 787 declare <vscale x 4 x i8> @llvm.vp.ctpop.nvx4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, i32)
 788 declare <vscale x 8 x i8> @llvm.vp.ctpop.nvx8i8(<vscale x 8 x i8>, <vscale x 8 x i1>, i32)
 789 declare <vscale x 16 x i8> @llvm.vp.ctpop.nvx16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i32)
 790 declare <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16>, <2 x i1>, i32)
 791 declare <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16>, <4 x i1>, i32)
 792 declare <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16>, <8 x i1>, i32)
 793 declare <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16>, <16 x i1>, i32)
 794 declare <vscale x 1 x i16> @llvm.vp.ctpop.nvx1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
 795 declare <vscale x 2 x i16> @llvm.vp.ctpop.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
 796 declare <vscale x 4 x i16> @llvm.vp.ctpop.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)
 797 declare <vscale x 8 x i16> @llvm.vp.ctpop.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
 798 declare <vscale x 16 x i16> @llvm.vp.ctpop.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32)
 799 declare <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32>, <2 x i1>, i32)
 800 declare <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32>, <4 x i1>, i32)
 801 declare <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32>, <8 x i1>, i32)
 802 declare <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32>, <16 x i1>, i32)
 803 declare <vscale x 1 x i32> @llvm.vp.ctpop.nvx1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
 804 declare <vscale x 2 x i32> @llvm.vp.ctpop.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
 805 declare <vscale x 4 x i32> @llvm.vp.ctpop.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
 806 declare <vscale x 8 x i32> @llvm.vp.ctpop.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
 807 declare <vscale x 16 x i32> @llvm.vp.ctpop.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
 808 declare <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64>, <2 x i1>, i32)
 809 declare <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64>, <4 x i1>, i32)
 810 declare <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64>, <8 x i1>, i32)
 811 declare <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64>, <16 x i1>, i32)
 812 declare <vscale x 1 x i64> @llvm.vp.ctpop.nvx1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32)
 813 declare <vscale x 2 x i64> @llvm.vp.ctpop.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
 814 declare <vscale x 4 x i64> @llvm.vp.ctpop.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32)
 815 declare <vscale x 8 x i64> @llvm.vp.ctpop.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32)
 816 declare <vscale x 16 x i64> @llvm.vp.ctpop.nvx16i64(<vscale x 16 x i64>, <vscale x 16 x i1>, i32)
 817
     ; Declarations for the llvm.vp.ctlz intrinsics on fixed-length and scalable
     ; vectors of i8, i16, i32 and i64. Fixed-length variants have 2, 4, 8 or 16
     ; elements; scalable variants go up to vscale x 64 for i8, vscale x 32 for
     ; i16 and vscale x 16 for i32 and i64. Each takes the source vector, an
     ; `i1 immarg` flag, an i1 mask vector with the same element count, and a
     ; trailing i32 (the explicit vector length per the LangRef), and returns the
     ; source vector type.
     ; NOTE(review): the scalable variants use the `nvx...` suffix spelling, which
     ; is also what the llvm.vp.ctlz calls earlier in the file use; keep the two
     ; in sync if it is ever renamed to `nxv...`.
 818 declare <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8>, i1 immarg, <2 x i1>, i32)
 819 declare <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8>, i1 immarg, <4 x i1>, i32)
 820 declare <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8>, i1 immarg, <8 x i1>, i32)
 821 declare <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8>, i1 immarg, <16 x i1>, i32)
 822 declare <vscale x 1 x i8> @llvm.vp.ctlz.nvx1i8(<vscale x 1 x i8>, i1 immarg, <vscale x 1 x i1>, i32)
 823 declare <vscale x 2 x i8> @llvm.vp.ctlz.nvx2i8(<vscale x 2 x i8>, i1 immarg, <vscale x 2 x i1>, i32)
 824 declare <vscale x 4 x i8> @llvm.vp.ctlz.nvx4i8(<vscale x 4 x i8>, i1 immarg, <vscale x 4 x i1>, i32)
 825 declare <vscale x 8 x i8> @llvm.vp.ctlz.nvx8i8(<vscale x 8 x i8>, i1 immarg, <vscale x 8 x i1>, i32)
 826 declare <vscale x 16 x i8> @llvm.vp.ctlz.nvx16i8(<vscale x 16 x i8>, i1 immarg, <vscale x 16 x i1>, i32)
 827 declare <vscale x 32 x i8> @llvm.vp.ctlz.nvx32i8(<vscale x 32 x i8>, i1 immarg, <vscale x 32 x i1>, i32)
 828 declare <vscale x 64 x i8> @llvm.vp.ctlz.nvx64i8(<vscale x 64 x i8>, i1 immarg, <vscale x 64 x i1>, i32)
 829 declare <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16>, i1 immarg, <2 x i1>, i32)
 830 declare <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16>, i1 immarg, <4 x i1>, i32)
 831 declare <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16>, i1 immarg, <8 x i1>, i32)
 832 declare <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16>, i1 immarg, <16 x i1>, i32)
 833 declare <vscale x 1 x i16> @llvm.vp.ctlz.nvx1i16(<vscale x 1 x i16>, i1 immarg, <vscale x 1 x i1>, i32)
 834 declare <vscale x 2 x i16> @llvm.vp.ctlz.nvx2i16(<vscale x 2 x i16>, i1 immarg, <vscale x 2 x i1>, i32)
 835 declare <vscale x 4 x i16> @llvm.vp.ctlz.nvx4i16(<vscale x 4 x i16>, i1 immarg, <vscale x 4 x i1>, i32)
 836 declare <vscale x 8 x i16> @llvm.vp.ctlz.nvx8i16(<vscale x 8 x i16>, i1 immarg, <vscale x 8 x i1>, i32)
 837 declare <vscale x 16 x i16> @llvm.vp.ctlz.nvx16i16(<vscale x 16 x i16>, i1 immarg, <vscale x 16 x i1>, i32)
 838 declare <vscale x 32 x i16> @llvm.vp.ctlz.nvx32i16(<vscale x 32 x i16>, i1 immarg, <vscale x 32 x i1>, i32)
 839 declare <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32>, i1 immarg, <2 x i1>, i32)
 840 declare <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32>, i1 immarg, <4 x i1>, i32)
 841 declare <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32>, i1 immarg, <8 x i1>, i32)
 842 declare <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32>, i1 immarg, <16 x i1>, i32)
 843 declare <vscale x 1 x i32> @llvm.vp.ctlz.nvx1i32(<vscale x 1 x i32>, i1 immarg, <vscale x 1 x i1>, i32)
 844 declare <vscale x 2 x i32> @llvm.vp.ctlz.nvx2i32(<vscale x 2 x i32>, i1 immarg, <vscale x 2 x i1>, i32)
 845 declare <vscale x 4 x i32> @llvm.vp.ctlz.nvx4i32(<vscale x 4 x i32>, i1 immarg, <vscale x 4 x i1>, i32)
 846 declare <vscale x 8 x i32> @llvm.vp.ctlz.nvx8i32(<vscale x 8 x i32>, i1 immarg, <vscale x 8 x i1>, i32)
 847 declare <vscale x 16 x i32> @llvm.vp.ctlz.nvx16i32(<vscale x 16 x i32>, i1 immarg, <vscale x 16 x i1>, i32)
 848 declare <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32)
 849 declare <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32)
 850 declare <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32)
 851 declare <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32)
 852 declare <vscale x 1 x i64> @llvm.vp.ctlz.nvx1i64(<vscale x 1 x i64>, i1 immarg, <vscale x 1 x i1>, i32)
 853 declare <vscale x 2 x i64> @llvm.vp.ctlz.nvx2i64(<vscale x 2 x i64>, i1 immarg, <vscale x 2 x i1>, i32)
 854 declare <vscale x 4 x i64> @llvm.vp.ctlz.nvx4i64(<vscale x 4 x i64>, i1 immarg, <vscale x 4 x i1>, i32)
 855 declare <vscale x 8 x i64> @llvm.vp.ctlz.nvx8i64(<vscale x 8 x i64>, i1 immarg, <vscale x 8 x i1>, i32)
 856 declare <vscale x 16 x i64> @llvm.vp.ctlz.nvx16i64(<vscale x 16 x i64>, i1 immarg, <vscale x 16 x i1>, i32)
 857
     ; Declarations for the llvm.vp.cttz intrinsics called by @vp_cttz, on
     ; fixed-length and scalable vectors of i8, i16, i32 and i64. Each takes the
     ; source vector, an `i1 immarg` flag, an i1 mask vector with the same element
     ; count, and a trailing i32 (the explicit vector length per the LangRef), and
     ; returns the source vector type.
     ; NOTE(review): the scalable variants use the `nvx...` suffix spelling, the
     ; same spelling the calls in @vp_cttz use; keep the two in sync if it is ever
     ; renamed to `nxv...`.
 858 declare <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8>, i1 immarg, <2 x i1>, i32)
 859 declare <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8>, i1 immarg, <4 x i1>, i32)
 860 declare <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8>, i1 immarg, <8 x i1>, i32)
 861 declare <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8>, i1 immarg, <16 x i1>, i32)
 862 declare <vscale x 1 x i8> @llvm.vp.cttz.nvx1i8(<vscale x 1 x i8>, i1 immarg, <vscale x 1 x i1>, i32)
 863 declare <vscale x 2 x i8> @llvm.vp.cttz.nvx2i8(<vscale x 2 x i8>, i1 immarg, <vscale x 2 x i1>, i32)
 864 declare <vscale x 4 x i8> @llvm.vp.cttz.nvx4i8(<vscale x 4 x i8>, i1 immarg, <vscale x 4 x i1>, i32)
 865 declare <vscale x 8 x i8> @llvm.vp.cttz.nvx8i8(<vscale x 8 x i8>, i1 immarg, <vscale x 8 x i1>, i32)
 866 declare <vscale x 16 x i8> @llvm.vp.cttz.nvx16i8(<vscale x 16 x i8>, i1 immarg, <vscale x 16 x i1>, i32)
 867 declare <vscale x 32 x i8> @llvm.vp.cttz.nvx32i8(<vscale x 32 x i8>, i1 immarg, <vscale x 32 x i1>, i32)
 868 declare <vscale x 64 x i8> @llvm.vp.cttz.nvx64i8(<vscale x 64 x i8>, i1 immarg, <vscale x 64 x i1>, i32)
 869 declare <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16>, i1 immarg, <2 x i1>, i32)
 870 declare <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16>, i1 immarg, <4 x i1>, i32)
 871 declare <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16>, i1 immarg, <8 x i1>, i32)
 872 declare <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16>, i1 immarg, <16 x i1>, i32)
 873 declare <vscale x 1 x i16> @llvm.vp.cttz.nvx1i16(<vscale x 1 x i16>, i1 immarg, <vscale x 1 x i1>, i32)
 874 declare <vscale x 2 x i16> @llvm.vp.cttz.nvx2i16(<vscale x 2 x i16>, i1 immarg, <vscale x 2 x i1>, i32)
 875 declare <vscale x 4 x i16> @llvm.vp.cttz.nvx4i16(<vscale x 4 x i16>, i1 immarg, <vscale x 4 x i1>, i32)
 876 declare <vscale x 8 x i16> @llvm.vp.cttz.nvx8i16(<vscale x 8 x i16>, i1 immarg, <vscale x 8 x i1>, i32)
 877 declare <vscale x 16 x i16> @llvm.vp.cttz.nvx16i16(<vscale x 16 x i16>, i1 immarg, <vscale x 16 x i1>, i32)
 878 declare <vscale x 32 x i16> @llvm.vp.cttz.nvx32i16(<vscale x 32 x i16>, i1 immarg, <vscale x 32 x i1>, i32)
 879 declare <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32>, i1 immarg, <2 x i1>, i32)
 880 declare <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32>, i1 immarg, <4 x i1>, i32)
 881 declare <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32>, i1 immarg, <8 x i1>, i32)
 882 declare <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32>, i1 immarg, <16 x i1>, i32)
 883 declare <vscale x 1 x i32> @llvm.vp.cttz.nvx1i32(<vscale x 1 x i32>, i1 immarg, <vscale x 1 x i1>, i32)
 884 declare <vscale x 2 x i32> @llvm.vp.cttz.nvx2i32(<vscale x 2 x i32>, i1 immarg, <vscale x 2 x i1>, i32)
 885 declare <vscale x 4 x i32> @llvm.vp.cttz.nvx4i32(<vscale x 4 x i32>, i1 immarg, <vscale x 4 x i1>, i32)
 886 declare <vscale x 8 x i32> @llvm.vp.cttz.nvx8i32(<vscale x 8 x i32>, i1 immarg, <vscale x 8 x i1>, i32)
 887 declare <vscale x 16 x i32> @llvm.vp.cttz.nvx16i32(<vscale x 16 x i32>, i1 immarg, <vscale x 16 x i1>, i32)
 888 declare <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32)
 889 declare <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32)
 890 declare <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32)
 891 declare <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32)
 892 declare <vscale x 1 x i64> @llvm.vp.cttz.nvx1i64(<vscale x 1 x i64>, i1 immarg, <vscale x 1 x i1>, i32)
 893 declare <vscale x 2 x i64> @llvm.vp.cttz.nvx2i64(<vscale x 2 x i64>, i1 immarg, <vscale x 2 x i1>, i32)
 894 declare <vscale x 4 x i64> @llvm.vp.cttz.nvx4i64(<vscale x 4 x i64>, i1 immarg, <vscale x 4 x i1>, i32)
 895 declare <vscale x 8 x i64> @llvm.vp.cttz.nvx8i64(<vscale x 8 x i64>, i1 immarg, <vscale x 8 x i1>, i32)
 896 declare <vscale x 16 x i64> @llvm.vp.cttz.nvx16i64(<vscale x 16 x i64>, i1 immarg, <vscale x 16 x i1>, i32)