1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d -riscv-v-vector-bits-min=-1 | FileCheck %s --check-prefixes=CHECK,NOZVBB
3 ; Vector ctpop exists only under ZVBB
4 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+experimental-zvbb -riscv-v-vector-bits-min=-1 | FileCheck %s --check-prefixes=CHECK,ZVBB
8 ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call i16 @llvm.bswap.i16(i16 undef)
9 ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> undef)
10 ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> undef)
11 ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> undef)
12 ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %5 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> undef)
13 ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %6 = call <vscale x 1 x i16> @llvm.bswap.nxv1i16(<vscale x 1 x i16> undef)
14 ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %7 = call <vscale x 2 x i16> @llvm.bswap.nxv2i16(<vscale x 2 x i16> undef)
15 ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %8 = call <vscale x 4 x i16> @llvm.bswap.nxv4i16(<vscale x 4 x i16> undef)
16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = call <vscale x 8 x i16> @llvm.bswap.nxv8i16(<vscale x 8 x i16> undef)
17 ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %10 = call <vscale x 16 x i16> @llvm.bswap.nxv16i16(<vscale x 16 x i16> undef)
18 ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %11 = call i32 @llvm.bswap.i32(i32 undef)
19 ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %12 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> undef)
20 ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %13 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> undef)
21 ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %14 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> undef)
22 ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %15 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> undef)
23 ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %16 = call <vscale x 1 x i32> @llvm.bswap.nxv1i32(<vscale x 1 x i32> undef)
24 ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %17 = call <vscale x 2 x i32> @llvm.bswap.nxv2i32(<vscale x 2 x i32> undef)
25 ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %18 = call <vscale x 4 x i32> @llvm.bswap.nxv4i32(<vscale x 4 x i32> undef)
26 ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %19 = call <vscale x 8 x i32> @llvm.bswap.nxv8i32(<vscale x 8 x i32> undef)
27 ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %20 = call <vscale x 16 x i32> @llvm.bswap.nxv16i32(<vscale x 16 x i32> undef)
28 ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %21 = call i64 @llvm.bswap.i64(i64 undef)
29 ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %22 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> undef)
30 ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %23 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> undef)
31 ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %24 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> undef)
32 ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %25 = call <16 x i64> @llvm.bswap.v16i64(<16 x i64> undef)
33 ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %26 = call <vscale x 1 x i64> @llvm.bswap.nxv1i64(<vscale x 1 x i64> undef)
34 ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %27 = call <vscale x 2 x i64> @llvm.bswap.nxv2i64(<vscale x 2 x i64> undef)
35 ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %28 = call <vscale x 4 x i64> @llvm.bswap.nxv4i64(<vscale x 4 x i64> undef)
36 ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %29 = call <vscale x 8 x i64> @llvm.bswap.nxv8i64(<vscale x 8 x i64> undef)
37 ; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %30 = call <vscale x 16 x i64> @llvm.bswap.nxv16i64(<vscale x 16 x i64> undef)
38 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
40 call i16 @llvm.bswap.i16(i16 undef)
41 call <2 x i16> @llvm.bswap.v2i16(<2 x i16> undef)
42 call <4 x i16> @llvm.bswap.v4i16(<4 x i16> undef)
43 call <8 x i16> @llvm.bswap.v8i16(<8 x i16> undef)
44 call <16 x i16> @llvm.bswap.v16i16(<16 x i16> undef)
45 call <vscale x 1 x i16> @llvm.bswap.nvx1i16(<vscale x 1 x i16> undef)
46 call <vscale x 2 x i16> @llvm.bswap.nvx2i16(<vscale x 2 x i16> undef)
47 call <vscale x 4 x i16> @llvm.bswap.nvx4i16(<vscale x 4 x i16> undef)
48 call <vscale x 8 x i16> @llvm.bswap.nvx8i16(<vscale x 8 x i16> undef)
49 call <vscale x 16 x i16> @llvm.bswap.nvx16i16(<vscale x 16 x i16> undef)
50 call i32 @llvm.bswap.i32(i32 undef)
51 call <2 x i32> @llvm.bswap.v2i32(<2 x i32> undef)
52 call <4 x i32> @llvm.bswap.v4i32(<4 x i32> undef)
53 call <8 x i32> @llvm.bswap.v8i32(<8 x i32> undef)
54 call <16 x i32> @llvm.bswap.v16i32(<16 x i32> undef)
55 call <vscale x 1 x i32> @llvm.bswap.nvx1i32(<vscale x 1 x i32> undef)
56 call <vscale x 2 x i32> @llvm.bswap.nvx2i32(<vscale x 2 x i32> undef)
57 call <vscale x 4 x i32> @llvm.bswap.nvx4i32(<vscale x 4 x i32> undef)
58 call <vscale x 8 x i32> @llvm.bswap.nvx8i32(<vscale x 8 x i32> undef)
59 call <vscale x 16 x i32> @llvm.bswap.nvx16i32(<vscale x 16 x i32> undef)
60 call i64 @llvm.bswap.i64(i64 undef)
61 call <2 x i64> @llvm.bswap.v2i64(<2 x i64> undef)
62 call <4 x i64> @llvm.bswap.v4i64(<4 x i64> undef)
63 call <8 x i64> @llvm.bswap.v8i64(<8 x i64> undef)
64 call <16 x i64> @llvm.bswap.v16i64(<16 x i64> undef)
65 call <vscale x 1 x i64> @llvm.bswap.nvx1i64(<vscale x 1 x i64> undef)
66 call <vscale x 2 x i64> @llvm.bswap.nvx2i64(<vscale x 2 x i64> undef)
67 call <vscale x 4 x i64> @llvm.bswap.nvx4i64(<vscale x 4 x i64> undef)
68 call <vscale x 8 x i64> @llvm.bswap.nvx8i64(<vscale x 8 x i64> undef)
69 call <vscale x 16 x i64> @llvm.bswap.nvx16i64(<vscale x 16 x i64> undef)
73 define void @bitreverse() {
74 ; CHECK-LABEL: 'bitreverse'
75 ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call i8 @llvm.bitreverse.i8(i8 undef)
76 ; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %2 = call <2 x i8> @llvm.bitreverse.v2i8(<2 x i8> undef)
77 ; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %3 = call <4 x i8> @llvm.bitreverse.v4i8(<4 x i8> undef)
78 ; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %4 = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> undef)
79 ; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %5 = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> undef)
80 ; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %6 = call <vscale x 1 x i8> @llvm.bitreverse.nxv1i8(<vscale x 1 x i8> undef)
81 ; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %7 = call <vscale x 2 x i8> @llvm.bitreverse.nxv2i8(<vscale x 2 x i8> undef)
82 ; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %8 = call <vscale x 4 x i8> @llvm.bitreverse.nxv4i8(<vscale x 4 x i8> undef)
83 ; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %9 = call <vscale x 8 x i8> @llvm.bitreverse.nxv8i8(<vscale x 8 x i8> undef)
84 ; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %10 = call <vscale x 16 x i8> @llvm.bitreverse.nxv16i8(<vscale x 16 x i8> undef)
85 ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %11 = call i16 @llvm.bitreverse.i16(i16 undef)
86 ; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %12 = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> undef)
87 ; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %13 = call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> undef)
88 ; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %14 = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> undef)
89 ; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %15 = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> undef)
90 ; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %16 = call <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16> undef)
91 ; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %17 = call <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16> undef)
92 ; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %18 = call <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16> undef)
93 ; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %19 = call <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16> undef)
94 ; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %20 = call <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16> undef)
95 ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %21 = call i32 @llvm.bitreverse.i32(i32 undef)
96 ; CHECK-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %22 = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> undef)
97 ; CHECK-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %23 = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> undef)
98 ; CHECK-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %24 = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> undef)
99 ; CHECK-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %25 = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> undef)
100 ; CHECK-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %26 = call <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32> undef)
101 ; CHECK-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %27 = call <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32> undef)
102 ; CHECK-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %28 = call <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32> undef)
103 ; CHECK-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %29 = call <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32> undef)
104 ; CHECK-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %30 = call <vscale x 16 x i32> @llvm.bitreverse.nxv16i32(<vscale x 16 x i32> undef)
105 ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %31 = call i64 @llvm.bitreverse.i64(i64 undef)
106 ; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %32 = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> undef)
107 ; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %33 = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> undef)
108 ; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %34 = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> undef)
109 ; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %35 = call <16 x i64> @llvm.bitreverse.v16i64(<16 x i64> undef)
110 ; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %36 = call <vscale x 1 x i64> @llvm.bitreverse.nxv1i64(<vscale x 1 x i64> undef)
111 ; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %37 = call <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64> undef)
112 ; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %38 = call <vscale x 4 x i64> @llvm.bitreverse.nxv4i64(<vscale x 4 x i64> undef)
113 ; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %39 = call <vscale x 8 x i64> @llvm.bitreverse.nxv8i64(<vscale x 8 x i64> undef)
114 ; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %40 = call <vscale x 16 x i64> @llvm.bitreverse.nxv16i64(<vscale x 16 x i64> undef)
115 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
117 call i8 @llvm.bitreverse.i8(i8 undef)
118 call <2 x i8> @llvm.bitreverse.v2i8(<2 x i8> undef)
119 call <4 x i8> @llvm.bitreverse.v4i8(<4 x i8> undef)
120 call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> undef)
121 call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> undef)
122 call <vscale x 1 x i8> @llvm.bitreverse.nvx1i8(<vscale x 1 x i8> undef)
123 call <vscale x 2 x i8> @llvm.bitreverse.nvx2i8(<vscale x 2 x i8> undef)
124 call <vscale x 4 x i8> @llvm.bitreverse.nvx4i8(<vscale x 4 x i8> undef)
125 call <vscale x 8 x i8> @llvm.bitreverse.nvx8i8(<vscale x 8 x i8> undef)
126 call <vscale x 16 x i8> @llvm.bitreverse.nvx16i8(<vscale x 16 x i8> undef)
127 call i16 @llvm.bitreverse.i16(i16 undef)
128 call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> undef)
129 call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> undef)
130 call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> undef)
131 call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> undef)
132 call <vscale x 1 x i16> @llvm.bitreverse.nvx1i16(<vscale x 1 x i16> undef)
133 call <vscale x 2 x i16> @llvm.bitreverse.nvx2i16(<vscale x 2 x i16> undef)
134 call <vscale x 4 x i16> @llvm.bitreverse.nvx4i16(<vscale x 4 x i16> undef)
135 call <vscale x 8 x i16> @llvm.bitreverse.nvx8i16(<vscale x 8 x i16> undef)
136 call <vscale x 16 x i16> @llvm.bitreverse.nvx16i16(<vscale x 16 x i16> undef)
137 call i32 @llvm.bitreverse.i32(i32 undef)
138 call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> undef)
139 call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> undef)
140 call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> undef)
141 call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> undef)
142 call <vscale x 1 x i32> @llvm.bitreverse.nvx1i32(<vscale x 1 x i32> undef)
143 call <vscale x 2 x i32> @llvm.bitreverse.nvx2i32(<vscale x 2 x i32> undef)
144 call <vscale x 4 x i32> @llvm.bitreverse.nvx4i32(<vscale x 4 x i32> undef)
145 call <vscale x 8 x i32> @llvm.bitreverse.nvx8i32(<vscale x 8 x i32> undef)
146 call <vscale x 16 x i32> @llvm.bitreverse.nvx16i32(<vscale x 16 x i32> undef)
147 call i64 @llvm.bitreverse.i64(i64 undef)
148 call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> undef)
149 call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> undef)
150 call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> undef)
151 call <16 x i64> @llvm.bitreverse.v16i64(<16 x i64> undef)
152 call <vscale x 1 x i64> @llvm.bitreverse.nvx1i64(<vscale x 1 x i64> undef)
153 call <vscale x 2 x i64> @llvm.bitreverse.nvx2i64(<vscale x 2 x i64> undef)
154 call <vscale x 4 x i64> @llvm.bitreverse.nvx4i64(<vscale x 4 x i64> undef)
155 call <vscale x 8 x i64> @llvm.bitreverse.nvx8i64(<vscale x 8 x i64> undef)
156 call <vscale x 16 x i64> @llvm.bitreverse.nvx16i64(<vscale x 16 x i64> undef)
160 define void @ctpop() {
161 ; NOZVBB-LABEL: 'ctpop'
162 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = call i8 @llvm.ctpop.i8(i8 undef)
163 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %2 = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> undef)
164 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %3 = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> undef)
165 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %4 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> undef)
166 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %5 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> undef)
167 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %6 = call <vscale x 1 x i8> @llvm.ctpop.nxv1i8(<vscale x 1 x i8> undef)
168 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %7 = call <vscale x 2 x i8> @llvm.ctpop.nxv2i8(<vscale x 2 x i8> undef)
169 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %8 = call <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8> undef)
170 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %9 = call <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8> undef)
171 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %10 = call <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8> undef)
172 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = call i16 @llvm.ctpop.i16(i16 undef)
173 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %12 = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> undef)
174 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %13 = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> undef)
175 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %14 = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> undef)
176 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %15 = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> undef)
177 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %16 = call <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16> undef)
178 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %17 = call <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16> undef)
179 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %18 = call <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16> undef)
180 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %19 = call <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16> undef)
181 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %20 = call <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16> undef)
182 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %21 = call i32 @llvm.ctpop.i32(i32 undef)
183 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %22 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> undef)
184 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %23 = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> undef)
185 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %24 = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> undef)
186 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %25 = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> undef)
187 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %26 = call <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32> undef)
188 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %27 = call <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32> undef)
189 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %28 = call <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32> undef)
190 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %29 = call <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32> undef)
191 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %30 = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> undef)
192 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = call i64 @llvm.ctpop.i64(i64 undef)
193 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %32 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> undef)
194 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %33 = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> undef)
195 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %34 = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> undef)
196 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %35 = call <16 x i64> @llvm.ctpop.v16i64(<16 x i64> undef)
197 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %36 = call <vscale x 1 x i64> @llvm.ctpop.nxv1i64(<vscale x 1 x i64> undef)
198 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %37 = call <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64> undef)
199 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %38 = call <vscale x 4 x i64> @llvm.ctpop.nxv4i64(<vscale x 4 x i64> undef)
200 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %39 = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> undef)
201 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %40 = call <vscale x 16 x i64> @llvm.ctpop.nxv16i64(<vscale x 16 x i64> undef)
202 ; NOZVBB-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
204 ; ZVBB-LABEL: 'ctpop'
205 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = call i8 @llvm.ctpop.i8(i8 undef)
206 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> undef)
207 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> undef)
208 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> undef)
209 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> undef)
210 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x i8> @llvm.ctpop.nxv1i8(<vscale x 1 x i8> undef)
211 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x i8> @llvm.ctpop.nxv2i8(<vscale x 2 x i8> undef)
212 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8> undef)
213 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8> undef)
214 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8> undef)
215 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = call i16 @llvm.ctpop.i16(i16 undef)
216 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> undef)
217 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> undef)
218 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> undef)
219 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> undef)
220 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16> undef)
221 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16> undef)
222 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16> undef)
223 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16> undef)
224 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = call <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16> undef)
225 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %21 = call i32 @llvm.ctpop.i32(i32 undef)
226 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> undef)
227 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> undef)
228 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> undef)
229 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> undef)
230 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32> undef)
231 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32> undef)
232 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = call <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32> undef)
233 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32> undef)
234 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> undef)
235 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = call i64 @llvm.ctpop.i64(i64 undef)
236 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %32 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> undef)
237 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> undef)
238 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> undef)
239 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = call <16 x i64> @llvm.ctpop.v16i64(<16 x i64> undef)
240 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = call <vscale x 1 x i64> @llvm.ctpop.nxv1i64(<vscale x 1 x i64> undef)
241 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = call <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64> undef)
242 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = call <vscale x 4 x i64> @llvm.ctpop.nxv4i64(<vscale x 4 x i64> undef)
243 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %39 = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> undef)
244 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %40 = call <vscale x 16 x i64> @llvm.ctpop.nxv16i64(<vscale x 16 x i64> undef)
245 ; ZVBB-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
247 call i8 @llvm.ctpop.i8(i8 undef)
248 call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> undef)
249 call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> undef)
250 call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> undef)
251 call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> undef)
252 call <vscale x 1 x i8> @llvm.ctpop.nvx1i8(<vscale x 1 x i8> undef)
253 call <vscale x 2 x i8> @llvm.ctpop.nvx2i8(<vscale x 2 x i8> undef)
254 call <vscale x 4 x i8> @llvm.ctpop.nvx4i8(<vscale x 4 x i8> undef)
255 call <vscale x 8 x i8> @llvm.ctpop.nvx8i8(<vscale x 8 x i8> undef)
256 call <vscale x 16 x i8> @llvm.ctpop.nvx16i8(<vscale x 16 x i8> undef)
257 call i16 @llvm.ctpop.i16(i16 undef)
258 call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> undef)
259 call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> undef)
260 call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> undef)
261 call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> undef)
262 call <vscale x 1 x i16> @llvm.ctpop.nvx1i16(<vscale x 1 x i16> undef)
263 call <vscale x 2 x i16> @llvm.ctpop.nvx2i16(<vscale x 2 x i16> undef)
264 call <vscale x 4 x i16> @llvm.ctpop.nvx4i16(<vscale x 4 x i16> undef)
265 call <vscale x 8 x i16> @llvm.ctpop.nvx8i16(<vscale x 8 x i16> undef)
266 call <vscale x 16 x i16> @llvm.ctpop.nvx16i16(<vscale x 16 x i16> undef)
267 call i32 @llvm.ctpop.i32(i32 undef)
268 call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> undef)
269 call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> undef)
270 call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> undef)
271 call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> undef)
272 call <vscale x 1 x i32> @llvm.ctpop.nvx1i32(<vscale x 1 x i32> undef)
273 call <vscale x 2 x i32> @llvm.ctpop.nvx2i32(<vscale x 2 x i32> undef)
274 call <vscale x 4 x i32> @llvm.ctpop.nvx4i32(<vscale x 4 x i32> undef)
275 call <vscale x 8 x i32> @llvm.ctpop.nvx8i32(<vscale x 8 x i32> undef)
276 call <vscale x 16 x i32> @llvm.ctpop.nvx16i32(<vscale x 16 x i32> undef)
277 call i64 @llvm.ctpop.i64(i64 undef)
278 call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> undef)
279 call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> undef)
280 call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> undef)
281 call <16 x i64> @llvm.ctpop.v16i64(<16 x i64> undef)
282 call <vscale x 1 x i64> @llvm.ctpop.nvx1i64(<vscale x 1 x i64> undef)
283 call <vscale x 2 x i64> @llvm.ctpop.nvx2i64(<vscale x 2 x i64> undef)
284 call <vscale x 4 x i64> @llvm.ctpop.nvx4i64(<vscale x 4 x i64> undef)
285 call <vscale x 8 x i64> @llvm.ctpop.nvx8i64(<vscale x 8 x i64> undef)
286 call <vscale x 16 x i64> @llvm.ctpop.nvx16i64(<vscale x 16 x i64> undef)
290 define void @vp_bswap() {
291 ; CHECK-LABEL: 'vp_bswap'
292 ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = call <2 x i16> @llvm.vp.bswap.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
293 ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = call <4 x i16> @llvm.vp.bswap.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
294 ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = call <8 x i16> @llvm.vp.bswap.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
295 ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = call <16 x i16> @llvm.vp.bswap.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
296 ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %5 = call <vscale x 1 x i16> @llvm.vp.bswap.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
297 ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %6 = call <vscale x 2 x i16> @llvm.vp.bswap.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
298 ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %7 = call <vscale x 4 x i16> @llvm.vp.bswap.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
299 ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %8 = call <vscale x 8 x i16> @llvm.vp.bswap.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
300 ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = call <vscale x 16 x i16> @llvm.vp.bswap.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
301 ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %10 = call <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
302 ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %11 = call <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
303 ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %12 = call <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
304 ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %13 = call <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
305 ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %14 = call <vscale x 1 x i32> @llvm.vp.bswap.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
306 ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %15 = call <vscale x 2 x i32> @llvm.vp.bswap.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
307 ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %16 = call <vscale x 4 x i32> @llvm.vp.bswap.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
308 ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %17 = call <vscale x 8 x i32> @llvm.vp.bswap.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
309 ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %18 = call <vscale x 16 x i32> @llvm.vp.bswap.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
310 ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %19 = call <2 x i64> @llvm.vp.bswap.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
311 ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %20 = call <4 x i64> @llvm.vp.bswap.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
312 ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %21 = call <8 x i64> @llvm.vp.bswap.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
313 ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %22 = call <16 x i64> @llvm.vp.bswap.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
314 ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %23 = call <vscale x 1 x i64> @llvm.vp.bswap.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
315 ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %24 = call <vscale x 2 x i64> @llvm.vp.bswap.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
316 ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %25 = call <vscale x 4 x i64> @llvm.vp.bswap.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
317 ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %26 = call <vscale x 8 x i64> @llvm.vp.bswap.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
318 ; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %27 = call <vscale x 16 x i64> @llvm.vp.bswap.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
319 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
321 call <2 x i16> @llvm.vp.bswap.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
322 call <4 x i16> @llvm.vp.bswap.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
323 call <8 x i16> @llvm.vp.bswap.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
324 call <16 x i16> @llvm.vp.bswap.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
325 call <vscale x 1 x i16> @llvm.vp.bswap.nvx1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
326 call <vscale x 2 x i16> @llvm.vp.bswap.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
327 call <vscale x 4 x i16> @llvm.vp.bswap.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
328 call <vscale x 8 x i16> @llvm.vp.bswap.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
329 call <vscale x 16 x i16> @llvm.vp.bswap.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
330 call <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
331 call <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
332 call <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
333 call <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
334 call <vscale x 1 x i32> @llvm.vp.bswap.nvx1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
335 call <vscale x 2 x i32> @llvm.vp.bswap.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
336 call <vscale x 4 x i32> @llvm.vp.bswap.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
337 call <vscale x 8 x i32> @llvm.vp.bswap.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
338 call <vscale x 16 x i32> @llvm.vp.bswap.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
339 call <2 x i64> @llvm.vp.bswap.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
340 call <4 x i64> @llvm.vp.bswap.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
341 call <8 x i64> @llvm.vp.bswap.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
342 call <16 x i64> @llvm.vp.bswap.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
343 call <vscale x 1 x i64> @llvm.vp.bswap.nvx1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
344 call <vscale x 2 x i64> @llvm.vp.bswap.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
345 call <vscale x 4 x i64> @llvm.vp.bswap.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
346 call <vscale x 8 x i64> @llvm.vp.bswap.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
347 call <vscale x 16 x i64> @llvm.vp.bswap.nvx16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
351 define void @vp_ctpop() {
352 ; CHECK-LABEL: 'vp_ctpop'
353 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %1 = call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
354 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %2 = call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
355 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %3 = call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
356 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %4 = call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
357 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %5 = call <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
358 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %6 = call <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
359 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %7 = call <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
360 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %8 = call <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
361 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %9 = call <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
362 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %10 = call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
363 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %11 = call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
364 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %12 = call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
365 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %13 = call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
366 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %14 = call <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
367 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %15 = call <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
368 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %16 = call <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
369 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %17 = call <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
370 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %18 = call <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
371 ; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %19 = call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
372 ; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %20 = call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
373 ; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %21 = call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
374 ; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %22 = call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
375 ; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %23 = call <vscale x 1 x i32> @llvm.vp.ctpop.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
376 ; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %24 = call <vscale x 2 x i32> @llvm.vp.ctpop.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
377 ; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %25 = call <vscale x 4 x i32> @llvm.vp.ctpop.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
378 ; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %26 = call <vscale x 8 x i32> @llvm.vp.ctpop.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
379 ; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %27 = call <vscale x 16 x i32> @llvm.vp.ctpop.nxv16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
380 ; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %28 = call <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
381 ; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %29 = call <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
382 ; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %30 = call <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
383 ; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %31 = call <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
384 ; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %32 = call <vscale x 1 x i64> @llvm.vp.ctpop.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
385 ; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %33 = call <vscale x 2 x i64> @llvm.vp.ctpop.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
386 ; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %34 = call <vscale x 4 x i64> @llvm.vp.ctpop.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
387 ; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %35 = call <vscale x 8 x i64> @llvm.vp.ctpop.nxv8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
388 ; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %36 = call <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
389 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
391 call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
392 call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
393 call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
394 call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
395 call <vscale x 1 x i16> @llvm.vp.ctpop.nvx1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
396 call <vscale x 2 x i16> @llvm.vp.ctpop.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
397 call <vscale x 4 x i16> @llvm.vp.ctpop.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
398 call <vscale x 8 x i16> @llvm.vp.ctpop.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
399 call <vscale x 16 x i16> @llvm.vp.ctpop.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
400 call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> undef, <2 x i1> undef, i32 undef)
401 call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> undef, <4 x i1> undef, i32 undef)
402 call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> undef, <8 x i1> undef, i32 undef)
403 call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> undef, <16 x i1> undef, i32 undef)
404 call <vscale x 1 x i16> @llvm.vp.ctpop.nvx1i16(<vscale x 1 x i16> undef, <vscale x 1 x i1> undef, i32 undef)
405 call <vscale x 2 x i16> @llvm.vp.ctpop.nvx2i16(<vscale x 2 x i16> undef, <vscale x 2 x i1> undef, i32 undef)
406 call <vscale x 4 x i16> @llvm.vp.ctpop.nvx4i16(<vscale x 4 x i16> undef, <vscale x 4 x i1> undef, i32 undef)
407 call <vscale x 8 x i16> @llvm.vp.ctpop.nvx8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> undef, i32 undef)
408 call <vscale x 16 x i16> @llvm.vp.ctpop.nvx16i16(<vscale x 16 x i16> undef, <vscale x 16 x i1> undef, i32 undef)
409 call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> undef, <2 x i1> undef, i32 undef)
410 call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> undef, <4 x i1> undef, i32 undef)
411 call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> undef, <8 x i1> undef, i32 undef)
412 call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> undef, <16 x i1> undef, i32 undef)
413 call <vscale x 1 x i32> @llvm.vp.ctpop.nvx1i32(<vscale x 1 x i32> undef, <vscale x 1 x i1> undef, i32 undef)
414 call <vscale x 2 x i32> @llvm.vp.ctpop.nvx2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> undef, i32 undef)
415 call <vscale x 4 x i32> @llvm.vp.ctpop.nvx4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> undef, i32 undef)
416 call <vscale x 8 x i32> @llvm.vp.ctpop.nvx8i32(<vscale x 8 x i32> undef, <vscale x 8 x i1> undef, i32 undef)
417 call <vscale x 16 x i32> @llvm.vp.ctpop.nvx16i32(<vscale x 16 x i32> undef, <vscale x 16 x i1> undef, i32 undef)
418 call <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64> undef, <2 x i1> undef, i32 undef)
419 call <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64> undef, <4 x i1> undef, i32 undef)
420 call <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64> undef, <8 x i1> undef, i32 undef)
421 call <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64> undef, <16 x i1> undef, i32 undef)
422 call <vscale x 1 x i64> @llvm.vp.ctpop.nvx1i64(<vscale x 1 x i64> undef, <vscale x 1 x i1> undef, i32 undef)
423 call <vscale x 2 x i64> @llvm.vp.ctpop.nvx2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> undef, i32 undef)
424 call <vscale x 4 x i64> @llvm.vp.ctpop.nvx4i64(<vscale x 4 x i64> undef, <vscale x 4 x i1> undef, i32 undef)
425 call <vscale x 8 x i64> @llvm.vp.ctpop.nvx8i64(<vscale x 8 x i64> undef, <vscale x 8 x i1> undef, i32 undef)
426 call <vscale x 16 x i64> @llvm.vp.ctpop.nvx16i64(<vscale x 16 x i64> undef, <vscale x 16 x i1> undef, i32 undef)
430 define void @vp_ctlz() {
431 ; CHECK-LABEL: 'vp_ctlz'
432 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %1 = call <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8> undef, i1 false, <2 x i1> undef, i32 undef)
433 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %2 = call <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8> undef, i1 false, <4 x i1> undef, i32 undef)
434 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %3 = call <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8> undef, i1 false, <8 x i1> undef, i32 undef)
435 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %4 = call <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8> undef, i1 false, <16 x i1> undef, i32 undef)
436 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %5 = call <vscale x 1 x i8> @llvm.vp.ctlz.nxv1i8(<vscale x 1 x i8> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
437 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %6 = call <vscale x 2 x i8> @llvm.vp.ctlz.nxv2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
438 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %7 = call <vscale x 4 x i8> @llvm.vp.ctlz.nxv4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
439 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %8 = call <vscale x 8 x i8> @llvm.vp.ctlz.nxv8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
440 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %9 = call <vscale x 16 x i8> @llvm.vp.ctlz.nxv16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
441 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %10 = call <vscale x 32 x i8> @llvm.vp.ctlz.nxv32i8(<vscale x 32 x i8> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
442 ; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %11 = call <vscale x 64 x i8> @llvm.vp.ctlz.nxv64i8(<vscale x 64 x i8> undef, i1 false, <vscale x 64 x i1> undef, i32 undef)
443 ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %12 = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
444 ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %13 = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
445 ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %14 = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
446 ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %15 = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
447 ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %16 = call <vscale x 1 x i16> @llvm.vp.ctlz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
448 ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %17 = call <vscale x 2 x i16> @llvm.vp.ctlz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
449 ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %18 = call <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
450 ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %19 = call <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
451 ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %20 = call <vscale x 16 x i16> @llvm.vp.ctlz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
452 ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %21 = call <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
453 ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %22 = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
454 ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %23 = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
455 ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %24 = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
456 ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %25 = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
457 ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %26 = call <vscale x 1 x i16> @llvm.vp.ctlz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
458 ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %27 = call <vscale x 2 x i16> @llvm.vp.ctlz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
459 ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %28 = call <vscale x 4 x i16> @llvm.vp.ctlz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
460 ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %29 = call <vscale x 8 x i16> @llvm.vp.ctlz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
461 ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %30 = call <vscale x 16 x i16> @llvm.vp.ctlz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
462 ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %31 = call <vscale x 32 x i16> @llvm.vp.ctlz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
463 ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %32 = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> undef, i1 false, <2 x i1> undef, i32 undef)
464 ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %33 = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> undef, i1 false, <4 x i1> undef, i32 undef)
465 ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %34 = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> undef, i1 false, <8 x i1> undef, i32 undef)
466 ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %35 = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> undef, i1 false, <16 x i1> undef, i32 undef)
467 ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %36 = call <vscale x 1 x i32> @llvm.vp.ctlz.nxv1i32(<vscale x 1 x i32> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
468 ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %37 = call <vscale x 2 x i32> @llvm.vp.ctlz.nxv2i32(<vscale x 2 x i32> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
469 ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %38 = call <vscale x 4 x i32> @llvm.vp.ctlz.nxv4i32(<vscale x 4 x i32> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
470 ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %39 = call <vscale x 8 x i32> @llvm.vp.ctlz.nxv8i32(<vscale x 8 x i32> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
471 ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %40 = call <vscale x 16 x i32> @llvm.vp.ctlz.nxv16i32(<vscale x 16 x i32> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
472 ; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %41 = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef)
473 ; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %42 = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef)
474 ; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %43 = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef)
475 ; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %44 = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef)
476 ; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %45 = call <vscale x 1 x i64> @llvm.vp.ctlz.nxv1i64(<vscale x 1 x i64> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
477 ; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %46 = call <vscale x 2 x i64> @llvm.vp.ctlz.nxv2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
478 ; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %47 = call <vscale x 4 x i64> @llvm.vp.ctlz.nxv4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
479 ; CHECK-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %48 = call <vscale x 8 x i64> @llvm.vp.ctlz.nxv8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
480 ; CHECK-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %49 = call <vscale x 16 x i64> @llvm.vp.ctlz.nxv16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
481 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
483 call <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8> undef, i1 false, <2 x i1> undef, i32 undef)
484 call <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8> undef, i1 false, <4 x i1> undef, i32 undef)
485 call <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8> undef, i1 false, <8 x i1> undef, i32 undef)
486 call <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8> undef, i1 false, <16 x i1> undef, i32 undef)
487 call <vscale x 1 x i8> @llvm.vp.ctlz.nvx1i8(<vscale x 1 x i8> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
488 call <vscale x 2 x i8> @llvm.vp.ctlz.nvx2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
489 call <vscale x 4 x i8> @llvm.vp.ctlz.nvx4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
490 call <vscale x 8 x i8> @llvm.vp.ctlz.nvx8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
491 call <vscale x 16 x i8> @llvm.vp.ctlz.nvx16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
492 call <vscale x 32 x i8> @llvm.vp.ctlz.nvx32i8(<vscale x 32 x i8> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
493 call <vscale x 64 x i8> @llvm.vp.ctlz.nvx64i8(<vscale x 64 x i8> undef, i1 false, <vscale x 64 x i1> undef, i32 undef)
494 call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
495 call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
496 call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
497 call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
498 call <vscale x 1 x i16> @llvm.vp.ctlz.nvx1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
499 call <vscale x 2 x i16> @llvm.vp.ctlz.nvx2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
500 call <vscale x 4 x i16> @llvm.vp.ctlz.nvx4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
501 call <vscale x 8 x i16> @llvm.vp.ctlz.nvx8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
502 call <vscale x 16 x i16> @llvm.vp.ctlz.nvx16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
503 call <vscale x 32 x i16> @llvm.vp.ctlz.nvx32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
504 call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
505 call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
506 call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
507 call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
508 call <vscale x 1 x i16> @llvm.vp.ctlz.nvx1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
509 call <vscale x 2 x i16> @llvm.vp.ctlz.nvx2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
510 call <vscale x 4 x i16> @llvm.vp.ctlz.nvx4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
511 call <vscale x 8 x i16> @llvm.vp.ctlz.nvx8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
512 call <vscale x 16 x i16> @llvm.vp.ctlz.nvx16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
513 call <vscale x 32 x i16> @llvm.vp.ctlz.nvx32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
514 call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> undef, i1 false, <2 x i1> undef, i32 undef)
515 call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> undef, i1 false, <4 x i1> undef, i32 undef)
516 call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> undef, i1 false, <8 x i1> undef, i32 undef)
517 call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> undef, i1 false, <16 x i1> undef, i32 undef)
518 call <vscale x 1 x i32> @llvm.vp.ctlz.nvx1i32(<vscale x 1 x i32> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
519 call <vscale x 2 x i32> @llvm.vp.ctlz.nvx2i32(<vscale x 2 x i32> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
520 call <vscale x 4 x i32> @llvm.vp.ctlz.nvx4i32(<vscale x 4 x i32> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
521 call <vscale x 8 x i32> @llvm.vp.ctlz.nvx8i32(<vscale x 8 x i32> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
522 call <vscale x 16 x i32> @llvm.vp.ctlz.nvx16i32(<vscale x 16 x i32> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
523 call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef)
524 call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef)
525 call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef)
526 call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef)
527 call <vscale x 1 x i64> @llvm.vp.ctlz.nvx1i64(<vscale x 1 x i64> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
528 call <vscale x 2 x i64> @llvm.vp.ctlz.nvx2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
529 call <vscale x 4 x i64> @llvm.vp.ctlz.nvx4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
530 call <vscale x 8 x i64> @llvm.vp.ctlz.nvx8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
531 call <vscale x 16 x i64> @llvm.vp.ctlz.nvx16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
535 define void @vp_cttz() {
536 ; CHECK-LABEL: 'vp_cttz'
537 ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %1 = call <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8> undef, i1 false, <2 x i1> undef, i32 undef)
538 ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %2 = call <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8> undef, i1 false, <4 x i1> undef, i32 undef)
539 ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %3 = call <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8> undef, i1 false, <8 x i1> undef, i32 undef)
540 ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %4 = call <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8> undef, i1 false, <16 x i1> undef, i32 undef)
541 ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = call <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
542 ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = call <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
543 ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %7 = call <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
544 ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %8 = call <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
545 ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %9 = call <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
546 ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %10 = call <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
547 ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %11 = call <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8> undef, i1 false, <vscale x 64 x i1> undef, i32 undef)
548 ; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %12 = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
549 ; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %13 = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
550 ; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %14 = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
551 ; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %15 = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
552 ; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %16 = call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
553 ; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %17 = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
554 ; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %18 = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
555 ; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %19 = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
556 ; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %20 = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
557 ; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %21 = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
558 ; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %22 = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
559 ; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %23 = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
560 ; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %24 = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
561 ; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %25 = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
562 ; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %26 = call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
563 ; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %27 = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
564 ; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %28 = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
565 ; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %29 = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
566 ; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %30 = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
567 ; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %31 = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
568 ; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %32 = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> undef, i1 false, <2 x i1> undef, i32 undef)
569 ; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %33 = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> undef, i1 false, <4 x i1> undef, i32 undef)
570 ; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %34 = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> undef, i1 false, <8 x i1> undef, i32 undef)
571 ; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %35 = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> undef, i1 false, <16 x i1> undef, i32 undef)
572 ; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %36 = call <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
573 ; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %37 = call <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
574 ; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %38 = call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
575 ; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %39 = call <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
576 ; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %40 = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
577 ; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %41 = call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef)
578 ; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %42 = call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef)
579 ; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %43 = call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef)
580 ; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %44 = call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef)
581 ; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %45 = call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
582 ; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %46 = call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
583 ; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %47 = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
584 ; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %48 = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
585 ; CHECK-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %49 = call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
586 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
588 call <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8> undef, i1 false, <2 x i1> undef, i32 undef)
589 call <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8> undef, i1 false, <4 x i1> undef, i32 undef)
590 call <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8> undef, i1 false, <8 x i1> undef, i32 undef)
591 call <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8> undef, i1 false, <16 x i1> undef, i32 undef)
592 call <vscale x 1 x i8> @llvm.vp.cttz.nvx1i8(<vscale x 1 x i8> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
593 call <vscale x 2 x i8> @llvm.vp.cttz.nvx2i8(<vscale x 2 x i8> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
594 call <vscale x 4 x i8> @llvm.vp.cttz.nvx4i8(<vscale x 4 x i8> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
595 call <vscale x 8 x i8> @llvm.vp.cttz.nvx8i8(<vscale x 8 x i8> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
596 call <vscale x 16 x i8> @llvm.vp.cttz.nvx16i8(<vscale x 16 x i8> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
597 call <vscale x 32 x i8> @llvm.vp.cttz.nvx32i8(<vscale x 32 x i8> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
598 call <vscale x 64 x i8> @llvm.vp.cttz.nvx64i8(<vscale x 64 x i8> undef, i1 false, <vscale x 64 x i1> undef, i32 undef)
599 call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
600 call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
601 call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
602 call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
603 call <vscale x 1 x i16> @llvm.vp.cttz.nvx1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
604 call <vscale x 2 x i16> @llvm.vp.cttz.nvx2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
605 call <vscale x 4 x i16> @llvm.vp.cttz.nvx4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
606 call <vscale x 8 x i16> @llvm.vp.cttz.nvx8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
607 call <vscale x 16 x i16> @llvm.vp.cttz.nvx16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
608 call <vscale x 32 x i16> @llvm.vp.cttz.nvx32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
609 call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> undef, i1 false, <2 x i1> undef, i32 undef)
610 call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> undef, i1 false, <4 x i1> undef, i32 undef)
611 call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> undef, i1 false, <8 x i1> undef, i32 undef)
612 call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> undef, i1 false, <16 x i1> undef, i32 undef)
613 call <vscale x 1 x i16> @llvm.vp.cttz.nvx1i16(<vscale x 1 x i16> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
614 call <vscale x 2 x i16> @llvm.vp.cttz.nvx2i16(<vscale x 2 x i16> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
615 call <vscale x 4 x i16> @llvm.vp.cttz.nvx4i16(<vscale x 4 x i16> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
616 call <vscale x 8 x i16> @llvm.vp.cttz.nvx8i16(<vscale x 8 x i16> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
617 call <vscale x 16 x i16> @llvm.vp.cttz.nvx16i16(<vscale x 16 x i16> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
618 call <vscale x 32 x i16> @llvm.vp.cttz.nvx32i16(<vscale x 32 x i16> undef, i1 false, <vscale x 32 x i1> undef, i32 undef)
619 call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> undef, i1 false, <2 x i1> undef, i32 undef)
620 call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> undef, i1 false, <4 x i1> undef, i32 undef)
621 call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> undef, i1 false, <8 x i1> undef, i32 undef)
622 call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> undef, i1 false, <16 x i1> undef, i32 undef)
623 call <vscale x 1 x i32> @llvm.vp.cttz.nvx1i32(<vscale x 1 x i32> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
624 call <vscale x 2 x i32> @llvm.vp.cttz.nvx2i32(<vscale x 2 x i32> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
625 call <vscale x 4 x i32> @llvm.vp.cttz.nvx4i32(<vscale x 4 x i32> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
626 call <vscale x 8 x i32> @llvm.vp.cttz.nvx8i32(<vscale x 8 x i32> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
627 call <vscale x 16 x i32> @llvm.vp.cttz.nvx16i32(<vscale x 16 x i32> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
628 call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> undef, i1 false, <2 x i1> undef, i32 undef)
629 call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> undef, i1 false, <4 x i1> undef, i32 undef)
630 call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> undef, i1 false, <8 x i1> undef, i32 undef)
631 call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> undef, i1 false, <16 x i1> undef, i32 undef)
632 call <vscale x 1 x i64> @llvm.vp.cttz.nvx1i64(<vscale x 1 x i64> undef, i1 false, <vscale x 1 x i1> undef, i32 undef)
633 call <vscale x 2 x i64> @llvm.vp.cttz.nvx2i64(<vscale x 2 x i64> undef, i1 false, <vscale x 2 x i1> undef, i32 undef)
634 call <vscale x 4 x i64> @llvm.vp.cttz.nvx4i64(<vscale x 4 x i64> undef, i1 false, <vscale x 4 x i1> undef, i32 undef)
635 call <vscale x 8 x i64> @llvm.vp.cttz.nvx8i64(<vscale x 8 x i64> undef, i1 false, <vscale x 8 x i1> undef, i32 undef)
636 call <vscale x 16 x i64> @llvm.vp.cttz.nvx16i64(<vscale x 16 x i64> undef, i1 false, <vscale x 16 x i1> undef, i32 undef)
640 declare i16 @llvm.bswap.i16(i16)
641 declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>)
642 declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>)
643 declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
644 declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)
645 declare <vscale x 1 x i16> @llvm.bswap.nvx1i16(<vscale x 1 x i16>)
646 declare <vscale x 2 x i16> @llvm.bswap.nvx2i16(<vscale x 2 x i16>)
647 declare <vscale x 4 x i16> @llvm.bswap.nvx4i16(<vscale x 4 x i16>)
648 declare <vscale x 8 x i16> @llvm.bswap.nvx8i16(<vscale x 8 x i16>)
649 declare <vscale x 16 x i16> @llvm.bswap.nvx16i16(<vscale x 16 x i16>)
650 declare i32 @llvm.bswap.i32(i32)
651 declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>)
652 declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
653 declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)
654 declare <16 x i32> @llvm.bswap.v16i32(<16 x i32>)
655 declare <vscale x 1 x i32> @llvm.bswap.nvx1i32(<vscale x 1 x i32>)
656 declare <vscale x 2 x i32> @llvm.bswap.nvx2i32(<vscale x 2 x i32>)
657 declare <vscale x 4 x i32> @llvm.bswap.nvx4i32(<vscale x 4 x i32>)
658 declare <vscale x 8 x i32> @llvm.bswap.nvx8i32(<vscale x 8 x i32>)
659 declare <vscale x 16 x i32> @llvm.bswap.nvx16i32(<vscale x 16 x i32>)
660 declare i64 @llvm.bswap.i64(i64)
661 declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
662 declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)
663 declare <8 x i64> @llvm.bswap.v8i64(<8 x i64>)
664 declare <16 x i64> @llvm.bswap.v16i64(<16 x i64>)
665 declare <vscale x 1 x i64> @llvm.bswap.nvx1i64(<vscale x 1 x i64>)
666 declare <vscale x 2 x i64> @llvm.bswap.nvx2i64(<vscale x 2 x i64>)
667 declare <vscale x 4 x i64> @llvm.bswap.nvx4i64(<vscale x 4 x i64>)
668 declare <vscale x 8 x i64> @llvm.bswap.nvx8i64(<vscale x 8 x i64>)
669 declare <vscale x 16 x i64> @llvm.bswap.nvx16i64(<vscale x 16 x i64>)
671 declare i8 @llvm.bitreverse.i8(i8)
672 declare <2 x i8> @llvm.bitreverse.v2i8(<2 x i8>)
673 declare <4 x i8> @llvm.bitreverse.v4i8(<4 x i8>)
674 declare <8 x i8> @llvm.bitreverse.v8i8(<8 x i8>)
675 declare <16 x i8> @llvm.bitreverse.v16i8(<16 x i8>)
676 declare <vscale x 1 x i8> @llvm.bitreverse.nvx1i8(<vscale x 1 x i8>)
677 declare <vscale x 2 x i8> @llvm.bitreverse.nvx2i8(<vscale x 2 x i8>)
678 declare <vscale x 4 x i8> @llvm.bitreverse.nvx4i8(<vscale x 4 x i8>)
679 declare <vscale x 8 x i8> @llvm.bitreverse.nvx8i8(<vscale x 8 x i8>)
680 declare <vscale x 16 x i8> @llvm.bitreverse.nvx16i8(<vscale x 16 x i8>)
681 declare i16 @llvm.bitreverse.i16(i16)
682 declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>)
683 declare <4 x i16> @llvm.bitreverse.v4i16(<4 x i16>)
684 declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>)
685 declare <16 x i16> @llvm.bitreverse.v16i16(<16 x i16>)
686 declare <vscale x 1 x i16> @llvm.bitreverse.nvx1i16(<vscale x 1 x i16>)
687 declare <vscale x 2 x i16> @llvm.bitreverse.nvx2i16(<vscale x 2 x i16>)
688 declare <vscale x 4 x i16> @llvm.bitreverse.nvx4i16(<vscale x 4 x i16>)
689 declare <vscale x 8 x i16> @llvm.bitreverse.nvx8i16(<vscale x 8 x i16>)
690 declare <vscale x 16 x i16> @llvm.bitreverse.nvx16i16(<vscale x 16 x i16>)
691 declare i32 @llvm.bitreverse.i32(i32)
692 declare <2 x i32> @llvm.bitreverse.v2i32(<2 x i32>)
693 declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>)
694 declare <8 x i32> @llvm.bitreverse.v8i32(<8 x i32>)
695 declare <16 x i32> @llvm.bitreverse.v16i32(<16 x i32>)
696 declare <vscale x 1 x i32> @llvm.bitreverse.nvx1i32(<vscale x 1 x i32>)
697 declare <vscale x 2 x i32> @llvm.bitreverse.nvx2i32(<vscale x 2 x i32>)
698 declare <vscale x 4 x i32> @llvm.bitreverse.nvx4i32(<vscale x 4 x i32>)
699 declare <vscale x 8 x i32> @llvm.bitreverse.nvx8i32(<vscale x 8 x i32>)
700 declare <vscale x 16 x i32> @llvm.bitreverse.nvx16i32(<vscale x 16 x i32>)
701 declare i64 @llvm.bitreverse.i64(i64)
702 declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>)
703 declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>)
704 declare <8 x i64> @llvm.bitreverse.v8i64(<8 x i64>)
705 declare <16 x i64> @llvm.bitreverse.v16i64(<16 x i64>)
706 declare <vscale x 1 x i64> @llvm.bitreverse.nvx1i64(<vscale x 1 x i64>)
707 declare <vscale x 2 x i64> @llvm.bitreverse.nvx2i64(<vscale x 2 x i64>)
708 declare <vscale x 4 x i64> @llvm.bitreverse.nvx4i64(<vscale x 4 x i64>)
709 declare <vscale x 8 x i64> @llvm.bitreverse.nvx8i64(<vscale x 8 x i64>)
710 declare <vscale x 16 x i64> @llvm.bitreverse.nvx16i64(<vscale x 16 x i64>)
712 declare i8 @llvm.ctpop.i8(i8)
713 declare <2 x i8> @llvm.ctpop.v2i8(<2 x i8>)
714 declare <4 x i8> @llvm.ctpop.v4i8(<4 x i8>)
715 declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>)
716 declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
717 declare <vscale x 1 x i8> @llvm.ctpop.nvx1i8(<vscale x 1 x i8>)
718 declare <vscale x 2 x i8> @llvm.ctpop.nvx2i8(<vscale x 2 x i8>)
719 declare <vscale x 4 x i8> @llvm.ctpop.nvx4i8(<vscale x 4 x i8>)
720 declare <vscale x 8 x i8> @llvm.ctpop.nvx8i8(<vscale x 8 x i8>)
721 declare <vscale x 16 x i8> @llvm.ctpop.nvx16i8(<vscale x 16 x i8>)
722 declare i16 @llvm.ctpop.i16(i16)
723 declare <2 x i16> @llvm.ctpop.v2i16(<2 x i16>)
724 declare <4 x i16> @llvm.ctpop.v4i16(<4 x i16>)
725 declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
726 declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
727 declare <vscale x 1 x i16> @llvm.ctpop.nvx1i16(<vscale x 1 x i16>)
728 declare <vscale x 2 x i16> @llvm.ctpop.nvx2i16(<vscale x 2 x i16>)
729 declare <vscale x 4 x i16> @llvm.ctpop.nvx4i16(<vscale x 4 x i16>)
730 declare <vscale x 8 x i16> @llvm.ctpop.nvx8i16(<vscale x 8 x i16>)
731 declare <vscale x 16 x i16> @llvm.ctpop.nvx16i16(<vscale x 16 x i16>)
732 declare i32 @llvm.ctpop.i32(i32)
733 declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
734 declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
735 declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
736 declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>)
737 declare <vscale x 1 x i32> @llvm.ctpop.nvx1i32(<vscale x 1 x i32>)
738 declare <vscale x 2 x i32> @llvm.ctpop.nvx2i32(<vscale x 2 x i32>)
739 declare <vscale x 4 x i32> @llvm.ctpop.nvx4i32(<vscale x 4 x i32>)
740 declare <vscale x 8 x i32> @llvm.ctpop.nvx8i32(<vscale x 8 x i32>)
741 declare <vscale x 16 x i32> @llvm.ctpop.nvx16i32(<vscale x 16 x i32>)
742 declare i64 @llvm.ctpop.i64(i64)
743 declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
744 declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
745 declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>)
746 declare <16 x i64> @llvm.ctpop.v16i64(<16 x i64>)
747 declare <vscale x 1 x i64> @llvm.ctpop.nvx1i64(<vscale x 1 x i64>)
748 declare <vscale x 2 x i64> @llvm.ctpop.nvx2i64(<vscale x 2 x i64>)
749 declare <vscale x 4 x i64> @llvm.ctpop.nvx4i64(<vscale x 4 x i64>)
750 declare <vscale x 8 x i64> @llvm.ctpop.nvx8i64(<vscale x 8 x i64>)
751 declare <vscale x 16 x i64> @llvm.ctpop.nvx16i64(<vscale x 16 x i64>)
753 declare <2 x i16> @llvm.vp.bswap.v2i16(<2 x i16>, <2 x i1>, i32)
754 declare <4 x i16> @llvm.vp.bswap.v4i16(<4 x i16>, <4 x i1>, i32)
755 declare <8 x i16> @llvm.vp.bswap.v8i16(<8 x i16>, <8 x i1>, i32)
756 declare <16 x i16> @llvm.vp.bswap.v16i16(<16 x i16>, <16 x i1>, i32)
757 declare <vscale x 1 x i16> @llvm.vp.bswap.nvx1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
758 declare <vscale x 2 x i16> @llvm.vp.bswap.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
759 declare <vscale x 4 x i16> @llvm.vp.bswap.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)
760 declare <vscale x 8 x i16> @llvm.vp.bswap.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
761 declare <vscale x 16 x i16> @llvm.vp.bswap.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32)
762 declare <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32>, <2 x i1>, i32)
763 declare <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32>, <4 x i1>, i32)
764 declare <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32>, <8 x i1>, i32)
765 declare <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32>, <16 x i1>, i32)
766 declare <vscale x 1 x i32> @llvm.vp.bswap.nvx1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
767 declare <vscale x 2 x i32> @llvm.vp.bswap.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
768 declare <vscale x 4 x i32> @llvm.vp.bswap.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
769 declare <vscale x 8 x i32> @llvm.vp.bswap.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
770 declare <vscale x 16 x i32> @llvm.vp.bswap.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
771 declare <2 x i64> @llvm.vp.bswap.v2i64(<2 x i64>, <2 x i1>, i32)
772 declare <4 x i64> @llvm.vp.bswap.v4i64(<4 x i64>, <4 x i1>, i32)
773 declare <8 x i64> @llvm.vp.bswap.v8i64(<8 x i64>, <8 x i1>, i32)
774 declare <16 x i64> @llvm.vp.bswap.v16i64(<16 x i64>, <16 x i1>, i32)
775 declare <vscale x 1 x i64> @llvm.vp.bswap.nvx1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32)
776 declare <vscale x 2 x i64> @llvm.vp.bswap.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
777 declare <vscale x 4 x i64> @llvm.vp.bswap.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32)
778 declare <vscale x 8 x i64> @llvm.vp.bswap.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32)
779 declare <vscale x 16 x i64> @llvm.vp.bswap.nvx16i64(<vscale x 16 x i64>, <vscale x 16 x i1>, i32)
781 declare <2 x i8> @llvm.vp.ctpop.v2i8(<2 x i8>, <2 x i1>, i32)
782 declare <4 x i8> @llvm.vp.ctpop.v4i8(<4 x i8>, <4 x i1>, i32)
783 declare <8 x i8> @llvm.vp.ctpop.v8i8(<8 x i8>, <8 x i1>, i32)
784 declare <16 x i8> @llvm.vp.ctpop.v16i8(<16 x i8>, <16 x i1>, i32)
785 declare <vscale x 1 x i8> @llvm.vp.ctpop.nvx1i8(<vscale x 1 x i8>, <vscale x 1 x i1>, i32)
786 declare <vscale x 2 x i8> @llvm.vp.ctpop.nvx2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32)
787 declare <vscale x 4 x i8> @llvm.vp.ctpop.nvx4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, i32)
788 declare <vscale x 8 x i8> @llvm.vp.ctpop.nvx8i8(<vscale x 8 x i8>, <vscale x 8 x i1>, i32)
789 declare <vscale x 16 x i8> @llvm.vp.ctpop.nvx16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i32)
790 declare <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16>, <2 x i1>, i32)
791 declare <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16>, <4 x i1>, i32)
792 declare <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16>, <8 x i1>, i32)
793 declare <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16>, <16 x i1>, i32)
794 declare <vscale x 1 x i16> @llvm.vp.ctpop.nvx1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)
795 declare <vscale x 2 x i16> @llvm.vp.ctpop.nvx2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
796 declare <vscale x 4 x i16> @llvm.vp.ctpop.nvx4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)
797 declare <vscale x 8 x i16> @llvm.vp.ctpop.nvx8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
798 declare <vscale x 16 x i16> @llvm.vp.ctpop.nvx16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32)
799 declare <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32>, <2 x i1>, i32)
800 declare <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32>, <4 x i1>, i32)
801 declare <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32>, <8 x i1>, i32)
802 declare <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32>, <16 x i1>, i32)
803 declare <vscale x 1 x i32> @llvm.vp.ctpop.nvx1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32)
804 declare <vscale x 2 x i32> @llvm.vp.ctpop.nvx2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
805 declare <vscale x 4 x i32> @llvm.vp.ctpop.nvx4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
806 declare <vscale x 8 x i32> @llvm.vp.ctpop.nvx8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
807 declare <vscale x 16 x i32> @llvm.vp.ctpop.nvx16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32)
808 declare <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64>, <2 x i1>, i32)
809 declare <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64>, <4 x i1>, i32)
810 declare <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64>, <8 x i1>, i32)
811 declare <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64>, <16 x i1>, i32)
812 declare <vscale x 1 x i64> @llvm.vp.ctpop.nvx1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32)
813 declare <vscale x 2 x i64> @llvm.vp.ctpop.nvx2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
814 declare <vscale x 4 x i64> @llvm.vp.ctpop.nvx4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32)
815 declare <vscale x 8 x i64> @llvm.vp.ctpop.nvx8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32)
816 declare <vscale x 16 x i64> @llvm.vp.ctpop.nvx16i64(<vscale x 16 x i64>, <vscale x 16 x i1>, i32)
818 declare <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8>, i1 immarg, <2 x i1>, i32)
819 declare <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8>, i1 immarg, <4 x i1>, i32)
820 declare <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8>, i1 immarg, <8 x i1>, i32)
821 declare <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8>, i1 immarg, <16 x i1>, i32)
822 declare <vscale x 1 x i8> @llvm.vp.ctlz.nvx1i8(<vscale x 1 x i8>, i1 immarg, <vscale x 1 x i1>, i32)
823 declare <vscale x 2 x i8> @llvm.vp.ctlz.nvx2i8(<vscale x 2 x i8>, i1 immarg, <vscale x 2 x i1>, i32)
824 declare <vscale x 4 x i8> @llvm.vp.ctlz.nvx4i8(<vscale x 4 x i8>, i1 immarg, <vscale x 4 x i1>, i32)
825 declare <vscale x 8 x i8> @llvm.vp.ctlz.nvx8i8(<vscale x 8 x i8>, i1 immarg, <vscale x 8 x i1>, i32)
826 declare <vscale x 16 x i8> @llvm.vp.ctlz.nvx16i8(<vscale x 16 x i8>, i1 immarg, <vscale x 16 x i1>, i32)
827 declare <vscale x 32 x i8> @llvm.vp.ctlz.nvx32i8(<vscale x 32 x i8>, i1 immarg, <vscale x 32 x i1>, i32)
828 declare <vscale x 64 x i8> @llvm.vp.ctlz.nvx64i8(<vscale x 64 x i8>, i1 immarg, <vscale x 64 x i1>, i32)
829 declare <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16>, i1 immarg, <2 x i1>, i32)
830 declare <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16>, i1 immarg, <4 x i1>, i32)
831 declare <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16>, i1 immarg, <8 x i1>, i32)
832 declare <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16>, i1 immarg, <16 x i1>, i32)
833 declare <vscale x 1 x i16> @llvm.vp.ctlz.nvx1i16(<vscale x 1 x i16>, i1 immarg, <vscale x 1 x i1>, i32)
834 declare <vscale x 2 x i16> @llvm.vp.ctlz.nvx2i16(<vscale x 2 x i16>, i1 immarg, <vscale x 2 x i1>, i32)
835 declare <vscale x 4 x i16> @llvm.vp.ctlz.nvx4i16(<vscale x 4 x i16>, i1 immarg, <vscale x 4 x i1>, i32)
836 declare <vscale x 8 x i16> @llvm.vp.ctlz.nvx8i16(<vscale x 8 x i16>, i1 immarg, <vscale x 8 x i1>, i32)
837 declare <vscale x 16 x i16> @llvm.vp.ctlz.nvx16i16(<vscale x 16 x i16>, i1 immarg, <vscale x 16 x i1>, i32)
838 declare <vscale x 32 x i16> @llvm.vp.ctlz.nvx32i16(<vscale x 32 x i16>, i1 immarg, <vscale x 32 x i1>, i32)
839 declare <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32>, i1 immarg, <2 x i1>, i32)
840 declare <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32>, i1 immarg, <4 x i1>, i32)
841 declare <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32>, i1 immarg, <8 x i1>, i32)
842 declare <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32>, i1 immarg, <16 x i1>, i32)
843 declare <vscale x 1 x i32> @llvm.vp.ctlz.nvx1i32(<vscale x 1 x i32>, i1 immarg, <vscale x 1 x i1>, i32)
844 declare <vscale x 2 x i32> @llvm.vp.ctlz.nvx2i32(<vscale x 2 x i32>, i1 immarg, <vscale x 2 x i1>, i32)
845 declare <vscale x 4 x i32> @llvm.vp.ctlz.nvx4i32(<vscale x 4 x i32>, i1 immarg, <vscale x 4 x i1>, i32)
846 declare <vscale x 8 x i32> @llvm.vp.ctlz.nvx8i32(<vscale x 8 x i32>, i1 immarg, <vscale x 8 x i1>, i32)
847 declare <vscale x 16 x i32> @llvm.vp.ctlz.nvx16i32(<vscale x 16 x i32>, i1 immarg, <vscale x 16 x i1>, i32)
848 declare <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32)
849 declare <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32)
850 declare <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32)
851 declare <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32)
852 declare <vscale x 1 x i64> @llvm.vp.ctlz.nvx1i64(<vscale x 1 x i64>, i1 immarg, <vscale x 1 x i1>, i32)
853 declare <vscale x 2 x i64> @llvm.vp.ctlz.nvx2i64(<vscale x 2 x i64>, i1 immarg, <vscale x 2 x i1>, i32)
854 declare <vscale x 4 x i64> @llvm.vp.ctlz.nvx4i64(<vscale x 4 x i64>, i1 immarg, <vscale x 4 x i1>, i32)
855 declare <vscale x 8 x i64> @llvm.vp.ctlz.nvx8i64(<vscale x 8 x i64>, i1 immarg, <vscale x 8 x i1>, i32)
856 declare <vscale x 16 x i64> @llvm.vp.ctlz.nvx16i64(<vscale x 16 x i64>, i1 immarg, <vscale x 16 x i1>, i32)
858 declare <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8>, i1 immarg, <2 x i1>, i32)
859 declare <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8>, i1 immarg, <4 x i1>, i32)
860 declare <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8>, i1 immarg, <8 x i1>, i32)
861 declare <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8>, i1 immarg, <16 x i1>, i32)
862 declare <vscale x 1 x i8> @llvm.vp.cttz.nvx1i8(<vscale x 1 x i8>, i1 immarg, <vscale x 1 x i1>, i32)
863 declare <vscale x 2 x i8> @llvm.vp.cttz.nvx2i8(<vscale x 2 x i8>, i1 immarg, <vscale x 2 x i1>, i32)
864 declare <vscale x 4 x i8> @llvm.vp.cttz.nvx4i8(<vscale x 4 x i8>, i1 immarg, <vscale x 4 x i1>, i32)
865 declare <vscale x 8 x i8> @llvm.vp.cttz.nvx8i8(<vscale x 8 x i8>, i1 immarg, <vscale x 8 x i1>, i32)
866 declare <vscale x 16 x i8> @llvm.vp.cttz.nvx16i8(<vscale x 16 x i8>, i1 immarg, <vscale x 16 x i1>, i32)
867 declare <vscale x 32 x i8> @llvm.vp.cttz.nvx32i8(<vscale x 32 x i8>, i1 immarg, <vscale x 32 x i1>, i32)
868 declare <vscale x 64 x i8> @llvm.vp.cttz.nvx64i8(<vscale x 64 x i8>, i1 immarg, <vscale x 64 x i1>, i32)
869 declare <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16>, i1 immarg, <2 x i1>, i32)
870 declare <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16>, i1 immarg, <4 x i1>, i32)
871 declare <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16>, i1 immarg, <8 x i1>, i32)
872 declare <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16>, i1 immarg, <16 x i1>, i32)
873 declare <vscale x 1 x i16> @llvm.vp.cttz.nvx1i16(<vscale x 1 x i16>, i1 immarg, <vscale x 1 x i1>, i32)
874 declare <vscale x 2 x i16> @llvm.vp.cttz.nvx2i16(<vscale x 2 x i16>, i1 immarg, <vscale x 2 x i1>, i32)
875 declare <vscale x 4 x i16> @llvm.vp.cttz.nvx4i16(<vscale x 4 x i16>, i1 immarg, <vscale x 4 x i1>, i32)
876 declare <vscale x 8 x i16> @llvm.vp.cttz.nvx8i16(<vscale x 8 x i16>, i1 immarg, <vscale x 8 x i1>, i32)
877 declare <vscale x 16 x i16> @llvm.vp.cttz.nvx16i16(<vscale x 16 x i16>, i1 immarg, <vscale x 16 x i1>, i32)
878 declare <vscale x 32 x i16> @llvm.vp.cttz.nvx32i16(<vscale x 32 x i16>, i1 immarg, <vscale x 32 x i1>, i32)
879 declare <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32>, i1 immarg, <2 x i1>, i32)
880 declare <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32>, i1 immarg, <4 x i1>, i32)
881 declare <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32>, i1 immarg, <8 x i1>, i32)
882 declare <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32>, i1 immarg, <16 x i1>, i32)
883 declare <vscale x 1 x i32> @llvm.vp.cttz.nvx1i32(<vscale x 1 x i32>, i1 immarg, <vscale x 1 x i1>, i32)
884 declare <vscale x 2 x i32> @llvm.vp.cttz.nvx2i32(<vscale x 2 x i32>, i1 immarg, <vscale x 2 x i1>, i32)
885 declare <vscale x 4 x i32> @llvm.vp.cttz.nvx4i32(<vscale x 4 x i32>, i1 immarg, <vscale x 4 x i1>, i32)
886 declare <vscale x 8 x i32> @llvm.vp.cttz.nvx8i32(<vscale x 8 x i32>, i1 immarg, <vscale x 8 x i1>, i32)
887 declare <vscale x 16 x i32> @llvm.vp.cttz.nvx16i32(<vscale x 16 x i32>, i1 immarg, <vscale x 16 x i1>, i32)
888 declare <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32)
889 declare <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32)
890 declare <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32)
891 declare <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32)
892 declare <vscale x 1 x i64> @llvm.vp.cttz.nvx1i64(<vscale x 1 x i64>, i1 immarg, <vscale x 1 x i1>, i32)
893 declare <vscale x 2 x i64> @llvm.vp.cttz.nvx2i64(<vscale x 2 x i64>, i1 immarg, <vscale x 2 x i1>, i32)
894 declare <vscale x 4 x i64> @llvm.vp.cttz.nvx4i64(<vscale x 4 x i64>, i1 immarg, <vscale x 4 x i1>, i32)
895 declare <vscale x 8 x i64> @llvm.vp.cttz.nvx8i64(<vscale x 8 x i64>, i1 immarg, <vscale x 8 x i1>, i32)
896 declare <vscale x 16 x i64> @llvm.vp.cttz.nvx16i64(<vscale x 16 x i64>, i1 immarg, <vscale x 16 x i1>, i32)