; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK
; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
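
; Check that ctlz/ctpop/cttz on fixed-length vectors are lowered with NEON for
; vectors of 128 bits or less, and with predicated SVE instructions for wider
; vectors, for each of the minimum SVE register widths exercised above.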
target triple = "aarch64-unknown-linux-gnu"
; Don't use SVE when its registers are no bigger than NEON.
; NO_SVE-NOT: ptrue
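
; Count leading zeros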
; Don't use SVE for 64-bit vectors.
define <8 x i8> @ctlz_v8i8(<8 x i8> %op) #0 {
; CHECK-LABEL: ctlz_v8i8:
; CHECK: clz v0.8b, v0.8b
; CHECK-NEXT: ret
  %res = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %op)
  ret <8 x i8> %res
}

; Don't use SVE for 128-bit vectors.
define <16 x i8> @ctlz_v16i8(<16 x i8> %op) #0 {
; CHECK-LABEL: ctlz_v16i8:
; CHECK: clz v0.16b, v0.16b
; CHECK-NEXT: ret
  %res = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %op)
  ret <16 x i8> %res
}

define void @ctlz_v32i8(<32 x i8>* %a) #0 {
; CHECK-LABEL: ctlz_v32i8:
; CHECK: ptrue [[PG:p[0-9]+]].b, vl32
; CHECK-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
; CHECK-NEXT: clz [[RES:z[0-9]+]].b, [[PG]]/m, [[OP]].b
; CHECK-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <32 x i8>, <32 x i8>* %a
  %res = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %op)
  store <32 x i8> %res, <32 x i8>* %a
  ret void
}

define void @ctlz_v64i8(<64 x i8>* %a) #0 {
; CHECK-LABEL: ctlz_v64i8:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].b, vl64
; VBITS_GE_512-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: clz [[RES:z[0-9]+]].b, [[PG]]/m, [[OP]].b
; VBITS_GE_512-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
; VBITS_EQ_256-DAG: ld1b { [[OP_LO:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1b { [[OP_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
; VBITS_EQ_256-DAG: clz [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP_LO]].b
; VBITS_EQ_256-DAG: clz [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP_HI]].b
; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]]
; VBITS_EQ_256-NEXT: ret
  %op = load <64 x i8>, <64 x i8>* %a
  %res = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %op)
  store <64 x i8> %res, <64 x i8>* %a
  ret void
}

define void @ctlz_v128i8(<128 x i8>* %a) #0 {
; CHECK-LABEL: ctlz_v128i8:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].b, vl128
; VBITS_GE_1024-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: clz [[RES:z[0-9]+]].b, [[PG]]/m, [[OP]].b
; VBITS_GE_1024-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <128 x i8>, <128 x i8>* %a
  %res = call <128 x i8> @llvm.ctlz.v128i8(<128 x i8> %op)
  store <128 x i8> %res, <128 x i8>* %a
  ret void
}

define void @ctlz_v256i8(<256 x i8>* %a) #0 {
; CHECK-LABEL: ctlz_v256i8:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].b, vl256
; VBITS_GE_2048-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: clz [[RES:z[0-9]+]].b, [[PG]]/m, [[OP]].b
; VBITS_GE_2048-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <256 x i8>, <256 x i8>* %a
  %res = call <256 x i8> @llvm.ctlz.v256i8(<256 x i8> %op)
  store <256 x i8> %res, <256 x i8>* %a
  ret void
}
; Don't use SVE for 64-bit vectors.
define <4 x i16> @ctlz_v4i16(<4 x i16> %op) #0 {
; CHECK-LABEL: ctlz_v4i16:
; CHECK: clz v0.4h, v0.4h
; CHECK-NEXT: ret
  %res = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %op)
  ret <4 x i16> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x i16> @ctlz_v8i16(<8 x i16> %op) #0 {
; CHECK-LABEL: ctlz_v8i16:
; CHECK: clz v0.8h, v0.8h
; CHECK-NEXT: ret
  %res = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %op)
  ret <8 x i16> %res
}

define void @ctlz_v16i16(<16 x i16>* %a) #0 {
; CHECK-LABEL: ctlz_v16i16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-NEXT: clz [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <16 x i16>, <16 x i16>* %a
  %res = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %op)
  store <16 x i16> %res, <16 x i16>* %a
  ret void
}

define void @ctlz_v32i16(<32 x i16>* %a) #0 {
; CHECK-LABEL: ctlz_v32i16:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: clz [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-DAG: clz [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
; VBITS_EQ_256-DAG: clz [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-NEXT: ret
  %op = load <32 x i16>, <32 x i16>* %a
  %res = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %op)
  store <32 x i16> %res, <32 x i16>* %a
  ret void
}

define void @ctlz_v64i16(<64 x i16>* %a) #0 {
; CHECK-LABEL: ctlz_v64i16:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: clz [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <64 x i16>, <64 x i16>* %a
  %res = call <64 x i16> @llvm.ctlz.v64i16(<64 x i16> %op)
  store <64 x i16> %res, <64 x i16>* %a
  ret void
}

define void @ctlz_v128i16(<128 x i16>* %a) #0 {
; CHECK-LABEL: ctlz_v128i16:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: clz [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <128 x i16>, <128 x i16>* %a
  %res = call <128 x i16> @llvm.ctlz.v128i16(<128 x i16> %op)
  store <128 x i16> %res, <128 x i16>* %a
  ret void
}
; Don't use SVE for 64-bit vectors.
define <2 x i32> @ctlz_v2i32(<2 x i32> %op) #0 {
; CHECK-LABEL: ctlz_v2i32:
; CHECK: clz v0.2s, v0.2s
; CHECK-NEXT: ret
  %res = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %op)
  ret <2 x i32> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x i32> @ctlz_v4i32(<4 x i32> %op) #0 {
; CHECK-LABEL: ctlz_v4i32:
; CHECK: clz v0.4s, v0.4s
; CHECK-NEXT: ret
  %res = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %op)
  ret <4 x i32> %res
}

define void @ctlz_v8i32(<8 x i32>* %a) #0 {
; CHECK-LABEL: ctlz_v8i32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK-NEXT: clz [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <8 x i32>, <8 x i32>* %a
  %res = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %op)
  store <8 x i32> %res, <8 x i32>* %a
  ret void
}

define void @ctlz_v16i32(<16 x i32>* %a) #0 {
; CHECK-LABEL: ctlz_v16i32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: clz [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-DAG: clz [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
; VBITS_EQ_256-DAG: clz [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-NEXT: ret
  %op = load <16 x i32>, <16 x i32>* %a
  %res = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %op)
  store <16 x i32> %res, <16 x i32>* %a
  ret void
}

define void @ctlz_v32i32(<32 x i32>* %a) #0 {
; CHECK-LABEL: ctlz_v32i32:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: clz [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <32 x i32>, <32 x i32>* %a
  %res = call <32 x i32> @llvm.ctlz.v32i32(<32 x i32> %op)
  store <32 x i32> %res, <32 x i32>* %a
  ret void
}

define void @ctlz_v64i32(<64 x i32>* %a) #0 {
; CHECK-LABEL: ctlz_v64i32:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: clz [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <64 x i32>, <64 x i32>* %a
  %res = call <64 x i32> @llvm.ctlz.v64i32(<64 x i32> %op)
  store <64 x i32> %res, <64 x i32>* %a
  ret void
}
define <1 x i64> @ctlz_v1i64(<1 x i64> %op) #0 {
; CHECK-LABEL: ctlz_v1i64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl1
; CHECK-NEXT: clz z0.d, [[PG]]/m, z0.d
; CHECK-NEXT: ret
  %res = call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %op)
  ret <1 x i64> %res
}

define <2 x i64> @ctlz_v2i64(<2 x i64> %op) #0 {
; CHECK-LABEL: ctlz_v2i64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl2
; CHECK-NEXT: clz z0.d, [[PG]]/m, z0.d
; CHECK-NEXT: ret
  %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %op)
  ret <2 x i64> %res
}

define void @ctlz_v4i64(<4 x i64>* %a) #0 {
; CHECK-LABEL: ctlz_v4i64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-NEXT: clz [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <4 x i64>, <4 x i64>* %a
  %res = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %op)
  store <4 x i64> %res, <4 x i64>* %a
  ret void
}

define void @ctlz_v8i64(<8 x i64>* %a) #0 {
; CHECK-LABEL: ctlz_v8i64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: clz [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-DAG: clz [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
; VBITS_EQ_256-DAG: clz [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-NEXT: ret
  %op = load <8 x i64>, <8 x i64>* %a
  %res = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %op)
  store <8 x i64> %res, <8 x i64>* %a
  ret void
}

define void @ctlz_v16i64(<16 x i64>* %a) #0 {
; CHECK-LABEL: ctlz_v16i64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: clz [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <16 x i64>, <16 x i64>* %a
  %res = call <16 x i64> @llvm.ctlz.v16i64(<16 x i64> %op)
  store <16 x i64> %res, <16 x i64>* %a
  ret void
}

define void @ctlz_v32i64(<32 x i64>* %a) #0 {
; CHECK-LABEL: ctlz_v32i64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: clz [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <32 x i64>, <32 x i64>* %a
  %res = call <32 x i64> @llvm.ctlz.v32i64(<32 x i64> %op)
  store <32 x i64> %res, <32 x i64>* %a
  ret void
}
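
; Count set bits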
; Don't use SVE for 64-bit vectors.
define <8 x i8> @ctpop_v8i8(<8 x i8> %op) #0 {
; CHECK-LABEL: ctpop_v8i8:
; CHECK: cnt v0.8b, v0.8b
; CHECK-NEXT: ret
  %res = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %op)
  ret <8 x i8> %res
}

; Don't use SVE for 128-bit vectors.
define <16 x i8> @ctpop_v16i8(<16 x i8> %op) #0 {
; CHECK-LABEL: ctpop_v16i8:
; CHECK: cnt v0.16b, v0.16b
; CHECK-NEXT: ret
  %res = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %op)
  ret <16 x i8> %res
}

define void @ctpop_v32i8(<32 x i8>* %a) #0 {
; CHECK-LABEL: ctpop_v32i8:
; CHECK: ptrue [[PG:p[0-9]+]].b, vl32
; CHECK-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
; CHECK-NEXT: cnt [[RES:z[0-9]+]].b, [[PG]]/m, [[OP]].b
; CHECK-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <32 x i8>, <32 x i8>* %a
  %res = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %op)
  store <32 x i8> %res, <32 x i8>* %a
  ret void
}

define void @ctpop_v64i8(<64 x i8>* %a) #0 {
; CHECK-LABEL: ctpop_v64i8:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].b, vl64
; VBITS_GE_512-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: cnt [[RES:z[0-9]+]].b, [[PG]]/m, [[OP]].b
; VBITS_GE_512-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
; VBITS_EQ_256-DAG: ld1b { [[OP_LO:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1b { [[OP_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
; VBITS_EQ_256-DAG: cnt [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP_LO]].b
; VBITS_EQ_256-DAG: cnt [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP_HI]].b
; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]]
; VBITS_EQ_256-NEXT: ret
  %op = load <64 x i8>, <64 x i8>* %a
  %res = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %op)
  store <64 x i8> %res, <64 x i8>* %a
  ret void
}

define void @ctpop_v128i8(<128 x i8>* %a) #0 {
; CHECK-LABEL: ctpop_v128i8:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].b, vl128
; VBITS_GE_1024-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: cnt [[RES:z[0-9]+]].b, [[PG]]/m, [[OP]].b
; VBITS_GE_1024-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <128 x i8>, <128 x i8>* %a
  %res = call <128 x i8> @llvm.ctpop.v128i8(<128 x i8> %op)
  store <128 x i8> %res, <128 x i8>* %a
  ret void
}

define void @ctpop_v256i8(<256 x i8>* %a) #0 {
; CHECK-LABEL: ctpop_v256i8:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].b, vl256
; VBITS_GE_2048-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: cnt [[RES:z[0-9]+]].b, [[PG]]/m, [[OP]].b
; VBITS_GE_2048-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <256 x i8>, <256 x i8>* %a
  %res = call <256 x i8> @llvm.ctpop.v256i8(<256 x i8> %op)
  store <256 x i8> %res, <256 x i8>* %a
  ret void
}
; Don't use SVE for 64-bit vectors.
define <4 x i16> @ctpop_v4i16(<4 x i16> %op) #0 {
; CHECK-LABEL: ctpop_v4i16:
; CHECK: cnt v0.8b, v0.8b
; CHECK-NEXT: uaddlp v0.4h, v0.8b
; CHECK-NEXT: ret
  %res = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %op)
  ret <4 x i16> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x i16> @ctpop_v8i16(<8 x i16> %op) #0 {
; CHECK-LABEL: ctpop_v8i16:
; CHECK: cnt v0.16b, v0.16b
; CHECK-NEXT: uaddlp v0.8h, v0.16b
; CHECK-NEXT: ret
  %res = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %op)
  ret <8 x i16> %res
}

define void @ctpop_v16i16(<16 x i16>* %a) #0 {
; CHECK-LABEL: ctpop_v16i16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-NEXT: cnt [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <16 x i16>, <16 x i16>* %a
  %res = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %op)
  store <16 x i16> %res, <16 x i16>* %a
  ret void
}

define void @ctpop_v32i16(<32 x i16>* %a) #0 {
; CHECK-LABEL: ctpop_v32i16:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: cnt [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-DAG: cnt [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
; VBITS_EQ_256-DAG: cnt [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-NEXT: ret
  %op = load <32 x i16>, <32 x i16>* %a
  %res = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %op)
  store <32 x i16> %res, <32 x i16>* %a
  ret void
}

define void @ctpop_v64i16(<64 x i16>* %a) #0 {
; CHECK-LABEL: ctpop_v64i16:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: cnt [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <64 x i16>, <64 x i16>* %a
  %res = call <64 x i16> @llvm.ctpop.v64i16(<64 x i16> %op)
  store <64 x i16> %res, <64 x i16>* %a
  ret void
}

define void @ctpop_v128i16(<128 x i16>* %a) #0 {
; CHECK-LABEL: ctpop_v128i16:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: cnt [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <128 x i16>, <128 x i16>* %a
  %res = call <128 x i16> @llvm.ctpop.v128i16(<128 x i16> %op)
  store <128 x i16> %res, <128 x i16>* %a
  ret void
}
; Don't use SVE for 64-bit vectors.
define <2 x i32> @ctpop_v2i32(<2 x i32> %op) #0 {
; CHECK-LABEL: ctpop_v2i32:
; CHECK: cnt v0.8b, v0.8b
; CHECK-NEXT: uaddlp v0.4h, v0.8b
; CHECK-NEXT: uaddlp v0.2s, v0.4h
; CHECK-NEXT: ret
  %res = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %op)
  ret <2 x i32> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x i32> @ctpop_v4i32(<4 x i32> %op) #0 {
; CHECK-LABEL: ctpop_v4i32:
; CHECK: cnt v0.16b, v0.16b
; CHECK-NEXT: uaddlp v0.8h, v0.16b
; CHECK-NEXT: uaddlp v0.4s, v0.8h
; CHECK-NEXT: ret
  %res = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %op)
  ret <4 x i32> %res
}

define void @ctpop_v8i32(<8 x i32>* %a) #0 {
; CHECK-LABEL: ctpop_v8i32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK-NEXT: cnt [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <8 x i32>, <8 x i32>* %a
  %res = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %op)
  store <8 x i32> %res, <8 x i32>* %a
  ret void
}

define void @ctpop_v16i32(<16 x i32>* %a) #0 {
; CHECK-LABEL: ctpop_v16i32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: cnt [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-DAG: cnt [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
; VBITS_EQ_256-DAG: cnt [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-NEXT: ret
  %op = load <16 x i32>, <16 x i32>* %a
  %res = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %op)
  store <16 x i32> %res, <16 x i32>* %a
  ret void
}

define void @ctpop_v32i32(<32 x i32>* %a) #0 {
; CHECK-LABEL: ctpop_v32i32:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: cnt [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <32 x i32>, <32 x i32>* %a
  %res = call <32 x i32> @llvm.ctpop.v32i32(<32 x i32> %op)
  store <32 x i32> %res, <32 x i32>* %a
  ret void
}

define void @ctpop_v64i32(<64 x i32>* %a) #0 {
; CHECK-LABEL: ctpop_v64i32:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: cnt [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <64 x i32>, <64 x i32>* %a
  %res = call <64 x i32> @llvm.ctpop.v64i32(<64 x i32> %op)
  store <64 x i32> %res, <64 x i32>* %a
  ret void
}
; Don't use SVE for 64-bit vectors.
define <1 x i64> @ctpop_v1i64(<1 x i64> %op) #0 {
; CHECK-LABEL: ctpop_v1i64:
; CHECK: cnt v0.8b, v0.8b
; CHECK-NEXT: uaddlp v0.4h, v0.8b
; CHECK-NEXT: uaddlp v0.2s, v0.4h
; CHECK-NEXT: uaddlp v0.1d, v0.2s
; CHECK-NEXT: ret
  %res = call <1 x i64> @llvm.ctpop.v1i64(<1 x i64> %op)
  ret <1 x i64> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x i64> @ctpop_v2i64(<2 x i64> %op) #0 {
; CHECK-LABEL: ctpop_v2i64:
; CHECK: cnt v0.16b, v0.16b
; CHECK-NEXT: uaddlp v0.8h, v0.16b
; CHECK-NEXT: uaddlp v0.4s, v0.8h
; CHECK-NEXT: uaddlp v0.2d, v0.4s
; CHECK-NEXT: ret
  %res = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %op)
  ret <2 x i64> %res
}

define void @ctpop_v4i64(<4 x i64>* %a) #0 {
; CHECK-LABEL: ctpop_v4i64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-NEXT: cnt [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <4 x i64>, <4 x i64>* %a
  %res = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %op)
  store <4 x i64> %res, <4 x i64>* %a
  ret void
}

define void @ctpop_v8i64(<8 x i64>* %a) #0 {
; CHECK-LABEL: ctpop_v8i64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: cnt [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-DAG: cnt [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
; VBITS_EQ_256-DAG: cnt [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-NEXT: ret
  %op = load <8 x i64>, <8 x i64>* %a
  %res = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %op)
  store <8 x i64> %res, <8 x i64>* %a
  ret void
}

define void @ctpop_v16i64(<16 x i64>* %a) #0 {
; CHECK-LABEL: ctpop_v16i64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: cnt [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <16 x i64>, <16 x i64>* %a
  %res = call <16 x i64> @llvm.ctpop.v16i64(<16 x i64> %op)
  store <16 x i64> %res, <16 x i64>* %a
  ret void
}

define void @ctpop_v32i64(<32 x i64>* %a) #0 {
; CHECK-LABEL: ctpop_v32i64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: cnt [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <32 x i64>, <32 x i64>* %a
  %res = call <32 x i64> @llvm.ctpop.v32i64(<32 x i64> %op)
  store <32 x i64> %res, <32 x i64>* %a
  ret void
}
; Count trailing zeros
define <8 x i8> @cttz_v8i8(<8 x i8> %op) #0 {
; CHECK-LABEL: cttz_v8i8:
; CHECK: ptrue [[PG:p[0-9]+]].b, vl8
; CHECK-NEXT: rbit z[[RBIT:[0-9]+]].b, p0/m, z0.b
; CHECK-NEXT: clz v0.8b, v[[RBIT]].8b
; CHECK-NEXT: ret
  %res = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %op)
  ret <8 x i8> %res
}

define <16 x i8> @cttz_v16i8(<16 x i8> %op) #0 {
; CHECK-LABEL: cttz_v16i8:
; CHECK: ptrue [[PG:p[0-9]+]].b, vl16
; CHECK-NEXT: rbit z[[RBIT:[0-9]+]].b, p0/m, z0.b
; CHECK-NEXT: clz v0.16b, v[[RBIT]].16b
; CHECK-NEXT: ret
  %res = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %op)
  ret <16 x i8> %res
}

define void @cttz_v32i8(<32 x i8>* %a) #0 {
; CHECK-LABEL: cttz_v32i8:
; CHECK: ptrue [[PG:p[0-9]+]].b, vl32
; CHECK-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
; CHECK-NEXT: rbit [[RBIT:z[0-9]+]].b, [[PG]]/m, [[OP]].b
; CHECK-NEXT: clz [[RES:z[0-9]+]].b, [[PG]]/m, [[RBIT]].b
; CHECK-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <32 x i8>, <32 x i8>* %a
  %res = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %op)
  store <32 x i8> %res, <32 x i8>* %a
  ret void
}

define void @cttz_v64i8(<64 x i8>* %a) #0 {
; CHECK-LABEL: cttz_v64i8:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].b, vl64
; VBITS_GE_512-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: rbit [[RBIT:z[0-9]+]].b, [[PG]]/m, [[OP]].b
; VBITS_GE_512-NEXT: clz [[RES:z[0-9]+]].b, [[PG]]/m, [[RBIT]].b
; VBITS_GE_512-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
; VBITS_EQ_256-DAG: ld1b { [[OP_LO:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1b { [[OP_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
; VBITS_EQ_256-DAG: rbit [[RBIT_LO:z[0-9]+]].b, [[PG]]/m, [[OP_LO]].b
; VBITS_EQ_256-DAG: rbit [[RBIT_HI:z[0-9]+]].b, [[PG]]/m, [[OP_HI]].b
; VBITS_EQ_256-DAG: clz [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[RBIT_LO]].b
; VBITS_EQ_256-DAG: clz [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[RBIT_HI]].b
; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]]
; VBITS_EQ_256-NEXT: ret
  %op = load <64 x i8>, <64 x i8>* %a
  %res = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %op)
  store <64 x i8> %res, <64 x i8>* %a
  ret void
}

define void @cttz_v128i8(<128 x i8>* %a) #0 {
; CHECK-LABEL: cttz_v128i8:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].b, vl128
; VBITS_GE_1024-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: rbit [[RBIT:z[0-9]+]].b, [[PG]]/m, [[OP]].b
; VBITS_GE_1024-NEXT: clz [[RES:z[0-9]+]].b, [[PG]]/m, [[RBIT]].b
; VBITS_GE_1024-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <128 x i8>, <128 x i8>* %a
  %res = call <128 x i8> @llvm.cttz.v128i8(<128 x i8> %op)
  store <128 x i8> %res, <128 x i8>* %a
  ret void
}

define void @cttz_v256i8(<256 x i8>* %a) #0 {
; CHECK-LABEL: cttz_v256i8:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].b, vl256
; VBITS_GE_2048-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: rbit [[RBIT:z[0-9]+]].b, [[PG]]/m, [[OP]].b
; VBITS_GE_2048-NEXT: clz [[RES:z[0-9]+]].b, [[PG]]/m, [[RBIT]].b
; VBITS_GE_2048-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <256 x i8>, <256 x i8>* %a
  %res = call <256 x i8> @llvm.cttz.v256i8(<256 x i8> %op)
  store <256 x i8> %res, <256 x i8>* %a
  ret void
}
define <4 x i16> @cttz_v4i16(<4 x i16> %op) #0 {
; CHECK-LABEL: cttz_v4i16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl4
; CHECK-NEXT: rbit z[[RBIT:[0-9]+]].h, p0/m, z0.h
; CHECK-NEXT: clz v0.4h, v[[RBIT]].4h
; CHECK-NEXT: ret
  %res = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %op)
  ret <4 x i16> %res
}

define <8 x i16> @cttz_v8i16(<8 x i16> %op) #0 {
; CHECK-LABEL: cttz_v8i16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl8
; CHECK-NEXT: rbit z[[RBIT:[0-9]+]].h, p0/m, z0.h
; CHECK-NEXT: clz v0.8h, v[[RBIT]].8h
; CHECK-NEXT: ret
  %res = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %op)
  ret <8 x i16> %res
}

define void @cttz_v16i16(<16 x i16>* %a) #0 {
; CHECK-LABEL: cttz_v16i16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-NEXT: rbit [[RBIT:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; CHECK-NEXT: clz [[RES:z[0-9]+]].h, [[PG]]/m, [[RBIT]].h
; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <16 x i16>, <16 x i16>* %a
  %res = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %op)
  store <16 x i16> %res, <16 x i16>* %a
  ret void
}

define void @cttz_v32i16(<32 x i16>* %a) #0 {
; CHECK-LABEL: cttz_v32i16:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: rbit [[RBIT:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_512-NEXT: clz [[RES:z[0-9]+]].h, [[PG]]/m, [[RBIT]].h
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-DAG: rbit [[RBIT_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
; VBITS_EQ_256-DAG: rbit [[RBIT_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
; VBITS_EQ_256-DAG: clz [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[RBIT_LO]].h
; VBITS_EQ_256-DAG: clz [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[RBIT_HI]].h
; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-NEXT: ret
  %op = load <32 x i16>, <32 x i16>* %a
  %res = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %op)
  store <32 x i16> %res, <32 x i16>* %a
  ret void
}

define void @cttz_v64i16(<64 x i16>* %a) #0 {
; CHECK-LABEL: cttz_v64i16:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: rbit [[RBIT:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_1024-NEXT: clz [[RES:z[0-9]+]].h, [[PG]]/m, [[RBIT]].h
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <64 x i16>, <64 x i16>* %a
  %res = call <64 x i16> @llvm.cttz.v64i16(<64 x i16> %op)
  store <64 x i16> %res, <64 x i16>* %a
  ret void
}

define void @cttz_v128i16(<128 x i16>* %a) #0 {
; CHECK-LABEL: cttz_v128i16:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: rbit [[RBIT:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_2048-NEXT: clz [[RES:z[0-9]+]].h, [[PG]]/m, [[RBIT]].h
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <128 x i16>, <128 x i16>* %a
  %res = call <128 x i16> @llvm.cttz.v128i16(<128 x i16> %op)
  store <128 x i16> %res, <128 x i16>* %a
  ret void
}
; Don't use SVE for 64-bit vectors.
define <2 x i32> @cttz_v2i32(<2 x i32> %op) #0 {
; CHECK-LABEL: cttz_v2i32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl2
; CHECK-NEXT: rbit z[[RBIT:[0-9]+]].s, p0/m, z0.s
; CHECK-NEXT: clz v0.2s, v[[RBIT]].2s
; CHECK-NEXT: ret
  %res = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %op)
  ret <2 x i32> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x i32> @cttz_v4i32(<4 x i32> %op) #0 {
; CHECK-LABEL: cttz_v4i32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl4
; CHECK-NEXT: rbit z[[RBIT:[0-9]+]].s, p0/m, z0.s
; CHECK-NEXT: clz v0.4s, v[[RBIT]].4s
; CHECK-NEXT: ret
  %res = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %op)
  ret <4 x i32> %res
}

define void @cttz_v8i32(<8 x i32>* %a) #0 {
; CHECK-LABEL: cttz_v8i32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK-NEXT: rbit [[RBIT:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; CHECK-NEXT: clz [[RES:z[0-9]+]].s, [[PG]]/m, [[RBIT]].s
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <8 x i32>, <8 x i32>* %a
  %res = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %op)
  store <8 x i32> %res, <8 x i32>* %a
  ret void
}

define void @cttz_v16i32(<16 x i32>* %a) #0 {
; CHECK-LABEL: cttz_v16i32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: rbit [[RBIT:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_512-NEXT: clz [[RES:z[0-9]+]].s, [[PG]]/m, [[RBIT]].s
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-DAG: rbit [[RBIT_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
; VBITS_EQ_256-DAG: rbit [[RBIT_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
; VBITS_EQ_256-DAG: clz [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[RBIT_LO]].s
; VBITS_EQ_256-DAG: clz [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[RBIT_HI]].s
; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-NEXT: ret
  %op = load <16 x i32>, <16 x i32>* %a
  %res = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %op)
  store <16 x i32> %res, <16 x i32>* %a
  ret void
}

define void @cttz_v32i32(<32 x i32>* %a) #0 {
; CHECK-LABEL: cttz_v32i32:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: rbit [[RBIT:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_1024-NEXT: clz [[RES:z[0-9]+]].s, [[PG]]/m, [[RBIT]].s
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <32 x i32>, <32 x i32>* %a
  %res = call <32 x i32> @llvm.cttz.v32i32(<32 x i32> %op)
  store <32 x i32> %res, <32 x i32>* %a
  ret void
}

define void @cttz_v64i32(<64 x i32>* %a) #0 {
; CHECK-LABEL: cttz_v64i32:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: rbit [[RBIT:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_2048-NEXT: clz [[RES:z[0-9]+]].s, [[PG]]/m, [[RBIT]].s
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <64 x i32>, <64 x i32>* %a
  %res = call <64 x i32> @llvm.cttz.v64i32(<64 x i32> %op)
  store <64 x i32> %res, <64 x i32>* %a
  ret void
}
define <1 x i64> @cttz_v1i64(<1 x i64> %op) #0 {
; CHECK-LABEL: cttz_v1i64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl1
; CHECK-NEXT: rbit [[RBIT:z[0-9]+]].d, [[PG]]/m, z0.d
; CHECK-NEXT: clz z0.d, [[PG]]/m, [[RBIT]].d
; CHECK-NEXT: ret
  %res = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %op)
  ret <1 x i64> %res
}

define <2 x i64> @cttz_v2i64(<2 x i64> %op) #0 {
; CHECK-LABEL: cttz_v2i64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl2
; CHECK-NEXT: rbit [[RBIT:z[0-9]+]].d, [[PG]]/m, z0.d
; CHECK-NEXT: clz z0.d, [[PG]]/m, [[RBIT]].d
; CHECK-NEXT: ret
  %res = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %op)
  ret <2 x i64> %res
}

define void @cttz_v4i64(<4 x i64>* %a) #0 {
; CHECK-LABEL: cttz_v4i64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-NEXT: rbit [[RBIT:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; CHECK-NEXT: clz [[RES:z[0-9]+]].d, [[PG]]/m, [[RBIT]].d
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <4 x i64>, <4 x i64>* %a
  %res = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %op)
  store <4 x i64> %res, <4 x i64>* %a
  ret void
}

define void @cttz_v8i64(<8 x i64>* %a) #0 {
; CHECK-LABEL: cttz_v8i64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: rbit [[RBIT:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_512-NEXT: clz [[RES:z[0-9]+]].d, [[PG]]/m, [[RBIT]].d
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-DAG: rbit [[RBIT_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
; VBITS_EQ_256-DAG: rbit [[RBIT_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
; VBITS_EQ_256-DAG: clz [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[RBIT_LO]].d
; VBITS_EQ_256-DAG: clz [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[RBIT_HI]].d
; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-NEXT: ret
  %op = load <8 x i64>, <8 x i64>* %a
  %res = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %op)
  store <8 x i64> %res, <8 x i64>* %a
  ret void
}

define void @cttz_v16i64(<16 x i64>* %a) #0 {
; CHECK-LABEL: cttz_v16i64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: rbit [[RBIT:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_1024-NEXT: clz [[RES:z[0-9]+]].d, [[PG]]/m, [[RBIT]].d
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <16 x i64>, <16 x i64>* %a
  %res = call <16 x i64> @llvm.cttz.v16i64(<16 x i64> %op)
  store <16 x i64> %res, <16 x i64>* %a
  ret void
}

define void @cttz_v32i64(<32 x i64>* %a) #0 {
; CHECK-LABEL: cttz_v32i64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: rbit [[RBIT:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_2048-NEXT: clz [[RES:z[0-9]+]].d, [[PG]]/m, [[RBIT]].d
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <32 x i64>, <32 x i64>* %a
  %res = call <32 x i64> @llvm.cttz.v32i64(<32 x i64> %op)
  store <32 x i64> %res, <32 x i64>* %a
  ret void
}
attributes #0 = { "target-features"="+sve" }
declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>)
declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>)
declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>)
declare <64 x i8> @llvm.ctlz.v64i8(<64 x i8>)
declare <128 x i8> @llvm.ctlz.v128i8(<128 x i8>)
declare <256 x i8> @llvm.ctlz.v256i8(<256 x i8>)
declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>)
declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>)
declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>)
declare <32 x i16> @llvm.ctlz.v32i16(<32 x i16>)
declare <64 x i16> @llvm.ctlz.v64i16(<64 x i16>)
declare <128 x i16> @llvm.ctlz.v128i16(<128 x i16>)
declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>)
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>)
declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>)
declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>)
declare <32 x i32> @llvm.ctlz.v32i32(<32 x i32>)
declare <64 x i32> @llvm.ctlz.v64i32(<64 x i32>)
declare <1 x i64> @llvm.ctlz.v1i64(<1 x i64>)
declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>)
declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>)
declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>)
declare <16 x i64> @llvm.ctlz.v16i64(<16 x i64>)
declare <32 x i64> @llvm.ctlz.v32i64(<32 x i64>)

declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>)
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)
declare <64 x i8> @llvm.ctpop.v64i8(<64 x i8>)
declare <128 x i8> @llvm.ctpop.v128i8(<128 x i8>)
declare <256 x i8> @llvm.ctpop.v256i8(<256 x i8>)
declare <4 x i16> @llvm.ctpop.v4i16(<4 x i16>)
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
declare <32 x i16> @llvm.ctpop.v32i16(<32 x i16>)
declare <64 x i16> @llvm.ctpop.v64i16(<64 x i16>)
declare <128 x i16> @llvm.ctpop.v128i16(<128 x i16>)
declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>)
declare <32 x i32> @llvm.ctpop.v32i32(<32 x i32>)
declare <64 x i32> @llvm.ctpop.v64i32(<64 x i32>)
declare <1 x i64> @llvm.ctpop.v1i64(<1 x i64>)
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>)
declare <16 x i64> @llvm.ctpop.v16i64(<16 x i64>)
declare <32 x i64> @llvm.ctpop.v32i64(<32 x i64>)

declare <8 x i8> @llvm.cttz.v8i8(<8 x i8>)
declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>)
declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>)
declare <64 x i8> @llvm.cttz.v64i8(<64 x i8>)
declare <128 x i8> @llvm.cttz.v128i8(<128 x i8>)
declare <256 x i8> @llvm.cttz.v256i8(<256 x i8>)
declare <4 x i16> @llvm.cttz.v4i16(<4 x i16>)
declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>)
declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>)
declare <32 x i16> @llvm.cttz.v32i16(<32 x i16>)
declare <64 x i16> @llvm.cttz.v64i16(<64 x i16>)
declare <128 x i16> @llvm.cttz.v128i16(<128 x i16>)
declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>)
declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>)
declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>)
declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>)
declare <32 x i32> @llvm.cttz.v32i32(<32 x i32>)
declare <64 x i32> @llvm.cttz.v64i32(<64 x i32>)
declare <1 x i64> @llvm.cttz.v1i64(<1 x i64>)
declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>)
declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>)
declare <8 x i64> @llvm.cttz.v8i64(<8 x i64>)
declare <16 x i64> @llvm.cttz.v16i64(<16 x i64>)
declare <32 x i64> @llvm.cttz.v32i64(<32 x i64>)