1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s
4 target triple = "aarch64-unknown-linux-gnu"
; ctlz on <4 x i8>. The expected assembly works on 16-bit lanes (ptrue p0.h, vl4):
; each lane is masked with #0xff, clz runs on .h lanes, and 8 is subtracted.
; NOTE(review): the subtract presumably removes the extra leading zeros that
; come from the wider 16-bit lane -- confirm.
10 define <4 x i8> @ctlz_v4i8(<4 x i8> %op) {
11 ; CHECK-LABEL: ctlz_v4i8:
13 ; CHECK-NEXT: ptrue p0.h, vl4
14 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
15 ; CHECK-NEXT: and z0.h, z0.h, #0xff
16 ; CHECK-NEXT: clz z0.h, p0/m, z0.h
17 ; CHECK-NEXT: sub z0.h, z0.h, #8 // =0x8
18 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
20 %res = call <4 x i8> @llvm.ctlz.v4i8(<4 x i8> %op)
; ctlz on <8 x i8>: expects one predicated clz on .b lanes under ptrue p0.b, vl8,
; with the value passed in and returned through d0/z0.
24 define <8 x i8> @ctlz_v8i8(<8 x i8> %op) {
25 ; CHECK-LABEL: ctlz_v8i8:
27 ; CHECK-NEXT: ptrue p0.b, vl8
28 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
29 ; CHECK-NEXT: clz z0.b, p0/m, z0.b
30 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
32 %res = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %op)
; ctlz on <16 x i8>: expects one predicated clz on .b lanes under ptrue p0.b, vl16,
; with the value passed in and returned through q0/z0.
36 define <16 x i8> @ctlz_v16i8(<16 x i8> %op) {
37 ; CHECK-LABEL: ctlz_v16i8:
39 ; CHECK-NEXT: ptrue p0.b, vl16
40 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
41 ; CHECK-NEXT: clz z0.b, p0/m, z0.b
42 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
44 %res = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %op)
; ctlz on <32 x i8> loaded from %a and stored back to %a. The expected assembly
; loads q0/q1 with ldp, applies clz on .b lanes to each register under a single
; ptrue p0.b, vl16, and writes both back with stp.
48 define void @ctlz_v32i8(ptr %a) {
49 ; CHECK-LABEL: ctlz_v32i8:
51 ; CHECK-NEXT: ptrue p0.b, vl16
52 ; CHECK-NEXT: ldp q0, q1, [x0]
53 ; CHECK-NEXT: clz z0.b, p0/m, z0.b
54 ; CHECK-NEXT: clz z1.b, p0/m, z1.b
55 ; CHECK-NEXT: stp q0, q1, [x0]
57 %op = load <32 x i8>, ptr %a
58 %res = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %op)
59 store <32 x i8> %res, ptr %a
; ctlz on <2 x i16>. The expected assembly works on 32-bit lanes (ptrue p0.s, vl2):
; each lane is masked with #0xffff, clz runs on .s lanes, and 16 is subtracted.
; NOTE(review): the subtract presumably removes the extra leading zeros that
; come from the wider 32-bit lane -- confirm.
63 define <2 x i16> @ctlz_v2i16(<2 x i16> %op) {
64 ; CHECK-LABEL: ctlz_v2i16:
66 ; CHECK-NEXT: ptrue p0.s, vl2
67 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
68 ; CHECK-NEXT: and z0.s, z0.s, #0xffff
69 ; CHECK-NEXT: clz z0.s, p0/m, z0.s
70 ; CHECK-NEXT: sub z0.s, z0.s, #16 // =0x10
71 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
73 %res = call <2 x i16> @llvm.ctlz.v2i16(<2 x i16> %op)
; ctlz on <4 x i16>: expects one predicated clz on .h lanes under ptrue p0.h, vl4,
; with the value passed in and returned through d0/z0.
77 define <4 x i16> @ctlz_v4i16(<4 x i16> %op) {
78 ; CHECK-LABEL: ctlz_v4i16:
80 ; CHECK-NEXT: ptrue p0.h, vl4
81 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
82 ; CHECK-NEXT: clz z0.h, p0/m, z0.h
83 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
85 %res = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %op)
; ctlz on <8 x i16>: expects one predicated clz on .h lanes under ptrue p0.h, vl8,
; with the value passed in and returned through q0/z0.
89 define <8 x i16> @ctlz_v8i16(<8 x i16> %op) {
90 ; CHECK-LABEL: ctlz_v8i16:
92 ; CHECK-NEXT: ptrue p0.h, vl8
93 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
94 ; CHECK-NEXT: clz z0.h, p0/m, z0.h
95 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
97 %res = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %op)
; ctlz on <16 x i16> loaded from %a and stored back to %a. The expected assembly
; loads q0/q1 with ldp, applies clz on .h lanes to each register under a single
; ptrue p0.h, vl8, and writes both back with stp.
101 define void @ctlz_v16i16(ptr %a) {
102 ; CHECK-LABEL: ctlz_v16i16:
104 ; CHECK-NEXT: ptrue p0.h, vl8
105 ; CHECK-NEXT: ldp q0, q1, [x0]
106 ; CHECK-NEXT: clz z0.h, p0/m, z0.h
107 ; CHECK-NEXT: clz z1.h, p0/m, z1.h
108 ; CHECK-NEXT: stp q0, q1, [x0]
110 %op = load <16 x i16>, ptr %a
111 %res = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %op)
112 store <16 x i16> %res, ptr %a
; ctlz on <2 x i32>: expects one predicated clz on .s lanes under ptrue p0.s, vl2,
; with the value passed in and returned through d0/z0.
116 define <2 x i32> @ctlz_v2i32(<2 x i32> %op) {
117 ; CHECK-LABEL: ctlz_v2i32:
119 ; CHECK-NEXT: ptrue p0.s, vl2
120 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
121 ; CHECK-NEXT: clz z0.s, p0/m, z0.s
122 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
124 %res = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %op)
; ctlz on <4 x i32>: expects one predicated clz on .s lanes under ptrue p0.s, vl4,
; with the value passed in and returned through q0/z0.
128 define <4 x i32> @ctlz_v4i32(<4 x i32> %op) {
129 ; CHECK-LABEL: ctlz_v4i32:
131 ; CHECK-NEXT: ptrue p0.s, vl4
132 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
133 ; CHECK-NEXT: clz z0.s, p0/m, z0.s
134 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
136 %res = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %op)
; ctlz on <8 x i32> loaded from %a and stored back to %a. The expected assembly
; loads q0/q1 with ldp, applies clz on .s lanes to each register under a single
; ptrue p0.s, vl4, and writes both back with stp.
140 define void @ctlz_v8i32(ptr %a) {
141 ; CHECK-LABEL: ctlz_v8i32:
143 ; CHECK-NEXT: ptrue p0.s, vl4
144 ; CHECK-NEXT: ldp q0, q1, [x0]
145 ; CHECK-NEXT: clz z0.s, p0/m, z0.s
146 ; CHECK-NEXT: clz z1.s, p0/m, z1.s
147 ; CHECK-NEXT: stp q0, q1, [x0]
149 %op = load <8 x i32>, ptr %a
150 %res = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %op)
151 store <8 x i32> %res, ptr %a
; ctlz on <1 x i64>: expects one predicated clz on .d lanes under ptrue p0.d, vl1,
; with the value passed in and returned through d0/z0.
155 define <1 x i64> @ctlz_v1i64(<1 x i64> %op) {
156 ; CHECK-LABEL: ctlz_v1i64:
158 ; CHECK-NEXT: ptrue p0.d, vl1
159 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
160 ; CHECK-NEXT: clz z0.d, p0/m, z0.d
161 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
163 %res = call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %op)
; ctlz on <2 x i64>: expects one predicated clz on .d lanes under ptrue p0.d, vl2,
; with the value passed in and returned through q0/z0.
167 define <2 x i64> @ctlz_v2i64(<2 x i64> %op) {
168 ; CHECK-LABEL: ctlz_v2i64:
170 ; CHECK-NEXT: ptrue p0.d, vl2
171 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
172 ; CHECK-NEXT: clz z0.d, p0/m, z0.d
173 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
175 %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %op)
; ctlz on <4 x i64> loaded from %a and stored back to %a. The expected assembly
; loads q0/q1 with ldp, applies clz on .d lanes to each register under a single
; ptrue p0.d, vl2, and writes both back with stp.
179 define void @ctlz_v4i64(ptr %a) {
180 ; CHECK-LABEL: ctlz_v4i64:
182 ; CHECK-NEXT: ptrue p0.d, vl2
183 ; CHECK-NEXT: ldp q0, q1, [x0]
184 ; CHECK-NEXT: clz z0.d, p0/m, z0.d
185 ; CHECK-NEXT: clz z1.d, p0/m, z1.d
186 ; CHECK-NEXT: stp q0, q1, [x0]
188 %op = load <4 x i64>, ptr %a
189 %res = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %op)
190 store <4 x i64> %res, ptr %a
; ctpop on <4 x i8>. The expected assembly works on 16-bit lanes (ptrue p0.h, vl4):
; each lane is masked with #0xff before cnt runs on .h lanes.
; NOTE(review): the mask presumably clears the upper half of each promoted lane
; so it cannot contribute to the population count -- confirm.
198 define <4 x i8> @ctpop_v4i8(<4 x i8> %op) {
199 ; CHECK-LABEL: ctpop_v4i8:
201 ; CHECK-NEXT: ptrue p0.h, vl4
202 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
203 ; CHECK-NEXT: and z0.h, z0.h, #0xff
204 ; CHECK-NEXT: cnt z0.h, p0/m, z0.h
205 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
207 %res = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> %op)
; ctpop on <8 x i8>: expects one predicated cnt on .b lanes under ptrue p0.b, vl8,
; with the value passed in and returned through d0/z0.
211 define <8 x i8> @ctpop_v8i8(<8 x i8> %op) {
212 ; CHECK-LABEL: ctpop_v8i8:
214 ; CHECK-NEXT: ptrue p0.b, vl8
215 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
216 ; CHECK-NEXT: cnt z0.b, p0/m, z0.b
217 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
219 %res = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %op)
; ctpop on <16 x i8>: expects one predicated cnt on .b lanes under ptrue p0.b, vl16,
; with the value passed in and returned through q0/z0.
223 define <16 x i8> @ctpop_v16i8(<16 x i8> %op) {
224 ; CHECK-LABEL: ctpop_v16i8:
226 ; CHECK-NEXT: ptrue p0.b, vl16
227 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
228 ; CHECK-NEXT: cnt z0.b, p0/m, z0.b
229 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
231 %res = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %op)
; ctpop on <32 x i8> loaded from %a and stored back to %a. The expected assembly
; loads q0/q1 with ldp, applies cnt on .b lanes to each register under a single
; ptrue p0.b, vl16, and writes both back with stp.
235 define void @ctpop_v32i8(ptr %a) {
236 ; CHECK-LABEL: ctpop_v32i8:
238 ; CHECK-NEXT: ptrue p0.b, vl16
239 ; CHECK-NEXT: ldp q0, q1, [x0]
240 ; CHECK-NEXT: cnt z0.b, p0/m, z0.b
241 ; CHECK-NEXT: cnt z1.b, p0/m, z1.b
242 ; CHECK-NEXT: stp q0, q1, [x0]
244 %op = load <32 x i8>, ptr %a
245 %res = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %op)
246 store <32 x i8> %res, ptr %a
; ctpop on <2 x i16>. The expected assembly works on 32-bit lanes (ptrue p0.s, vl2):
; each lane is masked with #0xffff before cnt runs on .s lanes.
; NOTE(review): the mask presumably clears the upper half of each promoted lane
; so it cannot contribute to the population count -- confirm.
250 define <2 x i16> @ctpop_v2i16(<2 x i16> %op) {
251 ; CHECK-LABEL: ctpop_v2i16:
253 ; CHECK-NEXT: ptrue p0.s, vl2
254 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
255 ; CHECK-NEXT: and z0.s, z0.s, #0xffff
256 ; CHECK-NEXT: cnt z0.s, p0/m, z0.s
257 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
259 %res = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> %op)
; ctpop on <4 x i16>: expects one predicated cnt on .h lanes under ptrue p0.h, vl4,
; with the value passed in and returned through d0/z0.
263 define <4 x i16> @ctpop_v4i16(<4 x i16> %op) {
264 ; CHECK-LABEL: ctpop_v4i16:
266 ; CHECK-NEXT: ptrue p0.h, vl4
267 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
268 ; CHECK-NEXT: cnt z0.h, p0/m, z0.h
269 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
271 %res = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %op)
; ctpop on <8 x i16>: expects one predicated cnt on .h lanes under ptrue p0.h, vl8,
; with the value passed in and returned through q0/z0.
275 define <8 x i16> @ctpop_v8i16(<8 x i16> %op) {
276 ; CHECK-LABEL: ctpop_v8i16:
278 ; CHECK-NEXT: ptrue p0.h, vl8
279 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
280 ; CHECK-NEXT: cnt z0.h, p0/m, z0.h
281 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
283 %res = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %op)
; ctpop on <16 x i16> loaded from %a and stored back to %a. The expected assembly
; loads q0/q1 with ldp, applies cnt on .h lanes to each register under a single
; ptrue p0.h, vl8, and writes both back with stp.
287 define void @ctpop_v16i16(ptr %a) {
288 ; CHECK-LABEL: ctpop_v16i16:
290 ; CHECK-NEXT: ptrue p0.h, vl8
291 ; CHECK-NEXT: ldp q0, q1, [x0]
292 ; CHECK-NEXT: cnt z0.h, p0/m, z0.h
293 ; CHECK-NEXT: cnt z1.h, p0/m, z1.h
294 ; CHECK-NEXT: stp q0, q1, [x0]
296 %op = load <16 x i16>, ptr %a
297 %res = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %op)
298 store <16 x i16> %res, ptr %a
; ctpop on <2 x i32>: expects one predicated cnt on .s lanes under ptrue p0.s, vl2,
; with the value passed in and returned through d0/z0.
302 define <2 x i32> @ctpop_v2i32(<2 x i32> %op) {
303 ; CHECK-LABEL: ctpop_v2i32:
305 ; CHECK-NEXT: ptrue p0.s, vl2
306 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
307 ; CHECK-NEXT: cnt z0.s, p0/m, z0.s
308 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
310 %res = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %op)
; ctpop on <4 x i32>: expects one predicated cnt on .s lanes under ptrue p0.s, vl4,
; with the value passed in and returned through q0/z0.
314 define <4 x i32> @ctpop_v4i32(<4 x i32> %op) {
315 ; CHECK-LABEL: ctpop_v4i32:
317 ; CHECK-NEXT: ptrue p0.s, vl4
318 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
319 ; CHECK-NEXT: cnt z0.s, p0/m, z0.s
320 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
322 %res = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %op)
; ctpop on <8 x i32> loaded from %a and stored back to %a. The expected assembly
; loads q0/q1 with ldp, applies cnt on .s lanes to each register under a single
; ptrue p0.s, vl4, and writes both back with stp.
326 define void @ctpop_v8i32(ptr %a) {
327 ; CHECK-LABEL: ctpop_v8i32:
329 ; CHECK-NEXT: ptrue p0.s, vl4
330 ; CHECK-NEXT: ldp q0, q1, [x0]
331 ; CHECK-NEXT: cnt z0.s, p0/m, z0.s
332 ; CHECK-NEXT: cnt z1.s, p0/m, z1.s
333 ; CHECK-NEXT: stp q0, q1, [x0]
335 %op = load <8 x i32>, ptr %a
336 %res = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %op)
337 store <8 x i32> %res, ptr %a
; ctpop on <1 x i64>: expects one predicated cnt on .d lanes under ptrue p0.d, vl1,
; with the value passed in and returned through d0/z0.
341 define <1 x i64> @ctpop_v1i64(<1 x i64> %op) {
342 ; CHECK-LABEL: ctpop_v1i64:
344 ; CHECK-NEXT: ptrue p0.d, vl1
345 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
346 ; CHECK-NEXT: cnt z0.d, p0/m, z0.d
347 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
349 %res = call <1 x i64> @llvm.ctpop.v1i64(<1 x i64> %op)
; ctpop on <2 x i64>: expects one predicated cnt on .d lanes under ptrue p0.d, vl2,
; with the value passed in and returned through q0/z0.
353 define <2 x i64> @ctpop_v2i64(<2 x i64> %op) {
354 ; CHECK-LABEL: ctpop_v2i64:
356 ; CHECK-NEXT: ptrue p0.d, vl2
357 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
358 ; CHECK-NEXT: cnt z0.d, p0/m, z0.d
359 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
361 %res = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %op)
; ctpop on <4 x i64> loaded from %a and stored back to %a. The expected assembly
; loads q0/q1 with ldp, applies cnt on .d lanes to each register under a single
; ptrue p0.d, vl2, and writes both back with stp.
365 define void @ctpop_v4i64(ptr %a) {
366 ; CHECK-LABEL: ctpop_v4i64:
368 ; CHECK-NEXT: ptrue p0.d, vl2
369 ; CHECK-NEXT: ldp q0, q1, [x0]
370 ; CHECK-NEXT: cnt z0.d, p0/m, z0.d
371 ; CHECK-NEXT: cnt z1.d, p0/m, z1.d
372 ; CHECK-NEXT: stp q0, q1, [x0]
374 %op = load <4 x i64>, ptr %a
375 %res = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %op)
376 store <4 x i64> %res, ptr %a
381 ; Count trailing zeros
; cttz on <4 x i8>. The expected assembly works on 16-bit lanes (ptrue p0.h, vl4):
; each lane is ORed with #0x100, then bit-reversed with rbit and counted with clz.
; NOTE(review): the orr presumably plants a bit just above the 8-bit payload so
; that an all-zero element yields 8 rather than 16 -- confirm.
384 define <4 x i8> @cttz_v4i8(<4 x i8> %op) {
385 ; CHECK-LABEL: cttz_v4i8:
387 ; CHECK-NEXT: ptrue p0.h, vl4
388 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
389 ; CHECK-NEXT: orr z0.h, z0.h, #0x100
390 ; CHECK-NEXT: rbit z0.h, p0/m, z0.h
391 ; CHECK-NEXT: clz z0.h, p0/m, z0.h
392 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
394 %res = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %op)
; cttz on <8 x i8>: expects rbit followed by clz on .b lanes under ptrue p0.b, vl8,
; with the value passed in and returned through d0/z0.
398 define <8 x i8> @cttz_v8i8(<8 x i8> %op) {
399 ; CHECK-LABEL: cttz_v8i8:
401 ; CHECK-NEXT: ptrue p0.b, vl8
402 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
403 ; CHECK-NEXT: rbit z0.b, p0/m, z0.b
404 ; CHECK-NEXT: clz z0.b, p0/m, z0.b
405 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
407 %res = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %op)
; cttz on <16 x i8>: expects rbit followed by clz on .b lanes under ptrue p0.b, vl16,
; with the value passed in and returned through q0/z0.
411 define <16 x i8> @cttz_v16i8(<16 x i8> %op) {
412 ; CHECK-LABEL: cttz_v16i8:
414 ; CHECK-NEXT: ptrue p0.b, vl16
415 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
416 ; CHECK-NEXT: rbit z0.b, p0/m, z0.b
417 ; CHECK-NEXT: clz z0.b, p0/m, z0.b
418 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
420 %res = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %op)
; cttz on <32 x i8> loaded from %a and stored back to %a. The expected assembly
; loads q0/q1 with ldp, applies rbit to both registers and then clz to both
; (.b lanes, single ptrue p0.b, vl16), and writes them back with stp.
424 define void @cttz_v32i8(ptr %a) {
425 ; CHECK-LABEL: cttz_v32i8:
427 ; CHECK-NEXT: ptrue p0.b, vl16
428 ; CHECK-NEXT: ldp q0, q1, [x0]
429 ; CHECK-NEXT: rbit z0.b, p0/m, z0.b
430 ; CHECK-NEXT: rbit z1.b, p0/m, z1.b
431 ; CHECK-NEXT: clz z0.b, p0/m, z0.b
432 ; CHECK-NEXT: clz z1.b, p0/m, z1.b
433 ; CHECK-NEXT: stp q0, q1, [x0]
435 %op = load <32 x i8>, ptr %a
436 %res = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %op)
437 store <32 x i8> %res, ptr %a
; cttz on <2 x i16>. The expected assembly works on 32-bit lanes (ptrue p0.s, vl2):
; each lane is ORed with #0x10000, then bit-reversed with rbit and counted with clz.
; NOTE(review): the orr presumably plants a bit just above the 16-bit payload so
; that an all-zero element yields 16 rather than 32 -- confirm.
441 define <2 x i16> @cttz_v2i16(<2 x i16> %op) {
442 ; CHECK-LABEL: cttz_v2i16:
444 ; CHECK-NEXT: ptrue p0.s, vl2
445 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
446 ; CHECK-NEXT: orr z0.s, z0.s, #0x10000
447 ; CHECK-NEXT: rbit z0.s, p0/m, z0.s
448 ; CHECK-NEXT: clz z0.s, p0/m, z0.s
449 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
451 %res = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %op)
; cttz on <4 x i16>: expects rbit followed by clz on .h lanes under ptrue p0.h, vl4,
; with the value passed in and returned through d0/z0.
455 define <4 x i16> @cttz_v4i16(<4 x i16> %op) {
456 ; CHECK-LABEL: cttz_v4i16:
458 ; CHECK-NEXT: ptrue p0.h, vl4
459 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
460 ; CHECK-NEXT: rbit z0.h, p0/m, z0.h
461 ; CHECK-NEXT: clz z0.h, p0/m, z0.h
462 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
464 %res = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %op)
; cttz on <8 x i16>: expects rbit followed by clz on .h lanes under ptrue p0.h, vl8,
; with the value passed in and returned through q0/z0.
468 define <8 x i16> @cttz_v8i16(<8 x i16> %op) {
469 ; CHECK-LABEL: cttz_v8i16:
471 ; CHECK-NEXT: ptrue p0.h, vl8
472 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
473 ; CHECK-NEXT: rbit z0.h, p0/m, z0.h
474 ; CHECK-NEXT: clz z0.h, p0/m, z0.h
475 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
477 %res = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %op)
; cttz on <16 x i16> loaded from %a and stored back to %a. The expected assembly
; loads q0/q1 with ldp, applies rbit to both registers and then clz to both
; (.h lanes, single ptrue p0.h, vl8), and writes them back with stp.
481 define void @cttz_v16i16(ptr %a) {
482 ; CHECK-LABEL: cttz_v16i16:
484 ; CHECK-NEXT: ptrue p0.h, vl8
485 ; CHECK-NEXT: ldp q0, q1, [x0]
486 ; CHECK-NEXT: rbit z0.h, p0/m, z0.h
487 ; CHECK-NEXT: rbit z1.h, p0/m, z1.h
488 ; CHECK-NEXT: clz z0.h, p0/m, z0.h
489 ; CHECK-NEXT: clz z1.h, p0/m, z1.h
490 ; CHECK-NEXT: stp q0, q1, [x0]
492 %op = load <16 x i16>, ptr %a
493 %res = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %op)
494 store <16 x i16> %res, ptr %a
; cttz on <2 x i32>: expects rbit followed by clz on .s lanes under ptrue p0.s, vl2,
; with the value passed in and returned through d0/z0.
498 define <2 x i32> @cttz_v2i32(<2 x i32> %op) {
499 ; CHECK-LABEL: cttz_v2i32:
501 ; CHECK-NEXT: ptrue p0.s, vl2
502 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
503 ; CHECK-NEXT: rbit z0.s, p0/m, z0.s
504 ; CHECK-NEXT: clz z0.s, p0/m, z0.s
505 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
507 %res = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %op)
; cttz on <4 x i32>: expects rbit followed by clz on .s lanes under ptrue p0.s, vl4,
; with the value passed in and returned through q0/z0.
511 define <4 x i32> @cttz_v4i32(<4 x i32> %op) {
512 ; CHECK-LABEL: cttz_v4i32:
514 ; CHECK-NEXT: ptrue p0.s, vl4
515 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
516 ; CHECK-NEXT: rbit z0.s, p0/m, z0.s
517 ; CHECK-NEXT: clz z0.s, p0/m, z0.s
518 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
520 %res = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %op)
; cttz on <8 x i32> loaded from %a and stored back to %a. The expected assembly
; loads q0/q1 with ldp, applies rbit to both registers and then clz to both
; (.s lanes, single ptrue p0.s, vl4), and writes them back with stp.
524 define void @cttz_v8i32(ptr %a) {
525 ; CHECK-LABEL: cttz_v8i32:
527 ; CHECK-NEXT: ptrue p0.s, vl4
528 ; CHECK-NEXT: ldp q0, q1, [x0]
529 ; CHECK-NEXT: rbit z0.s, p0/m, z0.s
530 ; CHECK-NEXT: rbit z1.s, p0/m, z1.s
531 ; CHECK-NEXT: clz z0.s, p0/m, z0.s
532 ; CHECK-NEXT: clz z1.s, p0/m, z1.s
533 ; CHECK-NEXT: stp q0, q1, [x0]
535 %op = load <8 x i32>, ptr %a
536 %res = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %op)
537 store <8 x i32> %res, ptr %a
; cttz on <1 x i64>: expects rbit followed by clz on .d lanes under ptrue p0.d, vl1,
; with the value passed in and returned through d0/z0.
541 define <1 x i64> @cttz_v1i64(<1 x i64> %op) {
542 ; CHECK-LABEL: cttz_v1i64:
544 ; CHECK-NEXT: ptrue p0.d, vl1
545 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
546 ; CHECK-NEXT: rbit z0.d, p0/m, z0.d
547 ; CHECK-NEXT: clz z0.d, p0/m, z0.d
548 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
550 %res = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %op)
; cttz on <2 x i64>: expects rbit followed by clz on .d lanes under ptrue p0.d, vl2,
; with the value passed in and returned through q0/z0.
554 define <2 x i64> @cttz_v2i64(<2 x i64> %op) {
555 ; CHECK-LABEL: cttz_v2i64:
557 ; CHECK-NEXT: ptrue p0.d, vl2
558 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
559 ; CHECK-NEXT: rbit z0.d, p0/m, z0.d
560 ; CHECK-NEXT: clz z0.d, p0/m, z0.d
561 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
563 %res = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %op)
; cttz on <4 x i64> loaded from %a and stored back to %a. The expected assembly
; loads q0/q1 with ldp, applies rbit to both registers and then clz to both
; (.d lanes, single ptrue p0.d, vl2), and writes them back with stp.
567 define void @cttz_v4i64(ptr %a) {
568 ; CHECK-LABEL: cttz_v4i64:
570 ; CHECK-NEXT: ptrue p0.d, vl2
571 ; CHECK-NEXT: ldp q0, q1, [x0]
572 ; CHECK-NEXT: rbit z0.d, p0/m, z0.d
573 ; CHECK-NEXT: rbit z1.d, p0/m, z1.d
574 ; CHECK-NEXT: clz z0.d, p0/m, z0.d
575 ; CHECK-NEXT: clz z1.d, p0/m, z1.d
576 ; CHECK-NEXT: stp q0, q1, [x0]
578 %op = load <4 x i64>, ptr %a
579 %res = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %op)
580 store <4 x i64> %res, ptr %a
; Declarations of the llvm.ctlz overloads called by the ctlz_* functions in this
; file, one per vector type under test (i8, i16, i32 and i64 elements).
; NOTE(review): these use a one-operand signature, while the LangRef form of
; llvm.ctlz also takes an i1 is_zero_poison operand. Presumably this relies on
; IR auto-upgrade of the legacy form -- confirm before editing by hand.
585 declare <4 x i8> @llvm.ctlz.v4i8(<4 x i8>)
586 declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>)
587 declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>)
588 declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>)
589 declare <2 x i16> @llvm.ctlz.v2i16(<2 x i16>)
590 declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>)
591 declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>)
592 declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>)
593 declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>)
594 declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>)
595 declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>)
596 declare <1 x i64> @llvm.ctlz.v1i64(<1 x i64>)
597 declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>)
598 declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>)
; Declarations of the llvm.ctpop overloads called by the ctpop_* functions in
; this file, one per vector type under test (i8, i16, i32 and i64 elements).
600 declare <4 x i8> @llvm.ctpop.v4i8(<4 x i8>)
601 declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>)
602 declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
603 declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)
604 declare <2 x i16> @llvm.ctpop.v2i16(<2 x i16>)
605 declare <4 x i16> @llvm.ctpop.v4i16(<4 x i16>)
606 declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
607 declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
608 declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
609 declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
610 declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
611 declare <1 x i64> @llvm.ctpop.v1i64(<1 x i64>)
612 declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
613 declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
; Declarations of the llvm.cttz overloads called by the cttz_* functions in this
; file, one per vector type under test (i8, i16, i32 and i64 elements).
; NOTE(review): these use a one-operand signature, while the LangRef form of
; llvm.cttz also takes an i1 is_zero_poison operand. Presumably this relies on
; IR auto-upgrade of the legacy form -- confirm before editing by hand.
615 declare <4 x i8> @llvm.cttz.v4i8(<4 x i8>)
616 declare <8 x i8> @llvm.cttz.v8i8(<8 x i8>)
617 declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>)
618 declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>)
619 declare <2 x i16> @llvm.cttz.v2i16(<2 x i16>)
620 declare <4 x i16> @llvm.cttz.v4i16(<4 x i16>)
621 declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>)
622 declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>)
623 declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>)
624 declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>)
625 declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>)
626 declare <1 x i64> @llvm.cttz.v1i64(<1 x i64>)
627 declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>)
628 declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>)