1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s
3 ; RUN: llc -mattr=+sme -force-streaming-compatible-sve < %s | FileCheck %s
5 target triple = "aarch64-unknown-linux-gnu"
; Leading-zero count of a <4 x i8> argument. The expected assembly works on
; 16-bit (.h) lanes under a vl4 predicate: each lane is masked to its low
; 8 bits, counted with clz, then reduced by 8 to discount the extra lane width.
11 define <4 x i8> @ctlz_v4i8(<4 x i8> %op) {
12 ; CHECK-LABEL: ctlz_v4i8:
14 ; CHECK-NEXT: ptrue p0.h, vl4
15 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
16 ; CHECK-NEXT: and z0.h, z0.h, #0xff
17 ; CHECK-NEXT: clz z0.h, p0/m, z0.h
18 ; CHECK-NEXT: sub z0.h, z0.h, #8 // =0x8
19 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
21 %res = call <4 x i8> @llvm.ctlz.v4i8(<4 x i8> %op)
; Leading-zero count of a <8 x i8> argument. The expected assembly is a single
; predicated clz on byte (.b) lanes of z0 under a vl8 predicate.
25 define <8 x i8> @ctlz_v8i8(<8 x i8> %op) {
26 ; CHECK-LABEL: ctlz_v8i8:
28 ; CHECK-NEXT: ptrue p0.b, vl8
29 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
30 ; CHECK-NEXT: clz z0.b, p0/m, z0.b
31 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
33 %res = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %op)
; Leading-zero count of a <16 x i8> argument. The expected assembly is a single
; predicated clz on byte (.b) lanes of z0 under a vl16 predicate.
37 define <16 x i8> @ctlz_v16i8(<16 x i8> %op) {
38 ; CHECK-LABEL: ctlz_v16i8:
40 ; CHECK-NEXT: ptrue p0.b, vl16
41 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
42 ; CHECK-NEXT: clz z0.b, p0/m, z0.b
43 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
45 %res = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %op)
; Loads a <32 x i8> from %a, takes its leading-zero count and stores the result
; back to %a. The expected assembly loads the data as a q0/q1 pair (ldp), runs
; one byte-lane clz per register under a shared vl16 predicate, and stores the
; pair back (stp).
49 define void @ctlz_v32i8(ptr %a) {
50 ; CHECK-LABEL: ctlz_v32i8:
52 ; CHECK-NEXT: ptrue p0.b, vl16
53 ; CHECK-NEXT: ldp q0, q1, [x0]
54 ; CHECK-NEXT: clz z0.b, p0/m, z0.b
55 ; CHECK-NEXT: clz z1.b, p0/m, z1.b
56 ; CHECK-NEXT: stp q0, q1, [x0]
58 %op = load <32 x i8>, ptr %a
59 %res = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %op)
60 store <32 x i8> %res, ptr %a
; Leading-zero count of a <2 x i16> argument. The expected assembly works on
; 32-bit (.s) lanes under a vl2 predicate: each lane is masked to its low
; 16 bits, counted with clz, then reduced by 16 to discount the extra lane width.
64 define <2 x i16> @ctlz_v2i16(<2 x i16> %op) {
65 ; CHECK-LABEL: ctlz_v2i16:
67 ; CHECK-NEXT: ptrue p0.s, vl2
68 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
69 ; CHECK-NEXT: and z0.s, z0.s, #0xffff
70 ; CHECK-NEXT: clz z0.s, p0/m, z0.s
71 ; CHECK-NEXT: sub z0.s, z0.s, #16 // =0x10
72 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
74 %res = call <2 x i16> @llvm.ctlz.v2i16(<2 x i16> %op)
; Leading-zero count of a <4 x i16> argument. The expected assembly is a single
; predicated clz on halfword (.h) lanes of z0 under a vl4 predicate.
78 define <4 x i16> @ctlz_v4i16(<4 x i16> %op) {
79 ; CHECK-LABEL: ctlz_v4i16:
81 ; CHECK-NEXT: ptrue p0.h, vl4
82 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
83 ; CHECK-NEXT: clz z0.h, p0/m, z0.h
84 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
86 %res = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %op)
; Leading-zero count of a <8 x i16> argument. The expected assembly is a single
; predicated clz on halfword (.h) lanes of z0 under a vl8 predicate.
90 define <8 x i16> @ctlz_v8i16(<8 x i16> %op) {
91 ; CHECK-LABEL: ctlz_v8i16:
93 ; CHECK-NEXT: ptrue p0.h, vl8
94 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
95 ; CHECK-NEXT: clz z0.h, p0/m, z0.h
96 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
98 %res = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %op)
; Loads a <16 x i16> from %a, takes its leading-zero count and stores the result
; back to %a. The expected assembly loads a q0/q1 pair, runs one halfword-lane
; clz per register under a shared vl8 predicate, and stores the pair back.
102 define void @ctlz_v16i16(ptr %a) {
103 ; CHECK-LABEL: ctlz_v16i16:
105 ; CHECK-NEXT: ptrue p0.h, vl8
106 ; CHECK-NEXT: ldp q0, q1, [x0]
107 ; CHECK-NEXT: clz z0.h, p0/m, z0.h
108 ; CHECK-NEXT: clz z1.h, p0/m, z1.h
109 ; CHECK-NEXT: stp q0, q1, [x0]
111 %op = load <16 x i16>, ptr %a
112 %res = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %op)
113 store <16 x i16> %res, ptr %a
; Leading-zero count of a <2 x i32> argument. The expected assembly is a single
; predicated clz on word (.s) lanes of z0 under a vl2 predicate.
117 define <2 x i32> @ctlz_v2i32(<2 x i32> %op) {
118 ; CHECK-LABEL: ctlz_v2i32:
120 ; CHECK-NEXT: ptrue p0.s, vl2
121 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
122 ; CHECK-NEXT: clz z0.s, p0/m, z0.s
123 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
125 %res = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %op)
; Leading-zero count of a <4 x i32> argument. The expected assembly is a single
; predicated clz on word (.s) lanes of z0 under a vl4 predicate.
129 define <4 x i32> @ctlz_v4i32(<4 x i32> %op) {
130 ; CHECK-LABEL: ctlz_v4i32:
132 ; CHECK-NEXT: ptrue p0.s, vl4
133 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
134 ; CHECK-NEXT: clz z0.s, p0/m, z0.s
135 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
137 %res = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %op)
; Loads a <8 x i32> from %a, takes its leading-zero count and stores the result
; back to %a. The expected assembly loads a q0/q1 pair, runs one word-lane clz
; per register under a shared vl4 predicate, and stores the pair back.
141 define void @ctlz_v8i32(ptr %a) {
142 ; CHECK-LABEL: ctlz_v8i32:
144 ; CHECK-NEXT: ptrue p0.s, vl4
145 ; CHECK-NEXT: ldp q0, q1, [x0]
146 ; CHECK-NEXT: clz z0.s, p0/m, z0.s
147 ; CHECK-NEXT: clz z1.s, p0/m, z1.s
148 ; CHECK-NEXT: stp q0, q1, [x0]
150 %op = load <8 x i32>, ptr %a
151 %res = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %op)
152 store <8 x i32> %res, ptr %a
; Leading-zero count of a <1 x i64> argument. The expected assembly is a single
; predicated clz on doubleword (.d) lanes of z0 under a vl1 predicate.
156 define <1 x i64> @ctlz_v1i64(<1 x i64> %op) {
157 ; CHECK-LABEL: ctlz_v1i64:
159 ; CHECK-NEXT: ptrue p0.d, vl1
160 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
161 ; CHECK-NEXT: clz z0.d, p0/m, z0.d
162 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
164 %res = call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %op)
; Leading-zero count of a <2 x i64> argument. The expected assembly is a single
; predicated clz on doubleword (.d) lanes of z0 under a vl2 predicate.
168 define <2 x i64> @ctlz_v2i64(<2 x i64> %op) {
169 ; CHECK-LABEL: ctlz_v2i64:
171 ; CHECK-NEXT: ptrue p0.d, vl2
172 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
173 ; CHECK-NEXT: clz z0.d, p0/m, z0.d
174 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
176 %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %op)
; Loads a <4 x i64> from %a, takes its leading-zero count and stores the result
; back to %a. The expected assembly loads a q0/q1 pair, runs one doubleword-lane
; clz per register under a shared vl2 predicate, and stores the pair back.
180 define void @ctlz_v4i64(ptr %a) {
181 ; CHECK-LABEL: ctlz_v4i64:
183 ; CHECK-NEXT: ptrue p0.d, vl2
184 ; CHECK-NEXT: ldp q0, q1, [x0]
185 ; CHECK-NEXT: clz z0.d, p0/m, z0.d
186 ; CHECK-NEXT: clz z1.d, p0/m, z1.d
187 ; CHECK-NEXT: stp q0, q1, [x0]
189 %op = load <4 x i64>, ptr %a
190 %res = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %op)
191 store <4 x i64> %res, ptr %a
; Population count of a <4 x i8> argument. The expected assembly works on
; 16-bit (.h) lanes under a vl4 predicate: each lane is masked to its low
; 8 bits so that only those bits contribute, then counted with cnt.
199 define <4 x i8> @ctpop_v4i8(<4 x i8> %op) {
200 ; CHECK-LABEL: ctpop_v4i8:
202 ; CHECK-NEXT: ptrue p0.h, vl4
203 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
204 ; CHECK-NEXT: and z0.h, z0.h, #0xff
205 ; CHECK-NEXT: cnt z0.h, p0/m, z0.h
206 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
208 %res = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> %op)
; Population count of a <8 x i8> argument. The expected assembly is a single
; predicated cnt on byte (.b) lanes of z0 under a vl8 predicate.
212 define <8 x i8> @ctpop_v8i8(<8 x i8> %op) {
213 ; CHECK-LABEL: ctpop_v8i8:
215 ; CHECK-NEXT: ptrue p0.b, vl8
216 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
217 ; CHECK-NEXT: cnt z0.b, p0/m, z0.b
218 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
220 %res = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %op)
; Population count of a <16 x i8> argument. The expected assembly is a single
; predicated cnt on byte (.b) lanes of z0 under a vl16 predicate.
224 define <16 x i8> @ctpop_v16i8(<16 x i8> %op) {
225 ; CHECK-LABEL: ctpop_v16i8:
227 ; CHECK-NEXT: ptrue p0.b, vl16
228 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
229 ; CHECK-NEXT: cnt z0.b, p0/m, z0.b
230 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
232 %res = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %op)
; Loads a <32 x i8> from %a, takes its population count and stores the result
; back to %a. The expected assembly loads a q0/q1 pair, runs one byte-lane cnt
; per register under a shared vl16 predicate, and stores the pair back.
236 define void @ctpop_v32i8(ptr %a) {
237 ; CHECK-LABEL: ctpop_v32i8:
239 ; CHECK-NEXT: ptrue p0.b, vl16
240 ; CHECK-NEXT: ldp q0, q1, [x0]
241 ; CHECK-NEXT: cnt z0.b, p0/m, z0.b
242 ; CHECK-NEXT: cnt z1.b, p0/m, z1.b
243 ; CHECK-NEXT: stp q0, q1, [x0]
245 %op = load <32 x i8>, ptr %a
246 %res = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %op)
247 store <32 x i8> %res, ptr %a
; Population count of a <2 x i16> argument. The expected assembly works on
; 32-bit (.s) lanes under a vl2 predicate: each lane is masked to its low
; 16 bits so that only those bits contribute, then counted with cnt.
251 define <2 x i16> @ctpop_v2i16(<2 x i16> %op) {
252 ; CHECK-LABEL: ctpop_v2i16:
254 ; CHECK-NEXT: ptrue p0.s, vl2
255 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
256 ; CHECK-NEXT: and z0.s, z0.s, #0xffff
257 ; CHECK-NEXT: cnt z0.s, p0/m, z0.s
258 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
260 %res = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> %op)
; Population count of a <4 x i16> argument. The expected assembly is a single
; predicated cnt on halfword (.h) lanes of z0 under a vl4 predicate.
264 define <4 x i16> @ctpop_v4i16(<4 x i16> %op) {
265 ; CHECK-LABEL: ctpop_v4i16:
267 ; CHECK-NEXT: ptrue p0.h, vl4
268 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
269 ; CHECK-NEXT: cnt z0.h, p0/m, z0.h
270 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
272 %res = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %op)
; Population count of a <8 x i16> argument. The expected assembly is a single
; predicated cnt on halfword (.h) lanes of z0 under a vl8 predicate.
276 define <8 x i16> @ctpop_v8i16(<8 x i16> %op) {
277 ; CHECK-LABEL: ctpop_v8i16:
279 ; CHECK-NEXT: ptrue p0.h, vl8
280 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
281 ; CHECK-NEXT: cnt z0.h, p0/m, z0.h
282 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
284 %res = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %op)
; Loads a <16 x i16> from %a, takes its population count and stores the result
; back to %a. The expected assembly loads a q0/q1 pair, runs one halfword-lane
; cnt per register under a shared vl8 predicate, and stores the pair back.
288 define void @ctpop_v16i16(ptr %a) {
289 ; CHECK-LABEL: ctpop_v16i16:
291 ; CHECK-NEXT: ptrue p0.h, vl8
292 ; CHECK-NEXT: ldp q0, q1, [x0]
293 ; CHECK-NEXT: cnt z0.h, p0/m, z0.h
294 ; CHECK-NEXT: cnt z1.h, p0/m, z1.h
295 ; CHECK-NEXT: stp q0, q1, [x0]
297 %op = load <16 x i16>, ptr %a
298 %res = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %op)
299 store <16 x i16> %res, ptr %a
; Population count of a <2 x i32> argument. The expected assembly is a single
; predicated cnt on word (.s) lanes of z0 under a vl2 predicate.
303 define <2 x i32> @ctpop_v2i32(<2 x i32> %op) {
304 ; CHECK-LABEL: ctpop_v2i32:
306 ; CHECK-NEXT: ptrue p0.s, vl2
307 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
308 ; CHECK-NEXT: cnt z0.s, p0/m, z0.s
309 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
311 %res = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %op)
; Population count of a <4 x i32> argument. The expected assembly is a single
; predicated cnt on word (.s) lanes of z0 under a vl4 predicate.
315 define <4 x i32> @ctpop_v4i32(<4 x i32> %op) {
316 ; CHECK-LABEL: ctpop_v4i32:
318 ; CHECK-NEXT: ptrue p0.s, vl4
319 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
320 ; CHECK-NEXT: cnt z0.s, p0/m, z0.s
321 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
323 %res = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %op)
; Loads a <8 x i32> from %a, takes its population count and stores the result
; back to %a. The expected assembly loads a q0/q1 pair, runs one word-lane cnt
; per register under a shared vl4 predicate, and stores the pair back.
327 define void @ctpop_v8i32(ptr %a) {
328 ; CHECK-LABEL: ctpop_v8i32:
330 ; CHECK-NEXT: ptrue p0.s, vl4
331 ; CHECK-NEXT: ldp q0, q1, [x0]
332 ; CHECK-NEXT: cnt z0.s, p0/m, z0.s
333 ; CHECK-NEXT: cnt z1.s, p0/m, z1.s
334 ; CHECK-NEXT: stp q0, q1, [x0]
336 %op = load <8 x i32>, ptr %a
337 %res = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %op)
338 store <8 x i32> %res, ptr %a
; Population count of a <1 x i64> argument. The expected assembly is a single
; predicated cnt on doubleword (.d) lanes of z0 under a vl1 predicate.
342 define <1 x i64> @ctpop_v1i64(<1 x i64> %op) {
343 ; CHECK-LABEL: ctpop_v1i64:
345 ; CHECK-NEXT: ptrue p0.d, vl1
346 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
347 ; CHECK-NEXT: cnt z0.d, p0/m, z0.d
348 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
350 %res = call <1 x i64> @llvm.ctpop.v1i64(<1 x i64> %op)
; Population count of a <2 x i64> argument. The expected assembly is a single
; predicated cnt on doubleword (.d) lanes of z0 under a vl2 predicate.
354 define <2 x i64> @ctpop_v2i64(<2 x i64> %op) {
355 ; CHECK-LABEL: ctpop_v2i64:
357 ; CHECK-NEXT: ptrue p0.d, vl2
358 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
359 ; CHECK-NEXT: cnt z0.d, p0/m, z0.d
360 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
362 %res = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %op)
; Loads a <4 x i64> from %a, takes its population count and stores the result
; back to %a. The expected assembly loads a q0/q1 pair, runs one doubleword-lane
; cnt per register under a shared vl2 predicate, and stores the pair back.
366 define void @ctpop_v4i64(ptr %a) {
367 ; CHECK-LABEL: ctpop_v4i64:
369 ; CHECK-NEXT: ptrue p0.d, vl2
370 ; CHECK-NEXT: ldp q0, q1, [x0]
371 ; CHECK-NEXT: cnt z0.d, p0/m, z0.d
372 ; CHECK-NEXT: cnt z1.d, p0/m, z1.d
373 ; CHECK-NEXT: stp q0, q1, [x0]
375 %op = load <4 x i64>, ptr %a
376 %res = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %op)
377 store <4 x i64> %res, ptr %a
382 ; Count trailing zeros
; Trailing-zero count of a <4 x i8> argument. The expected assembly works on
; 16-bit (.h) lanes under a vl4 predicate: bit 8 is set first (orr #0x100) so
; the count can never exceed 8, then the lane is bit-reversed (rbit) and its
; leading zeros are counted (clz).
385 define <4 x i8> @cttz_v4i8(<4 x i8> %op) {
386 ; CHECK-LABEL: cttz_v4i8:
388 ; CHECK-NEXT: ptrue p0.h, vl4
389 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
390 ; CHECK-NEXT: orr z0.h, z0.h, #0x100
391 ; CHECK-NEXT: rbit z0.h, p0/m, z0.h
392 ; CHECK-NEXT: clz z0.h, p0/m, z0.h
393 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
395 %res = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %op)
; Trailing-zero count of a <8 x i8> argument. The expected assembly bit-reverses
; each byte (.b) lane with rbit and then counts leading zeros with clz, both
; under a vl8 predicate.
399 define <8 x i8> @cttz_v8i8(<8 x i8> %op) {
400 ; CHECK-LABEL: cttz_v8i8:
402 ; CHECK-NEXT: ptrue p0.b, vl8
403 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
404 ; CHECK-NEXT: rbit z0.b, p0/m, z0.b
405 ; CHECK-NEXT: clz z0.b, p0/m, z0.b
406 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
408 %res = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %op)
; Trailing-zero count of a <16 x i8> argument. The expected assembly
; bit-reverses each byte (.b) lane with rbit and then counts leading zeros with
; clz, both under a vl16 predicate.
412 define <16 x i8> @cttz_v16i8(<16 x i8> %op) {
413 ; CHECK-LABEL: cttz_v16i8:
415 ; CHECK-NEXT: ptrue p0.b, vl16
416 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
417 ; CHECK-NEXT: rbit z0.b, p0/m, z0.b
418 ; CHECK-NEXT: clz z0.b, p0/m, z0.b
419 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
421 %res = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %op)
; Loads a <32 x i8> from %a, takes its trailing-zero count and stores the result
; back to %a. The expected assembly loads a q0/q1 pair, applies rbit and then
; clz to the byte lanes of each register under a shared vl16 predicate, and
; stores the pair back.
425 define void @cttz_v32i8(ptr %a) {
426 ; CHECK-LABEL: cttz_v32i8:
428 ; CHECK-NEXT: ptrue p0.b, vl16
429 ; CHECK-NEXT: ldp q0, q1, [x0]
430 ; CHECK-NEXT: rbit z0.b, p0/m, z0.b
431 ; CHECK-NEXT: rbit z1.b, p0/m, z1.b
432 ; CHECK-NEXT: clz z0.b, p0/m, z0.b
433 ; CHECK-NEXT: clz z1.b, p0/m, z1.b
434 ; CHECK-NEXT: stp q0, q1, [x0]
436 %op = load <32 x i8>, ptr %a
437 %res = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %op)
438 store <32 x i8> %res, ptr %a
; Trailing-zero count of a <2 x i16> argument. The expected assembly works on
; 32-bit (.s) lanes under a vl2 predicate: bit 16 is set first (orr #0x10000)
; so the count can never exceed 16, then the lane is bit-reversed (rbit) and
; its leading zeros are counted (clz).
442 define <2 x i16> @cttz_v2i16(<2 x i16> %op) {
443 ; CHECK-LABEL: cttz_v2i16:
445 ; CHECK-NEXT: ptrue p0.s, vl2
446 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
447 ; CHECK-NEXT: orr z0.s, z0.s, #0x10000
448 ; CHECK-NEXT: rbit z0.s, p0/m, z0.s
449 ; CHECK-NEXT: clz z0.s, p0/m, z0.s
450 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
452 %res = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %op)
; Trailing-zero count of a <4 x i16> argument. The expected assembly
; bit-reverses each halfword (.h) lane with rbit and then counts leading zeros
; with clz, both under a vl4 predicate.
456 define <4 x i16> @cttz_v4i16(<4 x i16> %op) {
457 ; CHECK-LABEL: cttz_v4i16:
459 ; CHECK-NEXT: ptrue p0.h, vl4
460 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
461 ; CHECK-NEXT: rbit z0.h, p0/m, z0.h
462 ; CHECK-NEXT: clz z0.h, p0/m, z0.h
463 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
465 %res = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %op)
; Trailing-zero count of a <8 x i16> argument. The expected assembly
; bit-reverses each halfword (.h) lane with rbit and then counts leading zeros
; with clz, both under a vl8 predicate.
469 define <8 x i16> @cttz_v8i16(<8 x i16> %op) {
470 ; CHECK-LABEL: cttz_v8i16:
472 ; CHECK-NEXT: ptrue p0.h, vl8
473 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
474 ; CHECK-NEXT: rbit z0.h, p0/m, z0.h
475 ; CHECK-NEXT: clz z0.h, p0/m, z0.h
476 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
478 %res = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %op)
; Loads a <16 x i16> from %a, takes its trailing-zero count and stores the
; result back to %a. The expected assembly loads a q0/q1 pair, applies rbit and
; then clz to the halfword lanes of each register under a shared vl8 predicate,
; and stores the pair back.
482 define void @cttz_v16i16(ptr %a) {
483 ; CHECK-LABEL: cttz_v16i16:
485 ; CHECK-NEXT: ptrue p0.h, vl8
486 ; CHECK-NEXT: ldp q0, q1, [x0]
487 ; CHECK-NEXT: rbit z0.h, p0/m, z0.h
488 ; CHECK-NEXT: rbit z1.h, p0/m, z1.h
489 ; CHECK-NEXT: clz z0.h, p0/m, z0.h
490 ; CHECK-NEXT: clz z1.h, p0/m, z1.h
491 ; CHECK-NEXT: stp q0, q1, [x0]
493 %op = load <16 x i16>, ptr %a
494 %res = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %op)
495 store <16 x i16> %res, ptr %a
; Trailing-zero count of a <2 x i32> argument. The expected assembly
; bit-reverses each word (.s) lane with rbit and then counts leading zeros with
; clz, both under a vl2 predicate.
499 define <2 x i32> @cttz_v2i32(<2 x i32> %op) {
500 ; CHECK-LABEL: cttz_v2i32:
502 ; CHECK-NEXT: ptrue p0.s, vl2
503 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
504 ; CHECK-NEXT: rbit z0.s, p0/m, z0.s
505 ; CHECK-NEXT: clz z0.s, p0/m, z0.s
506 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
508 %res = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %op)
; Trailing-zero count of a <4 x i32> argument. The expected assembly
; bit-reverses each word (.s) lane with rbit and then counts leading zeros with
; clz, both under a vl4 predicate.
512 define <4 x i32> @cttz_v4i32(<4 x i32> %op) {
513 ; CHECK-LABEL: cttz_v4i32:
515 ; CHECK-NEXT: ptrue p0.s, vl4
516 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
517 ; CHECK-NEXT: rbit z0.s, p0/m, z0.s
518 ; CHECK-NEXT: clz z0.s, p0/m, z0.s
519 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
521 %res = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %op)
; Loads a <8 x i32> from %a, takes its trailing-zero count and stores the result
; back to %a. The expected assembly loads a q0/q1 pair, applies rbit and then
; clz to the word lanes of each register under a shared vl4 predicate, and
; stores the pair back.
525 define void @cttz_v8i32(ptr %a) {
526 ; CHECK-LABEL: cttz_v8i32:
528 ; CHECK-NEXT: ptrue p0.s, vl4
529 ; CHECK-NEXT: ldp q0, q1, [x0]
530 ; CHECK-NEXT: rbit z0.s, p0/m, z0.s
531 ; CHECK-NEXT: rbit z1.s, p0/m, z1.s
532 ; CHECK-NEXT: clz z0.s, p0/m, z0.s
533 ; CHECK-NEXT: clz z1.s, p0/m, z1.s
534 ; CHECK-NEXT: stp q0, q1, [x0]
536 %op = load <8 x i32>, ptr %a
537 %res = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %op)
538 store <8 x i32> %res, ptr %a
; Trailing-zero count of a <1 x i64> argument. The expected assembly
; bit-reverses the doubleword (.d) lane with rbit and then counts leading zeros
; with clz, both under a vl1 predicate.
542 define <1 x i64> @cttz_v1i64(<1 x i64> %op) {
543 ; CHECK-LABEL: cttz_v1i64:
545 ; CHECK-NEXT: ptrue p0.d, vl1
546 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
547 ; CHECK-NEXT: rbit z0.d, p0/m, z0.d
548 ; CHECK-NEXT: clz z0.d, p0/m, z0.d
549 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
551 %res = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %op)
; Trailing-zero count of a <2 x i64> argument. The expected assembly
; bit-reverses each doubleword (.d) lane with rbit and then counts leading
; zeros with clz, both under a vl2 predicate.
555 define <2 x i64> @cttz_v2i64(<2 x i64> %op) {
556 ; CHECK-LABEL: cttz_v2i64:
558 ; CHECK-NEXT: ptrue p0.d, vl2
559 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
560 ; CHECK-NEXT: rbit z0.d, p0/m, z0.d
561 ; CHECK-NEXT: clz z0.d, p0/m, z0.d
562 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
564 %res = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %op)
; Loads a <4 x i64> from %a, takes its trailing-zero count and stores the result
; back to %a. The expected assembly loads a q0/q1 pair, applies rbit and then
; clz to the doubleword lanes of each register under a shared vl2 predicate,
; and stores the pair back.
568 define void @cttz_v4i64(ptr %a) {
569 ; CHECK-LABEL: cttz_v4i64:
571 ; CHECK-NEXT: ptrue p0.d, vl2
572 ; CHECK-NEXT: ldp q0, q1, [x0]
573 ; CHECK-NEXT: rbit z0.d, p0/m, z0.d
574 ; CHECK-NEXT: rbit z1.d, p0/m, z1.d
575 ; CHECK-NEXT: clz z0.d, p0/m, z0.d
576 ; CHECK-NEXT: clz z1.d, p0/m, z1.d
577 ; CHECK-NEXT: stp q0, q1, [x0]
579 %op = load <4 x i64>, ptr %a
580 %res = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %op)
581 store <4 x i64> %res, ptr %a
; Declarations of the llvm.ctlz overloads called by the ctlz_* functions, one
; per vector type from <4 x i8> up to <4 x i64>.
; NOTE(review): these are declared (and called) with a single vector operand,
; whereas the LangRef form of llvm.ctlz has a second i1 operand. Presumably the
; IR auto-upgrader supplies it when the file is parsed; confirm before changing.
586 declare <4 x i8> @llvm.ctlz.v4i8(<4 x i8>)
587 declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>)
588 declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>)
589 declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>)
590 declare <2 x i16> @llvm.ctlz.v2i16(<2 x i16>)
591 declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>)
592 declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>)
593 declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>)
594 declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>)
595 declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>)
596 declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>)
597 declare <1 x i64> @llvm.ctlz.v1i64(<1 x i64>)
598 declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>)
599 declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>)
; Declarations of the llvm.ctpop overloads called by the ctpop_* functions, one
; per vector type from <4 x i8> up to <4 x i64>.
601 declare <4 x i8> @llvm.ctpop.v4i8(<4 x i8>)
602 declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>)
603 declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
604 declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)
605 declare <2 x i16> @llvm.ctpop.v2i16(<2 x i16>)
606 declare <4 x i16> @llvm.ctpop.v4i16(<4 x i16>)
607 declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
608 declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
609 declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
610 declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
611 declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
612 declare <1 x i64> @llvm.ctpop.v1i64(<1 x i64>)
613 declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
614 declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
; Declarations of the llvm.cttz overloads called by the cttz_* functions, one
; per vector type from <4 x i8> up to <4 x i64>.
; NOTE(review): these are declared (and called) with a single vector operand,
; whereas the LangRef form of llvm.cttz has a second i1 operand. Presumably the
; IR auto-upgrader supplies it when the file is parsed; confirm before changing.
616 declare <4 x i8> @llvm.cttz.v4i8(<4 x i8>)
617 declare <8 x i8> @llvm.cttz.v8i8(<8 x i8>)
618 declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>)
619 declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>)
620 declare <2 x i16> @llvm.cttz.v2i16(<2 x i16>)
621 declare <4 x i16> @llvm.cttz.v4i16(<4 x i16>)
622 declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>)
623 declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>)
624 declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>)
625 declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>)
626 declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>)
627 declare <1 x i64> @llvm.cttz.v1i64(<1 x i64>)
628 declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>)
629 declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>)