1 ; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
; ctz_v8i1: calls llvm.experimental.cttz.elts on an <8 x i1> operand with an i8
; result and the second operand set to i1 0. The assertions below expect a
; constant-pool load (.LCPI0_0), a shl #7 / cmlt #0 pair applied to the input,
; an AND with the loaded constant, a umaxv reduction over the 8 byte lanes, and
; a final subtraction of the reduced value from 8.
5 define i8 @ctz_v8i1(<8 x i1> %a) {
6 ; CHECK-LABEL: .LCPI0_0:
15 ; CHECK-LABEL: ctz_v8i1:
17 ; CHECK-NEXT: shl v0.8b, v0.8b, #7
18 ; CHECK-NEXT: adrp x8, .LCPI0_0
19 ; CHECK-NEXT: mov w9, #8 // =0x8
20 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0]
21 ; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
22 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
23 ; CHECK-NEXT: umaxv b0, v0.8b
24 ; CHECK-NEXT: fmov w8, s0
25 ; CHECK-NEXT: sub w0, w9, w8
27 %res = call i8 @llvm.experimental.cttz.elts.i8.v8i1(<8 x i1> %a, i1 0)
; ctz_v16i1: calls llvm.experimental.cttz.elts on a <16 x i1> operand with an
; i32 result and the second operand set to i1 0. The constant-pool entry
; .LCPI1_0 is expected to start with the descending bytes 16, 15, 14, ...; the
; code is expected to mask the input (shl #7 / cmlt #0), AND it with that
; constant, reduce with umaxv over 16 byte lanes, subtract the result from 16,
; and then keep only the low 8 bits (and ..., #0xff) for the returned value.
31 define i32 @ctz_v16i1(<16 x i1> %a) {
32 ; CHECK-LABEL: .LCPI1_0:
33 ; CHECK-NEXT: .byte 16
34 ; CHECK-NEXT: .byte 15
35 ; CHECK-NEXT: .byte 14
36 ; CHECK-NEXT: .byte 13
37 ; CHECK-NEXT: .byte 12
38 ; CHECK-NEXT: .byte 11
39 ; CHECK-NEXT: .byte 10
49 ; CHECK-LABEL: ctz_v16i1:
51 ; CHECK-NEXT: shl v0.16b, v0.16b, #7
52 ; CHECK-NEXT: adrp x8, .LCPI1_0
53 ; CHECK-NEXT: mov w9, #16 // =0x10
54 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
55 ; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
56 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
57 ; CHECK-NEXT: umaxv b0, v0.16b
58 ; CHECK-NEXT: fmov w8, s0
59 ; CHECK-NEXT: sub w8, w9, w8
60 ; CHECK-NEXT: and w0, w8, #0xff
62 %res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 0)
; ctz_v4i32: calls llvm.experimental.cttz.elts on a <4 x i32> operand (not an
; i1 vector) with an i16 result and the second operand set to i1 0. The
; constant-pool entry .LCPI2_0 is expected to hold the halfwords 4, 3, 2, 1.
; The expected code tests each lane for non-zero with cmtst, narrows to four
; halfword lanes with xtn, ANDs with the constant, reduces with umaxv over the
; halfword lanes, subtracts the result from 4, and masks with #0xff.
66 define i16 @ctz_v4i32(<4 x i32> %a) {
67 ; CHECK-LABEL: .LCPI2_0:
68 ; CHECK-NEXT: .hword 4
69 ; CHECK-NEXT: .hword 3
70 ; CHECK-NEXT: .hword 2
71 ; CHECK-NEXT: .hword 1
72 ; CHECK-LABEL: ctz_v4i32:
74 ; CHECK-NEXT: cmtst v0.4s, v0.4s, v0.4s
75 ; CHECK-NEXT: adrp x8, .LCPI2_0
76 ; CHECK-NEXT: mov w9, #4 // =0x4
77 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI2_0]
78 ; CHECK-NEXT: xtn v0.4h, v0.4s
79 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
80 ; CHECK-NEXT: umaxv h0, v0.4h
81 ; CHECK-NEXT: fmov w8, s0
82 ; CHECK-NEXT: sub w8, w9, w8
83 ; CHECK-NEXT: and w0, w8, #0xff
85 %res = call i16 @llvm.experimental.cttz.elts.i16.v4i32(<4 x i32> %a, i1 0)
; ctz_i7_v8i1: same <8 x i1> operand as ctz_v8i1 but with a non-power-of-two
; i7 result type (second operand still i1 0). The expected instruction
; sequence matches the one asserted for ctz_v8i1, differing only in the
; constant-pool label (.LCPI3_0).
89 define i7 @ctz_i7_v8i1(<8 x i1> %a) {
90 ; CHECK-LABEL: .LCPI3_0:
99 ; CHECK-LABEL: ctz_i7_v8i1:
101 ; CHECK-NEXT: shl v0.8b, v0.8b, #7
102 ; CHECK-NEXT: adrp x8, .LCPI3_0
103 ; CHECK-NEXT: mov w9, #8 // =0x8
104 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI3_0]
105 ; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
106 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
107 ; CHECK-NEXT: umaxv b0, v0.8b
108 ; CHECK-NEXT: fmov w8, s0
109 ; CHECK-NEXT: sub w0, w9, w8
111 %res = call i7 @llvm.experimental.cttz.elts.i7.v8i1(<8 x i1> %a, i1 0)
; ctz_v8i1_poison: same call as ctz_v8i1 except the second operand is i1 1
; (per the LLVM LangRef, the flag that makes an all-zero input yield poison).
; The constant-pool entry .LCPI4_0 is expected to hold the bytes 8 down to 1,
; and the expected instruction sequence matches the one asserted for ctz_v8i1
; apart from the constant-pool label.
117 define i8 @ctz_v8i1_poison(<8 x i1> %a) {
118 ; CHECK-LABEL: .LCPI4_0:
119 ; CHECK-NEXT: .byte 8
120 ; CHECK-NEXT: .byte 7
121 ; CHECK-NEXT: .byte 6
122 ; CHECK-NEXT: .byte 5
123 ; CHECK-NEXT: .byte 4
124 ; CHECK-NEXT: .byte 3
125 ; CHECK-NEXT: .byte 2
126 ; CHECK-NEXT: .byte 1
127 ; CHECK-LABEL: ctz_v8i1_poison:
129 ; CHECK-NEXT: shl v0.8b, v0.8b, #7
130 ; CHECK-NEXT: adrp x8, .LCPI4_0
131 ; CHECK-NEXT: mov w9, #8 // =0x8
132 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0]
133 ; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
134 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
135 ; CHECK-NEXT: umaxv b0, v0.8b
136 ; CHECK-NEXT: fmov w8, s0
137 ; CHECK-NEXT: sub w0, w9, w8
139 %res = call i8 @llvm.experimental.cttz.elts.i8.v8i1(<8 x i1> %a, i1 1)
; Declarations of the llvm.experimental.cttz.elts overloads called by the test
; functions in this file: i8 and i7 results on <8 x i1>, an i32 result on
; <16 x i1>, and an i16 result on <4 x i32>. Each takes the vector operand
; followed by an i1 flag.
143 declare i8 @llvm.experimental.cttz.elts.i8.v8i1(<8 x i1>, i1)
144 declare i7 @llvm.experimental.cttz.elts.i7.v8i1(<8 x i1>, i1)
145 declare i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1>, i1)
146 declare i16 @llvm.experimental.cttz.elts.i16.v4i32(<4 x i32>, i1)