1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64,AVX1
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64,AVX2
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X64,AVX512VL
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefixes=X64,AVX512VLBWDQ
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512cd,+avx512vl | FileCheck %s --check-prefixes=X64,AVX512,AVX512VLCD
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512cd | FileCheck %s --check-prefixes=X64,AVX512,AVX512CD
9 ; Just one 32-bit run to make sure we do reasonable things for i64 lzcnt.
10 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32-AVX
; Calls @llvm.ctlz.v4i64 on %in with the i1 flag operand set to 0 (false).
; In the assertions below, the two llc configurations that enable +avx512cd
; expect a single vplzcntq (on ymm0 when +avx512vl is also enabled, otherwise
; on zmm0); every other configuration expects a vpshufb lookup-table sequence.
; NOTE(review): per the LLVM LangRef the i1 operand of llvm.ctlz is the
; is_zero_poison flag - confirm against the LangRef version in use.
12 define <4 x i64> @testv4i64(<4 x i64> %in) nounwind {
13 ; AVX1-LABEL: testv4i64:
15 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
16 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
17 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm4
18 ; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm1
19 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20 ; AVX1-NEXT: vpand %xmm5, %xmm1, %xmm6
21 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
22 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm6, %xmm7
23 ; AVX1-NEXT: vpand %xmm7, %xmm4, %xmm4
24 ; AVX1-NEXT: vpshufb %xmm6, %xmm3, %xmm6
25 ; AVX1-NEXT: vpaddb %xmm6, %xmm4, %xmm4
26 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm6
27 ; AVX1-NEXT: vpsrlw $8, %xmm6, %xmm6
28 ; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm6
29 ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
30 ; AVX1-NEXT: vpaddw %xmm6, %xmm4, %xmm4
31 ; AVX1-NEXT: vpcmpeqw %xmm1, %xmm2, %xmm6
32 ; AVX1-NEXT: vpsrld $16, %xmm6, %xmm6
33 ; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm6
34 ; AVX1-NEXT: vpsrld $16, %xmm4, %xmm4
35 ; AVX1-NEXT: vpaddd %xmm6, %xmm4, %xmm4
36 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm2
37 ; AVX1-NEXT: vpsrlq $32, %xmm2, %xmm2
38 ; AVX1-NEXT: vpand %xmm2, %xmm4, %xmm2
39 ; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm4
40 ; AVX1-NEXT: vpaddq %xmm2, %xmm4, %xmm2
41 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm4
42 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm6
43 ; AVX1-NEXT: vpand %xmm5, %xmm6, %xmm5
44 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm5, %xmm6
45 ; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm4
46 ; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm3
47 ; AVX1-NEXT: vpaddb %xmm3, %xmm4, %xmm3
48 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm4
49 ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
50 ; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm4
51 ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
52 ; AVX1-NEXT: vpaddw %xmm4, %xmm3, %xmm3
53 ; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm4
54 ; AVX1-NEXT: vpsrld $16, %xmm4, %xmm4
55 ; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm4
56 ; AVX1-NEXT: vpsrld $16, %xmm3, %xmm3
57 ; AVX1-NEXT: vpaddd %xmm4, %xmm3, %xmm3
58 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
59 ; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
60 ; AVX1-NEXT: vpand %xmm0, %xmm3, %xmm0
61 ; AVX1-NEXT: vpsrlq $32, %xmm3, %xmm1
62 ; AVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0
63 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
66 ; AVX2-LABEL: testv4i64:
68 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
69 ; AVX2-NEXT: vpshufb %ymm0, %ymm1, %ymm2
70 ; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm3
71 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
72 ; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
73 ; AVX2-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
74 ; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
75 ; AVX2-NEXT: vpshufb %ymm3, %ymm1, %ymm1
76 ; AVX2-NEXT: vpaddb %ymm1, %ymm2, %ymm1
77 ; AVX2-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
78 ; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
79 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm2
80 ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
81 ; AVX2-NEXT: vpaddw %ymm2, %ymm1, %ymm1
82 ; AVX2-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm2
83 ; AVX2-NEXT: vpsrld $16, %ymm2, %ymm2
84 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm2
85 ; AVX2-NEXT: vpsrld $16, %ymm1, %ymm1
86 ; AVX2-NEXT: vpaddd %ymm2, %ymm1, %ymm1
87 ; AVX2-NEXT: vpcmpeqd %ymm4, %ymm0, %ymm0
88 ; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
89 ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
90 ; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm1
91 ; AVX2-NEXT: vpaddq %ymm0, %ymm1, %ymm0
94 ; AVX512VL-LABEL: testv4i64:
96 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
97 ; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm2
98 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
99 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
100 ; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
101 ; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
102 ; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
103 ; AVX512VL-NEXT: vpshufb %ymm3, %ymm1, %ymm1
104 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm2, %ymm1
105 ; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
106 ; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
107 ; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm2
108 ; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
109 ; AVX512VL-NEXT: vpaddw %ymm2, %ymm1, %ymm1
110 ; AVX512VL-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm2
111 ; AVX512VL-NEXT: vpsrld $16, %ymm2, %ymm2
112 ; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm2
113 ; AVX512VL-NEXT: vpsrld $16, %ymm1, %ymm1
114 ; AVX512VL-NEXT: vpaddd %ymm2, %ymm1, %ymm1
115 ; AVX512VL-NEXT: vpcmpeqd %ymm4, %ymm0, %ymm0
116 ; AVX512VL-NEXT: vpsrlq $32, %ymm0, %ymm0
117 ; AVX512VL-NEXT: vpand %ymm0, %ymm1, %ymm0
118 ; AVX512VL-NEXT: vpsrlq $32, %ymm1, %ymm1
119 ; AVX512VL-NEXT: vpaddq %ymm0, %ymm1, %ymm0
120 ; AVX512VL-NEXT: retq
122 ; AVX512VLBWDQ-LABEL: testv4i64:
123 ; AVX512VLBWDQ: # %bb.0:
124 ; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
125 ; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm2
126 ; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm3
127 ; AVX512VLBWDQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
128 ; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
129 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
130 ; AVX512VLBWDQ-NEXT: vpand %ymm5, %ymm2, %ymm2
131 ; AVX512VLBWDQ-NEXT: vpshufb %ymm3, %ymm1, %ymm1
132 ; AVX512VLBWDQ-NEXT: vpaddb %ymm1, %ymm2, %ymm1
133 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
134 ; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm2, %ymm2
135 ; AVX512VLBWDQ-NEXT: vpand %ymm2, %ymm1, %ymm2
136 ; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm1, %ymm1
137 ; AVX512VLBWDQ-NEXT: vpaddw %ymm2, %ymm1, %ymm1
138 ; AVX512VLBWDQ-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm2
139 ; AVX512VLBWDQ-NEXT: vpsrld $16, %ymm2, %ymm2
140 ; AVX512VLBWDQ-NEXT: vpand %ymm2, %ymm1, %ymm2
141 ; AVX512VLBWDQ-NEXT: vpsrld $16, %ymm1, %ymm1
142 ; AVX512VLBWDQ-NEXT: vpaddd %ymm2, %ymm1, %ymm1
143 ; AVX512VLBWDQ-NEXT: vpcmpeqd %ymm4, %ymm0, %ymm0
144 ; AVX512VLBWDQ-NEXT: vpsrlq $32, %ymm0, %ymm0
145 ; AVX512VLBWDQ-NEXT: vpand %ymm0, %ymm1, %ymm0
146 ; AVX512VLBWDQ-NEXT: vpsrlq $32, %ymm1, %ymm1
147 ; AVX512VLBWDQ-NEXT: vpaddq %ymm0, %ymm1, %ymm0
148 ; AVX512VLBWDQ-NEXT: retq
150 ; AVX512VLCD-LABEL: testv4i64:
151 ; AVX512VLCD: # %bb.0:
152 ; AVX512VLCD-NEXT: vplzcntq %ymm0, %ymm0
153 ; AVX512VLCD-NEXT: retq
155 ; AVX512CD-LABEL: testv4i64:
157 ; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
158 ; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0
159 ; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
160 ; AVX512CD-NEXT: retq
162 ; X32-AVX-LABEL: testv4i64:
164 ; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
165 ; X32-AVX-NEXT: vpshufb %ymm0, %ymm1, %ymm2
166 ; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm3
167 ; X32-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm3, %ymm3
168 ; X32-AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
169 ; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
170 ; X32-AVX-NEXT: vpand %ymm5, %ymm2, %ymm2
171 ; X32-AVX-NEXT: vpshufb %ymm3, %ymm1, %ymm1
172 ; X32-AVX-NEXT: vpaddb %ymm1, %ymm2, %ymm1
173 ; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
174 ; X32-AVX-NEXT: vpsrlw $8, %ymm2, %ymm2
175 ; X32-AVX-NEXT: vpand %ymm2, %ymm1, %ymm2
176 ; X32-AVX-NEXT: vpsrlw $8, %ymm1, %ymm1
177 ; X32-AVX-NEXT: vpaddw %ymm2, %ymm1, %ymm1
178 ; X32-AVX-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm2
179 ; X32-AVX-NEXT: vpsrld $16, %ymm2, %ymm2
180 ; X32-AVX-NEXT: vpand %ymm2, %ymm1, %ymm2
181 ; X32-AVX-NEXT: vpsrld $16, %ymm1, %ymm1
182 ; X32-AVX-NEXT: vpaddd %ymm2, %ymm1, %ymm1
183 ; X32-AVX-NEXT: vpcmpeqd %ymm4, %ymm0, %ymm0
184 ; X32-AVX-NEXT: vpsrlq $32, %ymm0, %ymm0
185 ; X32-AVX-NEXT: vpand %ymm0, %ymm1, %ymm0
186 ; X32-AVX-NEXT: vpsrlq $32, %ymm1, %ymm1
187 ; X32-AVX-NEXT: vpaddq %ymm0, %ymm1, %ymm0
190 %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %in, i1 0)
; Calls @llvm.ctlz.v4i64 on %in with the i1 flag operand set to -1 (the
; all-ones i1 value, i.e. true).
; In the assertions below, the two llc configurations that enable +avx512cd
; expect a single vplzcntq (on ymm0 when +avx512vl is also enabled, otherwise
; on zmm0); every other configuration expects a vpshufb lookup-table sequence.
; NOTE(review): per the LLVM LangRef the i1 operand of llvm.ctlz is the
; is_zero_poison flag - confirm against the LangRef version in use.
194 define <4 x i64> @testv4i64u(<4 x i64> %in) nounwind {
195 ; AVX1-LABEL: testv4i64u:
197 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
198 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
199 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm4
200 ; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm1
201 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
202 ; AVX1-NEXT: vpand %xmm5, %xmm1, %xmm6
203 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
204 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm6, %xmm7
205 ; AVX1-NEXT: vpand %xmm7, %xmm4, %xmm4
206 ; AVX1-NEXT: vpshufb %xmm6, %xmm3, %xmm6
207 ; AVX1-NEXT: vpaddb %xmm6, %xmm4, %xmm4
208 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm6
209 ; AVX1-NEXT: vpsrlw $8, %xmm6, %xmm6
210 ; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm6
211 ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
212 ; AVX1-NEXT: vpaddw %xmm6, %xmm4, %xmm4
213 ; AVX1-NEXT: vpcmpeqw %xmm1, %xmm2, %xmm6
214 ; AVX1-NEXT: vpsrld $16, %xmm6, %xmm6
215 ; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm6
216 ; AVX1-NEXT: vpsrld $16, %xmm4, %xmm4
217 ; AVX1-NEXT: vpaddd %xmm6, %xmm4, %xmm4
218 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm2
219 ; AVX1-NEXT: vpsrlq $32, %xmm2, %xmm2
220 ; AVX1-NEXT: vpand %xmm2, %xmm4, %xmm2
221 ; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm4
222 ; AVX1-NEXT: vpaddq %xmm2, %xmm4, %xmm2
223 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm4
224 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm6
225 ; AVX1-NEXT: vpand %xmm5, %xmm6, %xmm5
226 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm5, %xmm6
227 ; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm4
228 ; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm3
229 ; AVX1-NEXT: vpaddb %xmm3, %xmm4, %xmm3
230 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm4
231 ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
232 ; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm4
233 ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
234 ; AVX1-NEXT: vpaddw %xmm4, %xmm3, %xmm3
235 ; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm4
236 ; AVX1-NEXT: vpsrld $16, %xmm4, %xmm4
237 ; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm4
238 ; AVX1-NEXT: vpsrld $16, %xmm3, %xmm3
239 ; AVX1-NEXT: vpaddd %xmm4, %xmm3, %xmm3
240 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
241 ; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
242 ; AVX1-NEXT: vpand %xmm0, %xmm3, %xmm0
243 ; AVX1-NEXT: vpsrlq $32, %xmm3, %xmm1
244 ; AVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0
245 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
248 ; AVX2-LABEL: testv4i64u:
250 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
251 ; AVX2-NEXT: vpshufb %ymm0, %ymm1, %ymm2
252 ; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm3
253 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
254 ; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
255 ; AVX2-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
256 ; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
257 ; AVX2-NEXT: vpshufb %ymm3, %ymm1, %ymm1
258 ; AVX2-NEXT: vpaddb %ymm1, %ymm2, %ymm1
259 ; AVX2-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
260 ; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
261 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm2
262 ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
263 ; AVX2-NEXT: vpaddw %ymm2, %ymm1, %ymm1
264 ; AVX2-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm2
265 ; AVX2-NEXT: vpsrld $16, %ymm2, %ymm2
266 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm2
267 ; AVX2-NEXT: vpsrld $16, %ymm1, %ymm1
268 ; AVX2-NEXT: vpaddd %ymm2, %ymm1, %ymm1
269 ; AVX2-NEXT: vpcmpeqd %ymm4, %ymm0, %ymm0
270 ; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
271 ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
272 ; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm1
273 ; AVX2-NEXT: vpaddq %ymm0, %ymm1, %ymm0
276 ; AVX512VL-LABEL: testv4i64u:
278 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
279 ; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm2
280 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
281 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
282 ; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
283 ; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
284 ; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
285 ; AVX512VL-NEXT: vpshufb %ymm3, %ymm1, %ymm1
286 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm2, %ymm1
287 ; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
288 ; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
289 ; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm2
290 ; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
291 ; AVX512VL-NEXT: vpaddw %ymm2, %ymm1, %ymm1
292 ; AVX512VL-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm2
293 ; AVX512VL-NEXT: vpsrld $16, %ymm2, %ymm2
294 ; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm2
295 ; AVX512VL-NEXT: vpsrld $16, %ymm1, %ymm1
296 ; AVX512VL-NEXT: vpaddd %ymm2, %ymm1, %ymm1
297 ; AVX512VL-NEXT: vpcmpeqd %ymm4, %ymm0, %ymm0
298 ; AVX512VL-NEXT: vpsrlq $32, %ymm0, %ymm0
299 ; AVX512VL-NEXT: vpand %ymm0, %ymm1, %ymm0
300 ; AVX512VL-NEXT: vpsrlq $32, %ymm1, %ymm1
301 ; AVX512VL-NEXT: vpaddq %ymm0, %ymm1, %ymm0
302 ; AVX512VL-NEXT: retq
304 ; AVX512VLBWDQ-LABEL: testv4i64u:
305 ; AVX512VLBWDQ: # %bb.0:
306 ; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
307 ; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm2
308 ; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm3
309 ; AVX512VLBWDQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
310 ; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
311 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
312 ; AVX512VLBWDQ-NEXT: vpand %ymm5, %ymm2, %ymm2
313 ; AVX512VLBWDQ-NEXT: vpshufb %ymm3, %ymm1, %ymm1
314 ; AVX512VLBWDQ-NEXT: vpaddb %ymm1, %ymm2, %ymm1
315 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
316 ; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm2, %ymm2
317 ; AVX512VLBWDQ-NEXT: vpand %ymm2, %ymm1, %ymm2
318 ; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm1, %ymm1
319 ; AVX512VLBWDQ-NEXT: vpaddw %ymm2, %ymm1, %ymm1
320 ; AVX512VLBWDQ-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm2
321 ; AVX512VLBWDQ-NEXT: vpsrld $16, %ymm2, %ymm2
322 ; AVX512VLBWDQ-NEXT: vpand %ymm2, %ymm1, %ymm2
323 ; AVX512VLBWDQ-NEXT: vpsrld $16, %ymm1, %ymm1
324 ; AVX512VLBWDQ-NEXT: vpaddd %ymm2, %ymm1, %ymm1
325 ; AVX512VLBWDQ-NEXT: vpcmpeqd %ymm4, %ymm0, %ymm0
326 ; AVX512VLBWDQ-NEXT: vpsrlq $32, %ymm0, %ymm0
327 ; AVX512VLBWDQ-NEXT: vpand %ymm0, %ymm1, %ymm0
328 ; AVX512VLBWDQ-NEXT: vpsrlq $32, %ymm1, %ymm1
329 ; AVX512VLBWDQ-NEXT: vpaddq %ymm0, %ymm1, %ymm0
330 ; AVX512VLBWDQ-NEXT: retq
332 ; AVX512VLCD-LABEL: testv4i64u:
333 ; AVX512VLCD: # %bb.0:
334 ; AVX512VLCD-NEXT: vplzcntq %ymm0, %ymm0
335 ; AVX512VLCD-NEXT: retq
337 ; AVX512CD-LABEL: testv4i64u:
339 ; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
340 ; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0
341 ; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
342 ; AVX512CD-NEXT: retq
344 ; X32-AVX-LABEL: testv4i64u:
346 ; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
347 ; X32-AVX-NEXT: vpshufb %ymm0, %ymm1, %ymm2
348 ; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm3
349 ; X32-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm3, %ymm3
350 ; X32-AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
351 ; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
352 ; X32-AVX-NEXT: vpand %ymm5, %ymm2, %ymm2
353 ; X32-AVX-NEXT: vpshufb %ymm3, %ymm1, %ymm1
354 ; X32-AVX-NEXT: vpaddb %ymm1, %ymm2, %ymm1
355 ; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
356 ; X32-AVX-NEXT: vpsrlw $8, %ymm2, %ymm2
357 ; X32-AVX-NEXT: vpand %ymm2, %ymm1, %ymm2
358 ; X32-AVX-NEXT: vpsrlw $8, %ymm1, %ymm1
359 ; X32-AVX-NEXT: vpaddw %ymm2, %ymm1, %ymm1
360 ; X32-AVX-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm2
361 ; X32-AVX-NEXT: vpsrld $16, %ymm2, %ymm2
362 ; X32-AVX-NEXT: vpand %ymm2, %ymm1, %ymm2
363 ; X32-AVX-NEXT: vpsrld $16, %ymm1, %ymm1
364 ; X32-AVX-NEXT: vpaddd %ymm2, %ymm1, %ymm1
365 ; X32-AVX-NEXT: vpcmpeqd %ymm4, %ymm0, %ymm0
366 ; X32-AVX-NEXT: vpsrlq $32, %ymm0, %ymm0
367 ; X32-AVX-NEXT: vpand %ymm0, %ymm1, %ymm0
368 ; X32-AVX-NEXT: vpsrlq $32, %ymm1, %ymm1
369 ; X32-AVX-NEXT: vpaddq %ymm0, %ymm1, %ymm0
372 %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %in, i1 -1)
; Calls @llvm.ctlz.v8i32 on %in with the i1 flag operand set to 0 (false).
; In the assertions below, the two llc configurations that enable +avx512cd
; expect a single vplzcntd (on ymm0 when +avx512vl is also enabled, otherwise
; on zmm0); every other configuration expects a vpshufb lookup-table sequence.
; NOTE(review): per the LLVM LangRef the i1 operand of llvm.ctlz is the
; is_zero_poison flag - confirm against the LangRef version in use.
376 define <8 x i32> @testv8i32(<8 x i32> %in) nounwind {
377 ; AVX1-LABEL: testv8i32:
379 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
380 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
381 ; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm3
382 ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm4
383 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
384 ; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
385 ; AVX1-NEXT: vpxor %xmm6, %xmm6, %xmm6
386 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm4, %xmm7
387 ; AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3
388 ; AVX1-NEXT: vpshufb %xmm4, %xmm2, %xmm4
389 ; AVX1-NEXT: vpaddb %xmm4, %xmm3, %xmm3
390 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm1, %xmm4
391 ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
392 ; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm4
393 ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
394 ; AVX1-NEXT: vpaddw %xmm4, %xmm3, %xmm3
395 ; AVX1-NEXT: vpcmpeqw %xmm6, %xmm1, %xmm1
396 ; AVX1-NEXT: vpsrld $16, %xmm1, %xmm1
397 ; AVX1-NEXT: vpand %xmm1, %xmm3, %xmm1
398 ; AVX1-NEXT: vpsrld $16, %xmm3, %xmm3
399 ; AVX1-NEXT: vpaddd %xmm1, %xmm3, %xmm1
400 ; AVX1-NEXT: vpshufb %xmm0, %xmm2, %xmm3
401 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
402 ; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
403 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm4, %xmm5
404 ; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3
405 ; AVX1-NEXT: vpshufb %xmm4, %xmm2, %xmm2
406 ; AVX1-NEXT: vpaddb %xmm2, %xmm3, %xmm2
407 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm0, %xmm3
408 ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
409 ; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm3
410 ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
411 ; AVX1-NEXT: vpaddw %xmm3, %xmm2, %xmm2
412 ; AVX1-NEXT: vpcmpeqw %xmm6, %xmm0, %xmm0
413 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
414 ; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0
415 ; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2
416 ; AVX1-NEXT: vpaddd %xmm0, %xmm2, %xmm0
417 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
420 ; AVX2-LABEL: testv8i32:
422 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
423 ; AVX2-NEXT: vpshufb %ymm0, %ymm1, %ymm2
424 ; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm3
425 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
426 ; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
427 ; AVX2-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
428 ; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
429 ; AVX2-NEXT: vpshufb %ymm3, %ymm1, %ymm1
430 ; AVX2-NEXT: vpaddb %ymm1, %ymm2, %ymm1
431 ; AVX2-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
432 ; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
433 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm2
434 ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
435 ; AVX2-NEXT: vpaddw %ymm2, %ymm1, %ymm1
436 ; AVX2-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
437 ; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
438 ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
439 ; AVX2-NEXT: vpsrld $16, %ymm1, %ymm1
440 ; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0
443 ; AVX512VL-LABEL: testv8i32:
445 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
446 ; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm2
447 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
448 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
449 ; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
450 ; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
451 ; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
452 ; AVX512VL-NEXT: vpshufb %ymm3, %ymm1, %ymm1
453 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm2, %ymm1
454 ; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
455 ; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
456 ; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm2
457 ; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
458 ; AVX512VL-NEXT: vpaddw %ymm2, %ymm1, %ymm1
459 ; AVX512VL-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
460 ; AVX512VL-NEXT: vpsrld $16, %ymm0, %ymm0
461 ; AVX512VL-NEXT: vpand %ymm0, %ymm1, %ymm0
462 ; AVX512VL-NEXT: vpsrld $16, %ymm1, %ymm1
463 ; AVX512VL-NEXT: vpaddd %ymm0, %ymm1, %ymm0
464 ; AVX512VL-NEXT: retq
466 ; AVX512VLBWDQ-LABEL: testv8i32:
467 ; AVX512VLBWDQ: # %bb.0:
468 ; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
469 ; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm2
470 ; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm3
471 ; AVX512VLBWDQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
472 ; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
473 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
474 ; AVX512VLBWDQ-NEXT: vpand %ymm5, %ymm2, %ymm2
475 ; AVX512VLBWDQ-NEXT: vpshufb %ymm3, %ymm1, %ymm1
476 ; AVX512VLBWDQ-NEXT: vpaddb %ymm1, %ymm2, %ymm1
477 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
478 ; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm2, %ymm2
479 ; AVX512VLBWDQ-NEXT: vpand %ymm2, %ymm1, %ymm2
480 ; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm1, %ymm1
481 ; AVX512VLBWDQ-NEXT: vpaddw %ymm2, %ymm1, %ymm1
482 ; AVX512VLBWDQ-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
483 ; AVX512VLBWDQ-NEXT: vpsrld $16, %ymm0, %ymm0
484 ; AVX512VLBWDQ-NEXT: vpand %ymm0, %ymm1, %ymm0
485 ; AVX512VLBWDQ-NEXT: vpsrld $16, %ymm1, %ymm1
486 ; AVX512VLBWDQ-NEXT: vpaddd %ymm0, %ymm1, %ymm0
487 ; AVX512VLBWDQ-NEXT: retq
489 ; AVX512VLCD-LABEL: testv8i32:
490 ; AVX512VLCD: # %bb.0:
491 ; AVX512VLCD-NEXT: vplzcntd %ymm0, %ymm0
492 ; AVX512VLCD-NEXT: retq
494 ; AVX512CD-LABEL: testv8i32:
496 ; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
497 ; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
498 ; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
499 ; AVX512CD-NEXT: retq
501 ; X32-AVX-LABEL: testv8i32:
503 ; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
504 ; X32-AVX-NEXT: vpshufb %ymm0, %ymm1, %ymm2
505 ; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm3
506 ; X32-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm3, %ymm3
507 ; X32-AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
508 ; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
509 ; X32-AVX-NEXT: vpand %ymm5, %ymm2, %ymm2
510 ; X32-AVX-NEXT: vpshufb %ymm3, %ymm1, %ymm1
511 ; X32-AVX-NEXT: vpaddb %ymm1, %ymm2, %ymm1
512 ; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
513 ; X32-AVX-NEXT: vpsrlw $8, %ymm2, %ymm2
514 ; X32-AVX-NEXT: vpand %ymm2, %ymm1, %ymm2
515 ; X32-AVX-NEXT: vpsrlw $8, %ymm1, %ymm1
516 ; X32-AVX-NEXT: vpaddw %ymm2, %ymm1, %ymm1
517 ; X32-AVX-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
518 ; X32-AVX-NEXT: vpsrld $16, %ymm0, %ymm0
519 ; X32-AVX-NEXT: vpand %ymm0, %ymm1, %ymm0
520 ; X32-AVX-NEXT: vpsrld $16, %ymm1, %ymm1
521 ; X32-AVX-NEXT: vpaddd %ymm0, %ymm1, %ymm0
524 %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %in, i1 0)
; Calls @llvm.ctlz.v8i32 on %in with the i1 flag operand set to -1 (the
; all-ones i1 value, i.e. true).
; In the assertions below, the two llc configurations that enable +avx512cd
; expect a single vplzcntd (on ymm0 when +avx512vl is also enabled, otherwise
; on zmm0); every other configuration expects a vpshufb lookup-table sequence.
; NOTE(review): per the LLVM LangRef the i1 operand of llvm.ctlz is the
; is_zero_poison flag - confirm against the LangRef version in use.
528 define <8 x i32> @testv8i32u(<8 x i32> %in) nounwind {
529 ; AVX1-LABEL: testv8i32u:
531 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
532 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
533 ; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm3
534 ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm4
535 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
536 ; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
537 ; AVX1-NEXT: vpxor %xmm6, %xmm6, %xmm6
538 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm4, %xmm7
539 ; AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3
540 ; AVX1-NEXT: vpshufb %xmm4, %xmm2, %xmm4
541 ; AVX1-NEXT: vpaddb %xmm4, %xmm3, %xmm3
542 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm1, %xmm4
543 ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
544 ; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm4
545 ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
546 ; AVX1-NEXT: vpaddw %xmm4, %xmm3, %xmm3
547 ; AVX1-NEXT: vpcmpeqw %xmm6, %xmm1, %xmm1
548 ; AVX1-NEXT: vpsrld $16, %xmm1, %xmm1
549 ; AVX1-NEXT: vpand %xmm1, %xmm3, %xmm1
550 ; AVX1-NEXT: vpsrld $16, %xmm3, %xmm3
551 ; AVX1-NEXT: vpaddd %xmm1, %xmm3, %xmm1
552 ; AVX1-NEXT: vpshufb %xmm0, %xmm2, %xmm3
553 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
554 ; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
555 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm4, %xmm5
556 ; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3
557 ; AVX1-NEXT: vpshufb %xmm4, %xmm2, %xmm2
558 ; AVX1-NEXT: vpaddb %xmm2, %xmm3, %xmm2
559 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm0, %xmm3
560 ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
561 ; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm3
562 ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
563 ; AVX1-NEXT: vpaddw %xmm3, %xmm2, %xmm2
564 ; AVX1-NEXT: vpcmpeqw %xmm6, %xmm0, %xmm0
565 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
566 ; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0
567 ; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2
568 ; AVX1-NEXT: vpaddd %xmm0, %xmm2, %xmm0
569 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
572 ; AVX2-LABEL: testv8i32u:
574 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
575 ; AVX2-NEXT: vpshufb %ymm0, %ymm1, %ymm2
576 ; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm3
577 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
578 ; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
579 ; AVX2-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
580 ; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
581 ; AVX2-NEXT: vpshufb %ymm3, %ymm1, %ymm1
582 ; AVX2-NEXT: vpaddb %ymm1, %ymm2, %ymm1
583 ; AVX2-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
584 ; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
585 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm2
586 ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
587 ; AVX2-NEXT: vpaddw %ymm2, %ymm1, %ymm1
588 ; AVX2-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
589 ; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
590 ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
591 ; AVX2-NEXT: vpsrld $16, %ymm1, %ymm1
592 ; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0
595 ; AVX512VL-LABEL: testv8i32u:
597 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
598 ; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm2
599 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
600 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
601 ; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
602 ; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
603 ; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
604 ; AVX512VL-NEXT: vpshufb %ymm3, %ymm1, %ymm1
605 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm2, %ymm1
606 ; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
607 ; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
608 ; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm2
609 ; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
610 ; AVX512VL-NEXT: vpaddw %ymm2, %ymm1, %ymm1
611 ; AVX512VL-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
612 ; AVX512VL-NEXT: vpsrld $16, %ymm0, %ymm0
613 ; AVX512VL-NEXT: vpand %ymm0, %ymm1, %ymm0
614 ; AVX512VL-NEXT: vpsrld $16, %ymm1, %ymm1
615 ; AVX512VL-NEXT: vpaddd %ymm0, %ymm1, %ymm0
616 ; AVX512VL-NEXT: retq
618 ; AVX512VLBWDQ-LABEL: testv8i32u:
619 ; AVX512VLBWDQ: # %bb.0:
620 ; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
621 ; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm2
622 ; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm3
623 ; AVX512VLBWDQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
624 ; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
625 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
626 ; AVX512VLBWDQ-NEXT: vpand %ymm5, %ymm2, %ymm2
627 ; AVX512VLBWDQ-NEXT: vpshufb %ymm3, %ymm1, %ymm1
628 ; AVX512VLBWDQ-NEXT: vpaddb %ymm1, %ymm2, %ymm1
629 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
630 ; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm2, %ymm2
631 ; AVX512VLBWDQ-NEXT: vpand %ymm2, %ymm1, %ymm2
632 ; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm1, %ymm1
633 ; AVX512VLBWDQ-NEXT: vpaddw %ymm2, %ymm1, %ymm1
634 ; AVX512VLBWDQ-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
635 ; AVX512VLBWDQ-NEXT: vpsrld $16, %ymm0, %ymm0
636 ; AVX512VLBWDQ-NEXT: vpand %ymm0, %ymm1, %ymm0
637 ; AVX512VLBWDQ-NEXT: vpsrld $16, %ymm1, %ymm1
638 ; AVX512VLBWDQ-NEXT: vpaddd %ymm0, %ymm1, %ymm0
639 ; AVX512VLBWDQ-NEXT: retq
641 ; AVX512VLCD-LABEL: testv8i32u:
642 ; AVX512VLCD: # %bb.0:
643 ; AVX512VLCD-NEXT: vplzcntd %ymm0, %ymm0
644 ; AVX512VLCD-NEXT: retq
646 ; AVX512CD-LABEL: testv8i32u:
648 ; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
649 ; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
650 ; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
651 ; AVX512CD-NEXT: retq
653 ; X32-AVX-LABEL: testv8i32u:
655 ; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
656 ; X32-AVX-NEXT: vpshufb %ymm0, %ymm1, %ymm2
657 ; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm3
658 ; X32-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm3, %ymm3
659 ; X32-AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
660 ; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
661 ; X32-AVX-NEXT: vpand %ymm5, %ymm2, %ymm2
662 ; X32-AVX-NEXT: vpshufb %ymm3, %ymm1, %ymm1
663 ; X32-AVX-NEXT: vpaddb %ymm1, %ymm2, %ymm1
664 ; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
665 ; X32-AVX-NEXT: vpsrlw $8, %ymm2, %ymm2
666 ; X32-AVX-NEXT: vpand %ymm2, %ymm1, %ymm2
667 ; X32-AVX-NEXT: vpsrlw $8, %ymm1, %ymm1
668 ; X32-AVX-NEXT: vpaddw %ymm2, %ymm1, %ymm1
669 ; X32-AVX-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
670 ; X32-AVX-NEXT: vpsrld $16, %ymm0, %ymm0
671 ; X32-AVX-NEXT: vpand %ymm0, %ymm1, %ymm0
672 ; X32-AVX-NEXT: vpsrld $16, %ymm1, %ymm1
673 ; X32-AVX-NEXT: vpaddd %ymm0, %ymm1, %ymm0
676 %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %in, i1 -1)
680 define <16 x i16> @testv16i16(<16 x i16> %in) nounwind {
681 ; AVX1-LABEL: testv16i16:
683 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
684 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
685 ; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm3
686 ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm4
687 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
688 ; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
689 ; AVX1-NEXT: vpxor %xmm6, %xmm6, %xmm6
690 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm4, %xmm7
691 ; AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3
692 ; AVX1-NEXT: vpshufb %xmm4, %xmm2, %xmm4
693 ; AVX1-NEXT: vpaddb %xmm4, %xmm3, %xmm3
694 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm1, %xmm1
695 ; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
696 ; AVX1-NEXT: vpand %xmm1, %xmm3, %xmm1
697 ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
698 ; AVX1-NEXT: vpaddw %xmm1, %xmm3, %xmm1
699 ; AVX1-NEXT: vpshufb %xmm0, %xmm2, %xmm3
700 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
701 ; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
702 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm4, %xmm5
703 ; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3
704 ; AVX1-NEXT: vpshufb %xmm4, %xmm2, %xmm2
705 ; AVX1-NEXT: vpaddb %xmm2, %xmm3, %xmm2
706 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm0, %xmm0
707 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
708 ; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0
709 ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
710 ; AVX1-NEXT: vpaddw %xmm0, %xmm2, %xmm0
711 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
714 ; AVX2-LABEL: testv16i16:
716 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
717 ; AVX2-NEXT: vpshufb %ymm0, %ymm1, %ymm2
718 ; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm3
719 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
720 ; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
721 ; AVX2-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
722 ; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
723 ; AVX2-NEXT: vpshufb %ymm3, %ymm1, %ymm1
724 ; AVX2-NEXT: vpaddb %ymm1, %ymm2, %ymm1
725 ; AVX2-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm0
726 ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
727 ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
728 ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
729 ; AVX2-NEXT: vpaddw %ymm0, %ymm1, %ymm0
732 ; AVX512VL-LABEL: testv16i16:
734 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
735 ; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm2
736 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
737 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
738 ; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
739 ; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
740 ; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
741 ; AVX512VL-NEXT: vpshufb %ymm3, %ymm1, %ymm1
742 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm2, %ymm1
743 ; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm0
744 ; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
745 ; AVX512VL-NEXT: vpand %ymm0, %ymm1, %ymm0
746 ; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
747 ; AVX512VL-NEXT: vpaddw %ymm0, %ymm1, %ymm0
748 ; AVX512VL-NEXT: retq
750 ; AVX512VLBWDQ-LABEL: testv16i16:
751 ; AVX512VLBWDQ: # %bb.0:
752 ; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
753 ; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm2
754 ; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm3
755 ; AVX512VLBWDQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
756 ; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
757 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
758 ; AVX512VLBWDQ-NEXT: vpand %ymm5, %ymm2, %ymm2
759 ; AVX512VLBWDQ-NEXT: vpshufb %ymm3, %ymm1, %ymm1
760 ; AVX512VLBWDQ-NEXT: vpaddb %ymm1, %ymm2, %ymm1
761 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm0
762 ; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm0, %ymm0
763 ; AVX512VLBWDQ-NEXT: vpand %ymm0, %ymm1, %ymm0
764 ; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm1, %ymm1
765 ; AVX512VLBWDQ-NEXT: vpaddw %ymm0, %ymm1, %ymm0
766 ; AVX512VLBWDQ-NEXT: retq
768 ; AVX512-LABEL: testv16i16:
770 ; AVX512-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
771 ; AVX512-NEXT: vplzcntd %zmm0, %zmm0
772 ; AVX512-NEXT: vpmovdw %zmm0, %ymm0
773 ; AVX512-NEXT: vpsubw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
776 ; X32-AVX-LABEL: testv16i16:
778 ; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
779 ; X32-AVX-NEXT: vpshufb %ymm0, %ymm1, %ymm2
780 ; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm3
781 ; X32-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm3, %ymm3
782 ; X32-AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
783 ; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
784 ; X32-AVX-NEXT: vpand %ymm5, %ymm2, %ymm2
785 ; X32-AVX-NEXT: vpshufb %ymm3, %ymm1, %ymm1
786 ; X32-AVX-NEXT: vpaddb %ymm1, %ymm2, %ymm1
787 ; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm0
788 ; X32-AVX-NEXT: vpsrlw $8, %ymm0, %ymm0
789 ; X32-AVX-NEXT: vpand %ymm0, %ymm1, %ymm0
790 ; X32-AVX-NEXT: vpsrlw $8, %ymm1, %ymm1
791 ; X32-AVX-NEXT: vpaddw %ymm0, %ymm1, %ymm0
793 %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %in, i1 0)
; ctlz of <16 x i16> with the i1 flag operand set to -1 (true).
; Lowering expected by the assertions below:
;  * the AVX1 run splits the vector into 128-bit halves (vextractf128 /
;    vinsertf128) and counts per byte through a vpshufb lookup table
;    [4,3,2,2,1,1,1,1,0,...] (the leading-zero count of a 4-bit index), then
;    merges adjacent byte counts into word counts (vpsrlw $8 / vpand / vpaddw);
;  * the AVX2, AVX512VL, AVX512VLBWDQ and 32-bit runs use the same lookup on a
;    full ymm register;
;  * the AVX512 (CD) runs zero-extend to dwords, use vplzcntd, truncate with
;    vpmovdw and subtract a constant-pool value (presumably the 16 extra
;    leading zeros introduced by widening; the constant itself is not visible
;    in the checks).
797 define <16 x i16> @testv16i16u(<16 x i16> %in) nounwind {
798 ; AVX1-LABEL: testv16i16u:
800 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
801 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
802 ; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm3
803 ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm4
804 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
805 ; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
806 ; AVX1-NEXT: vpxor %xmm6, %xmm6, %xmm6
807 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm4, %xmm7
808 ; AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3
809 ; AVX1-NEXT: vpshufb %xmm4, %xmm2, %xmm4
810 ; AVX1-NEXT: vpaddb %xmm4, %xmm3, %xmm3
811 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm1, %xmm1
812 ; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
813 ; AVX1-NEXT: vpand %xmm1, %xmm3, %xmm1
814 ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
815 ; AVX1-NEXT: vpaddw %xmm1, %xmm3, %xmm1
816 ; AVX1-NEXT: vpshufb %xmm0, %xmm2, %xmm3
817 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
818 ; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
819 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm4, %xmm5
820 ; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3
821 ; AVX1-NEXT: vpshufb %xmm4, %xmm2, %xmm2
822 ; AVX1-NEXT: vpaddb %xmm2, %xmm3, %xmm2
823 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm0, %xmm0
824 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
825 ; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0
826 ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
827 ; AVX1-NEXT: vpaddw %xmm0, %xmm2, %xmm0
828 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
831 ; AVX2-LABEL: testv16i16u:
833 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
834 ; AVX2-NEXT: vpshufb %ymm0, %ymm1, %ymm2
835 ; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm3
836 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
837 ; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
838 ; AVX2-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
839 ; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
840 ; AVX2-NEXT: vpshufb %ymm3, %ymm1, %ymm1
841 ; AVX2-NEXT: vpaddb %ymm1, %ymm2, %ymm1
842 ; AVX2-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm0
843 ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
844 ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
845 ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
846 ; AVX2-NEXT: vpaddw %ymm0, %ymm1, %ymm0
849 ; AVX512VL-LABEL: testv16i16u:
851 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
852 ; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm2
853 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
854 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
855 ; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
856 ; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
857 ; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
858 ; AVX512VL-NEXT: vpshufb %ymm3, %ymm1, %ymm1
859 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm2, %ymm1
860 ; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm0
861 ; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
862 ; AVX512VL-NEXT: vpand %ymm0, %ymm1, %ymm0
863 ; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
864 ; AVX512VL-NEXT: vpaddw %ymm0, %ymm1, %ymm0
865 ; AVX512VL-NEXT: retq
867 ; AVX512VLBWDQ-LABEL: testv16i16u:
868 ; AVX512VLBWDQ: # %bb.0:
869 ; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
870 ; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm2
871 ; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm3
872 ; AVX512VLBWDQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
873 ; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
874 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
875 ; AVX512VLBWDQ-NEXT: vpand %ymm5, %ymm2, %ymm2
876 ; AVX512VLBWDQ-NEXT: vpshufb %ymm3, %ymm1, %ymm1
877 ; AVX512VLBWDQ-NEXT: vpaddb %ymm1, %ymm2, %ymm1
878 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm0
879 ; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm0, %ymm0
880 ; AVX512VLBWDQ-NEXT: vpand %ymm0, %ymm1, %ymm0
881 ; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm1, %ymm1
882 ; AVX512VLBWDQ-NEXT: vpaddw %ymm0, %ymm1, %ymm0
883 ; AVX512VLBWDQ-NEXT: retq
885 ; AVX512-LABEL: testv16i16u:
887 ; AVX512-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
888 ; AVX512-NEXT: vplzcntd %zmm0, %zmm0
889 ; AVX512-NEXT: vpmovdw %zmm0, %ymm0
890 ; AVX512-NEXT: vpsubw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
893 ; X32-AVX-LABEL: testv16i16u:
895 ; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
896 ; X32-AVX-NEXT: vpshufb %ymm0, %ymm1, %ymm2
897 ; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm3
898 ; X32-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm3, %ymm3
899 ; X32-AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
900 ; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
901 ; X32-AVX-NEXT: vpand %ymm5, %ymm2, %ymm2
902 ; X32-AVX-NEXT: vpshufb %ymm3, %ymm1, %ymm1
903 ; X32-AVX-NEXT: vpaddb %ymm1, %ymm2, %ymm1
904 ; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm0
905 ; X32-AVX-NEXT: vpsrlw $8, %ymm0, %ymm0
906 ; X32-AVX-NEXT: vpand %ymm0, %ymm1, %ymm0
907 ; X32-AVX-NEXT: vpsrlw $8, %ymm1, %ymm1
908 ; X32-AVX-NEXT: vpaddw %ymm0, %ymm1, %ymm0
910 %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %in, i1 -1)
; ctlz of <32 x i8> with the i1 flag operand set to 0 (false).
; Lowering expected by the assertions below:
;  * without vplzcntd, the per-byte count is built from a vpshufb lookup table
;    [4,3,2,2,1,1,1,1,0,...] (leading-zero count of a 4-bit index): the lookup
;    on the original byte is kept only where the high nibble is zero
;    (vpcmpeqb / vpand) and added to the lookup on the high nibble. The AVX1
;    run does this per 128-bit half; the other runs use full ymm registers;
;  * the AVX512 (CD) runs zero-extend each 128-bit half to 16 dwords, use
;    vplzcntd, truncate with vpmovdb and subtract 24 from every byte (the
;    32 - 8 extra leading zeros introduced by widening).
914 define <32 x i8> @testv32i8(<32 x i8> %in) nounwind {
915 ; AVX1-LABEL: testv32i8:
917 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
918 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
919 ; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm3
920 ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
921 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
922 ; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
923 ; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
924 ; AVX1-NEXT: vpcmpeqb %xmm5, %xmm1, %xmm6
925 ; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
926 ; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm1
927 ; AVX1-NEXT: vpaddb %xmm1, %xmm3, %xmm1
928 ; AVX1-NEXT: vpshufb %xmm0, %xmm2, %xmm3
929 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
930 ; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
931 ; AVX1-NEXT: vpcmpeqb %xmm5, %xmm0, %xmm4
932 ; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
933 ; AVX1-NEXT: vpshufb %xmm0, %xmm2, %xmm0
934 ; AVX1-NEXT: vpaddb %xmm0, %xmm3, %xmm0
935 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
938 ; AVX2-LABEL: testv32i8:
940 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
941 ; AVX2-NEXT: vpshufb %ymm0, %ymm1, %ymm2
942 ; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
943 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
944 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
945 ; AVX2-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm3
946 ; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
947 ; AVX2-NEXT: vpshufb %ymm0, %ymm1, %ymm0
948 ; AVX2-NEXT: vpaddb %ymm0, %ymm2, %ymm0
951 ; AVX512VL-LABEL: testv32i8:
953 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
954 ; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm2
955 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
956 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
957 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
958 ; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm3
959 ; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
960 ; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm0
961 ; AVX512VL-NEXT: vpaddb %ymm0, %ymm2, %ymm0
962 ; AVX512VL-NEXT: retq
964 ; AVX512VLBWDQ-LABEL: testv32i8:
965 ; AVX512VLBWDQ: # %bb.0:
966 ; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
967 ; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm2
968 ; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm0
969 ; AVX512VLBWDQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
970 ; AVX512VLBWDQ-NEXT: vpxor %xmm3, %xmm3, %xmm3
971 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm3
972 ; AVX512VLBWDQ-NEXT: vpand %ymm3, %ymm2, %ymm2
973 ; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm0
974 ; AVX512VLBWDQ-NEXT: vpaddb %ymm0, %ymm2, %ymm0
975 ; AVX512VLBWDQ-NEXT: retq
977 ; AVX512-LABEL: testv32i8:
979 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
980 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
981 ; AVX512-NEXT: vplzcntd %zmm1, %zmm1
982 ; AVX512-NEXT: vpmovdb %zmm1, %xmm1
983 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
984 ; AVX512-NEXT: vpsubb %xmm2, %xmm1, %xmm1
985 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
986 ; AVX512-NEXT: vplzcntd %zmm0, %zmm0
987 ; AVX512-NEXT: vpmovdb %zmm0, %xmm0
988 ; AVX512-NEXT: vpsubb %xmm2, %xmm0, %xmm0
989 ; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
992 ; X32-AVX-LABEL: testv32i8:
994 ; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
995 ; X32-AVX-NEXT: vpshufb %ymm0, %ymm1, %ymm2
996 ; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm0
997 ; X32-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
998 ; X32-AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
999 ; X32-AVX-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm3
1000 ; X32-AVX-NEXT: vpand %ymm3, %ymm2, %ymm2
1001 ; X32-AVX-NEXT: vpshufb %ymm0, %ymm1, %ymm0
1002 ; X32-AVX-NEXT: vpaddb %ymm0, %ymm2, %ymm0
1003 ; X32-AVX-NEXT: retl
1004 %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %in, i1 0)
; ctlz of <32 x i8> with the i1 flag operand set to -1 (true).
; The assertions below expect the same instruction sequences as for
; testv32i8 (i1 0): a vpshufb nibble lookup on the runs without vplzcntd, and
; zero-extend / vplzcntd / vpmovdb / subtract 24 on the AVX512 (CD) runs.
1008 define <32 x i8> @testv32i8u(<32 x i8> %in) nounwind {
1009 ; AVX1-LABEL: testv32i8u:
1011 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1012 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
1013 ; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm3
1014 ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
1015 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1016 ; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
1017 ; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
1018 ; AVX1-NEXT: vpcmpeqb %xmm5, %xmm1, %xmm6
1019 ; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
1020 ; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm1
1021 ; AVX1-NEXT: vpaddb %xmm1, %xmm3, %xmm1
1022 ; AVX1-NEXT: vpshufb %xmm0, %xmm2, %xmm3
1023 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
1024 ; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
1025 ; AVX1-NEXT: vpcmpeqb %xmm5, %xmm0, %xmm4
1026 ; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
1027 ; AVX1-NEXT: vpshufb %xmm0, %xmm2, %xmm0
1028 ; AVX1-NEXT: vpaddb %xmm0, %xmm3, %xmm0
1029 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1032 ; AVX2-LABEL: testv32i8u:
1034 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
1035 ; AVX2-NEXT: vpshufb %ymm0, %ymm1, %ymm2
1036 ; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
1037 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1038 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
1039 ; AVX2-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm3
1040 ; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
1041 ; AVX2-NEXT: vpshufb %ymm0, %ymm1, %ymm0
1042 ; AVX2-NEXT: vpaddb %ymm0, %ymm2, %ymm0
1045 ; AVX512VL-LABEL: testv32i8u:
1046 ; AVX512VL: # %bb.0:
1047 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
1048 ; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm2
1049 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
1050 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1051 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
1052 ; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm3
1053 ; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
1054 ; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm0
1055 ; AVX512VL-NEXT: vpaddb %ymm0, %ymm2, %ymm0
1056 ; AVX512VL-NEXT: retq
1058 ; AVX512VLBWDQ-LABEL: testv32i8u:
1059 ; AVX512VLBWDQ: # %bb.0:
1060 ; AVX512VLBWDQ-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
1061 ; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm2
1062 ; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm0
1063 ; AVX512VLBWDQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1064 ; AVX512VLBWDQ-NEXT: vpxor %xmm3, %xmm3, %xmm3
1065 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm3
1066 ; AVX512VLBWDQ-NEXT: vpand %ymm3, %ymm2, %ymm2
1067 ; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm0
1068 ; AVX512VLBWDQ-NEXT: vpaddb %ymm0, %ymm2, %ymm0
1069 ; AVX512VLBWDQ-NEXT: retq
1071 ; AVX512-LABEL: testv32i8u:
1073 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
1074 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
1075 ; AVX512-NEXT: vplzcntd %zmm1, %zmm1
1076 ; AVX512-NEXT: vpmovdb %zmm1, %xmm1
1077 ; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
1078 ; AVX512-NEXT: vpsubb %xmm2, %xmm1, %xmm1
1079 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1080 ; AVX512-NEXT: vplzcntd %zmm0, %zmm0
1081 ; AVX512-NEXT: vpmovdb %zmm0, %xmm0
1082 ; AVX512-NEXT: vpsubb %xmm2, %xmm0, %xmm0
1083 ; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1086 ; X32-AVX-LABEL: testv32i8u:
1088 ; X32-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
1089 ; X32-AVX-NEXT: vpshufb %ymm0, %ymm1, %ymm2
1090 ; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm0
1091 ; X32-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
1092 ; X32-AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
1093 ; X32-AVX-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm3
1094 ; X32-AVX-NEXT: vpand %ymm3, %ymm2, %ymm2
1095 ; X32-AVX-NEXT: vpshufb %ymm0, %ymm1, %ymm0
1096 ; X32-AVX-NEXT: vpaddb %ymm0, %ymm2, %ymm0
1097 ; X32-AVX-NEXT: retl
1098 %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %in, i1 -1)
; Constant-folding test: ctlz of the constant <256, -1, 0, 255> (i1 flag 0)
; must fold to a single constant load of [55,0,64,56]; no count instructions
; are expected. The 32-bit run lists eight values, which appear to be the same
; four i64 results printed as (low, high) 32-bit pairs.
1102 define <4 x i64> @foldv4i64() nounwind {
1103 ; X64-LABEL: foldv4i64:
1105 ; X64-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
1108 ; X32-AVX-LABEL: foldv4i64:
1110 ; X32-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,0,0,64,0,56,0]
1111 ; X32-AVX-NEXT: retl
1112 %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 0)
; Constant-folding test: same constant input as foldv4i64 but with the i1
; flag set to -1 (true). The expected folded constant is unchanged, including
; 64 for the zero lane.
1116 define <4 x i64> @foldv4i64u() nounwind {
1117 ; X64-LABEL: foldv4i64u:
1119 ; X64-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
1122 ; X32-AVX-LABEL: foldv4i64u:
1124 ; X32-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,0,0,64,0,56,0]
1125 ; X32-AVX-NEXT: retl
1126 %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 -1)
; Constant-folding test: ctlz of a constant <8 x i32> (i1 flag 0) must fold to
; a single constant load. Expected per-lane counts: 256 -> 23, -1 -> 0,
; 0 -> 32, 255 -> 24, -65536 -> 0, 7 -> 29, 24 -> 27, 88 -> 25.
1130 define <8 x i32> @foldv8i32() nounwind {
1131 ; X64-LABEL: foldv8i32:
1133 ; X64-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
1136 ; X32-AVX-LABEL: foldv8i32:
1138 ; X32-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
1139 ; X32-AVX-NEXT: retl
1140 %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 0)
; Constant-folding test: same constant input as foldv8i32 but with the i1
; flag set to -1 (true). The expected folded constant is unchanged, including
; 32 for the zero lane.
1144 define <8 x i32> @foldv8i32u() nounwind {
1145 ; X64-LABEL: foldv8i32u:
1147 ; X64-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
1150 ; X32-AVX-LABEL: foldv8i32u:
1152 ; X32-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
1153 ; X32-AVX-NEXT: retl
1154 %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 -1)
; Constant-folding test: ctlz of a constant <16 x i16> (i1 flag 0) must fold
; to a single constant load.
; NOTE(review): the literal `i16 -65536` (lane 4) is outside the i16 range.
; The expected result for that lane is 16, the same as for the explicit
; `i16 0` lane, so it is evidently treated as zero here -- confirm the IR
; parser in use still accepts out-of-range literals.
1158 define <16 x i16> @foldv16i16() nounwind {
1159 ; X64-LABEL: foldv16i16:
1161 ; X64-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
1164 ; X32-AVX-LABEL: foldv16i16:
1166 ; X32-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
1167 ; X32-AVX-NEXT: retl
1168 %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 0)
; Constant-folding test: same constant input as foldv16i16 but with the i1
; flag set to -1 (true). The expected folded constant is unchanged, including
; 16 for the zero lanes.
; NOTE(review): the literal `i16 -65536` (lane 4) is outside the i16 range;
; its expected result (16) matches the explicit `i16 0` lane, so it is
; evidently treated as zero here -- confirm the IR parser in use still accepts
; out-of-range literals.
1172 define <16 x i16> @foldv16i16u() nounwind {
1173 ; X64-LABEL: foldv16i16u:
1175 ; X64-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
1178 ; X32-AVX-LABEL: foldv16i16u:
1180 ; X32-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
1181 ; X32-AVX-NEXT: retl
1182 %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 -1)
; Constant-folding test: ctlz of a constant <32 x i8> (i1 flag 0) must fold to
; a single constant load.
; NOTE(review): the literals `i8 256` (lanes 0 and 18), `i8 -65536` (lane 4)
; and `i8 -256` (lane 19) are outside the i8 range. Each of those lanes is
; expected to yield 8, the same as the explicit `i8 0` lane, so they are
; evidently treated as zero here -- confirm the IR parser in use still accepts
; out-of-range literals.
1186 define <32 x i8> @foldv32i8() nounwind {
1187 ; X64-LABEL: foldv32i8:
1189 ; X64-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
1192 ; X32-AVX-LABEL: foldv32i8:
1194 ; X32-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
1195 ; X32-AVX-NEXT: retl
1196 %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 0)
; Constant-folding test: same constant input as foldv32i8 but with the i1
; flag set to -1 (true). The expected folded constant is unchanged, including
; 8 for the zero lanes.
; NOTE(review): the literals `i8 256` (lanes 0 and 18), `i8 -65536` (lane 4)
; and `i8 -256` (lane 19) are outside the i8 range; their expected result (8)
; matches the explicit `i8 0` lane, so they are evidently treated as zero
; here -- confirm the IR parser in use still accepts out-of-range literals.
1200 define <32 x i8> @foldv32i8u() nounwind {
1201 ; X64-LABEL: foldv32i8u:
1203 ; X64-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
1206 ; X32-AVX-LABEL: foldv32i8u:
1208 ; X32-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
1209 ; X32-AVX-NEXT: retl
1210 %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 -1)
; Declarations of the count-leading-zeros intrinsics called by the tests in
; this file, one per 256-bit vector type. The trailing i1 operand is the flag
; the tests pass as either 0 or -1 (see the LangRef entry for llvm.ctlz for
; its exact meaning).
1214 declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1)
1215 declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1)
1216 declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1)
1217 declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1)