1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64,AVX1
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64,AVX2
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X64,AVX512VL
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefixes=X64,AVX512VLBWDQ
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512cd,+avx512vl | FileCheck %s --check-prefixes=X64,AVX512,AVX512VLCD
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512cd | FileCheck %s --check-prefixes=X64,AVX512,AVX512CD
9 ; Just one 32-bit run to make sure we do reasonable things for i64 lzcnt.
10 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32-AVX
; testv4i64: passes the <4 x i64> argument to @llvm.ctlz.v4i64 with the i1
; flag set to 0 (per the LLVM LangRef this flag is is_zero_poison, so a zero
; lane has a defined result here).
; The CHECK lines are autogenerated, one expected sequence per RUN prefix:
; AVX512VLCD and AVX512CD expect a single vplzcntq, the other prefixes expect
; a vpshufb table-lookup sequence. Regenerate them with the script named in
; the NOTE at the top of the file rather than editing them by hand.
12 define <4 x i64> @testv4i64(<4 x i64> %in) nounwind {
13 ; AVX1-LABEL: testv4i64:
15 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
16 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
17 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm4
18 ; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm1
19 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20 ; AVX1-NEXT: vpand %xmm5, %xmm1, %xmm6
21 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
22 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm6, %xmm7
23 ; AVX1-NEXT: vpand %xmm7, %xmm4, %xmm4
24 ; AVX1-NEXT: vpshufb %xmm6, %xmm3, %xmm6
25 ; AVX1-NEXT: vpaddb %xmm6, %xmm4, %xmm4
26 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm6
27 ; AVX1-NEXT: vpsrlw $8, %xmm6, %xmm6
28 ; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm6
29 ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
30 ; AVX1-NEXT: vpaddw %xmm6, %xmm4, %xmm4
31 ; AVX1-NEXT: vpcmpeqw %xmm1, %xmm2, %xmm6
32 ; AVX1-NEXT: vpsrld $16, %xmm6, %xmm6
33 ; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm6
34 ; AVX1-NEXT: vpsrld $16, %xmm4, %xmm4
35 ; AVX1-NEXT: vpaddd %xmm6, %xmm4, %xmm4
36 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm2
37 ; AVX1-NEXT: vpsrlq $32, %xmm2, %xmm2
38 ; AVX1-NEXT: vpand %xmm2, %xmm4, %xmm2
39 ; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm4
40 ; AVX1-NEXT: vpaddq %xmm2, %xmm4, %xmm2
41 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm4
42 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm6
43 ; AVX1-NEXT: vpand %xmm5, %xmm6, %xmm5
44 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm5, %xmm6
45 ; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm4
46 ; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm3
47 ; AVX1-NEXT: vpaddb %xmm3, %xmm4, %xmm3
48 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm4
49 ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
50 ; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm4
51 ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
52 ; AVX1-NEXT: vpaddw %xmm4, %xmm3, %xmm3
53 ; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm4
54 ; AVX1-NEXT: vpsrld $16, %xmm4, %xmm4
55 ; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm4
56 ; AVX1-NEXT: vpsrld $16, %xmm3, %xmm3
57 ; AVX1-NEXT: vpaddd %xmm4, %xmm3, %xmm3
58 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
59 ; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
60 ; AVX1-NEXT: vpand %xmm0, %xmm3, %xmm0
61 ; AVX1-NEXT: vpsrlq $32, %xmm3, %xmm1
62 ; AVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0
63 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
66 ; AVX2-LABEL: testv4i64:
68 ; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
69 ; AVX2-NEXT: # ymm1 = mem[0,1,0,1]
70 ; AVX2-NEXT: vpshufb %ymm0, %ymm1, %ymm2
71 ; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm3
72 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
73 ; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
74 ; AVX2-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
75 ; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
76 ; AVX2-NEXT: vpshufb %ymm3, %ymm1, %ymm1
77 ; AVX2-NEXT: vpaddb %ymm1, %ymm2, %ymm1
78 ; AVX2-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
79 ; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
80 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm2
81 ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
82 ; AVX2-NEXT: vpaddw %ymm2, %ymm1, %ymm1
83 ; AVX2-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm2
84 ; AVX2-NEXT: vpsrld $16, %ymm2, %ymm2
85 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm2
86 ; AVX2-NEXT: vpsrld $16, %ymm1, %ymm1
87 ; AVX2-NEXT: vpaddd %ymm2, %ymm1, %ymm1
88 ; AVX2-NEXT: vpcmpeqd %ymm4, %ymm0, %ymm0
89 ; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
90 ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
91 ; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm1
92 ; AVX2-NEXT: vpaddq %ymm0, %ymm1, %ymm0
95 ; AVX512VL-LABEL: testv4i64:
97 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
98 ; AVX512VL-NEXT: # ymm1 = mem[0,1,0,1]
99 ; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm2
100 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
101 ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %ymm3
102 ; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
103 ; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
104 ; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
105 ; AVX512VL-NEXT: vpshufb %ymm3, %ymm1, %ymm1
106 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm2, %ymm1
107 ; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
108 ; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
109 ; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm2
110 ; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
111 ; AVX512VL-NEXT: vpaddw %ymm2, %ymm1, %ymm1
112 ; AVX512VL-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm2
113 ; AVX512VL-NEXT: vpsrld $16, %ymm2, %ymm2
114 ; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm2
115 ; AVX512VL-NEXT: vpsrld $16, %ymm1, %ymm1
116 ; AVX512VL-NEXT: vpaddd %ymm2, %ymm1, %ymm1
117 ; AVX512VL-NEXT: vpcmpeqd %ymm4, %ymm0, %ymm0
118 ; AVX512VL-NEXT: vpsrlq $32, %ymm0, %ymm0
119 ; AVX512VL-NEXT: vpand %ymm0, %ymm1, %ymm0
120 ; AVX512VL-NEXT: vpsrlq $32, %ymm1, %ymm1
121 ; AVX512VL-NEXT: vpaddq %ymm0, %ymm1, %ymm0
122 ; AVX512VL-NEXT: retq
124 ; AVX512VLBWDQ-LABEL: testv4i64:
125 ; AVX512VLBWDQ: # %bb.0:
126 ; AVX512VLBWDQ-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
127 ; AVX512VLBWDQ-NEXT: # ymm1 = mem[0,1,0,1]
128 ; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm2
129 ; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm3
130 ; AVX512VLBWDQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %ymm3
131 ; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
132 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
133 ; AVX512VLBWDQ-NEXT: vpand %ymm5, %ymm2, %ymm2
134 ; AVX512VLBWDQ-NEXT: vpshufb %ymm3, %ymm1, %ymm1
135 ; AVX512VLBWDQ-NEXT: vpaddb %ymm1, %ymm2, %ymm1
136 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
137 ; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm2, %ymm2
138 ; AVX512VLBWDQ-NEXT: vpand %ymm2, %ymm1, %ymm2
139 ; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm1, %ymm1
140 ; AVX512VLBWDQ-NEXT: vpaddw %ymm2, %ymm1, %ymm1
141 ; AVX512VLBWDQ-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm2
142 ; AVX512VLBWDQ-NEXT: vpsrld $16, %ymm2, %ymm2
143 ; AVX512VLBWDQ-NEXT: vpand %ymm2, %ymm1, %ymm2
144 ; AVX512VLBWDQ-NEXT: vpsrld $16, %ymm1, %ymm1
145 ; AVX512VLBWDQ-NEXT: vpaddd %ymm2, %ymm1, %ymm1
146 ; AVX512VLBWDQ-NEXT: vpcmpeqd %ymm4, %ymm0, %ymm0
147 ; AVX512VLBWDQ-NEXT: vpsrlq $32, %ymm0, %ymm0
148 ; AVX512VLBWDQ-NEXT: vpand %ymm0, %ymm1, %ymm0
149 ; AVX512VLBWDQ-NEXT: vpsrlq $32, %ymm1, %ymm1
150 ; AVX512VLBWDQ-NEXT: vpaddq %ymm0, %ymm1, %ymm0
151 ; AVX512VLBWDQ-NEXT: retq
153 ; AVX512VLCD-LABEL: testv4i64:
154 ; AVX512VLCD: # %bb.0:
155 ; AVX512VLCD-NEXT: vplzcntq %ymm0, %ymm0
156 ; AVX512VLCD-NEXT: retq
158 ; AVX512CD-LABEL: testv4i64:
160 ; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
161 ; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0
162 ; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
163 ; AVX512CD-NEXT: retq
165 ; X32-AVX-LABEL: testv4i64:
167 ; X32-AVX-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
168 ; X32-AVX-NEXT: # ymm1 = mem[0,1,0,1]
169 ; X32-AVX-NEXT: vpshufb %ymm0, %ymm1, %ymm2
170 ; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm3
171 ; X32-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm3, %ymm3
172 ; X32-AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
173 ; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
174 ; X32-AVX-NEXT: vpand %ymm5, %ymm2, %ymm2
175 ; X32-AVX-NEXT: vpshufb %ymm3, %ymm1, %ymm1
176 ; X32-AVX-NEXT: vpaddb %ymm1, %ymm2, %ymm1
177 ; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
178 ; X32-AVX-NEXT: vpsrlw $8, %ymm2, %ymm2
179 ; X32-AVX-NEXT: vpand %ymm2, %ymm1, %ymm2
180 ; X32-AVX-NEXT: vpsrlw $8, %ymm1, %ymm1
181 ; X32-AVX-NEXT: vpaddw %ymm2, %ymm1, %ymm1
182 ; X32-AVX-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm2
183 ; X32-AVX-NEXT: vpsrld $16, %ymm2, %ymm2
184 ; X32-AVX-NEXT: vpand %ymm2, %ymm1, %ymm2
185 ; X32-AVX-NEXT: vpsrld $16, %ymm1, %ymm1
186 ; X32-AVX-NEXT: vpaddd %ymm2, %ymm1, %ymm1
187 ; X32-AVX-NEXT: vpcmpeqd %ymm4, %ymm0, %ymm0
188 ; X32-AVX-NEXT: vpsrlq $32, %ymm0, %ymm0
189 ; X32-AVX-NEXT: vpand %ymm0, %ymm1, %ymm0
190 ; X32-AVX-NEXT: vpsrlq $32, %ymm1, %ymm1
191 ; X32-AVX-NEXT: vpaddq %ymm0, %ymm1, %ymm0
194 %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %in, i1 0)
; testv4i64u: passes the <4 x i64> argument to @llvm.ctlz.v4i64 with the i1
; flag set to -1, i.e. true (per the LLVM LangRef this flag is is_zero_poison,
; so the result for a zero lane is poison here).
; The CHECK lines are autogenerated, one expected sequence per RUN prefix:
; AVX512VLCD and AVX512CD expect a single vplzcntq, the other prefixes expect
; a vpshufb table-lookup sequence. Regenerate them with the script named in
; the NOTE at the top of the file rather than editing them by hand.
198 define <4 x i64> @testv4i64u(<4 x i64> %in) nounwind {
199 ; AVX1-LABEL: testv4i64u:
201 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
202 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
203 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm4
204 ; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm1
205 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
206 ; AVX1-NEXT: vpand %xmm5, %xmm1, %xmm6
207 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
208 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm6, %xmm7
209 ; AVX1-NEXT: vpand %xmm7, %xmm4, %xmm4
210 ; AVX1-NEXT: vpshufb %xmm6, %xmm3, %xmm6
211 ; AVX1-NEXT: vpaddb %xmm6, %xmm4, %xmm4
212 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm6
213 ; AVX1-NEXT: vpsrlw $8, %xmm6, %xmm6
214 ; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm6
215 ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
216 ; AVX1-NEXT: vpaddw %xmm6, %xmm4, %xmm4
217 ; AVX1-NEXT: vpcmpeqw %xmm1, %xmm2, %xmm6
218 ; AVX1-NEXT: vpsrld $16, %xmm6, %xmm6
219 ; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm6
220 ; AVX1-NEXT: vpsrld $16, %xmm4, %xmm4
221 ; AVX1-NEXT: vpaddd %xmm6, %xmm4, %xmm4
222 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm2
223 ; AVX1-NEXT: vpsrlq $32, %xmm2, %xmm2
224 ; AVX1-NEXT: vpand %xmm2, %xmm4, %xmm2
225 ; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm4
226 ; AVX1-NEXT: vpaddq %xmm2, %xmm4, %xmm2
227 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm4
228 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm6
229 ; AVX1-NEXT: vpand %xmm5, %xmm6, %xmm5
230 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm5, %xmm6
231 ; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm4
232 ; AVX1-NEXT: vpshufb %xmm5, %xmm3, %xmm3
233 ; AVX1-NEXT: vpaddb %xmm3, %xmm4, %xmm3
234 ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm4
235 ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
236 ; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm4
237 ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
238 ; AVX1-NEXT: vpaddw %xmm4, %xmm3, %xmm3
239 ; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm4
240 ; AVX1-NEXT: vpsrld $16, %xmm4, %xmm4
241 ; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm4
242 ; AVX1-NEXT: vpsrld $16, %xmm3, %xmm3
243 ; AVX1-NEXT: vpaddd %xmm4, %xmm3, %xmm3
244 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
245 ; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
246 ; AVX1-NEXT: vpand %xmm0, %xmm3, %xmm0
247 ; AVX1-NEXT: vpsrlq $32, %xmm3, %xmm1
248 ; AVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0
249 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
252 ; AVX2-LABEL: testv4i64u:
254 ; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
255 ; AVX2-NEXT: # ymm1 = mem[0,1,0,1]
256 ; AVX2-NEXT: vpshufb %ymm0, %ymm1, %ymm2
257 ; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm3
258 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
259 ; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
260 ; AVX2-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
261 ; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
262 ; AVX2-NEXT: vpshufb %ymm3, %ymm1, %ymm1
263 ; AVX2-NEXT: vpaddb %ymm1, %ymm2, %ymm1
264 ; AVX2-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
265 ; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
266 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm2
267 ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
268 ; AVX2-NEXT: vpaddw %ymm2, %ymm1, %ymm1
269 ; AVX2-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm2
270 ; AVX2-NEXT: vpsrld $16, %ymm2, %ymm2
271 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm2
272 ; AVX2-NEXT: vpsrld $16, %ymm1, %ymm1
273 ; AVX2-NEXT: vpaddd %ymm2, %ymm1, %ymm1
274 ; AVX2-NEXT: vpcmpeqd %ymm4, %ymm0, %ymm0
275 ; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
276 ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
277 ; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm1
278 ; AVX2-NEXT: vpaddq %ymm0, %ymm1, %ymm0
281 ; AVX512VL-LABEL: testv4i64u:
283 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
284 ; AVX512VL-NEXT: # ymm1 = mem[0,1,0,1]
285 ; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm2
286 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
287 ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %ymm3
288 ; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
289 ; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
290 ; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
291 ; AVX512VL-NEXT: vpshufb %ymm3, %ymm1, %ymm1
292 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm2, %ymm1
293 ; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
294 ; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
295 ; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm2
296 ; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
297 ; AVX512VL-NEXT: vpaddw %ymm2, %ymm1, %ymm1
298 ; AVX512VL-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm2
299 ; AVX512VL-NEXT: vpsrld $16, %ymm2, %ymm2
300 ; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm2
301 ; AVX512VL-NEXT: vpsrld $16, %ymm1, %ymm1
302 ; AVX512VL-NEXT: vpaddd %ymm2, %ymm1, %ymm1
303 ; AVX512VL-NEXT: vpcmpeqd %ymm4, %ymm0, %ymm0
304 ; AVX512VL-NEXT: vpsrlq $32, %ymm0, %ymm0
305 ; AVX512VL-NEXT: vpand %ymm0, %ymm1, %ymm0
306 ; AVX512VL-NEXT: vpsrlq $32, %ymm1, %ymm1
307 ; AVX512VL-NEXT: vpaddq %ymm0, %ymm1, %ymm0
308 ; AVX512VL-NEXT: retq
310 ; AVX512VLBWDQ-LABEL: testv4i64u:
311 ; AVX512VLBWDQ: # %bb.0:
312 ; AVX512VLBWDQ-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
313 ; AVX512VLBWDQ-NEXT: # ymm1 = mem[0,1,0,1]
314 ; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm2
315 ; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm3
316 ; AVX512VLBWDQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %ymm3
317 ; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
318 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
319 ; AVX512VLBWDQ-NEXT: vpand %ymm5, %ymm2, %ymm2
320 ; AVX512VLBWDQ-NEXT: vpshufb %ymm3, %ymm1, %ymm1
321 ; AVX512VLBWDQ-NEXT: vpaddb %ymm1, %ymm2, %ymm1
322 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
323 ; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm2, %ymm2
324 ; AVX512VLBWDQ-NEXT: vpand %ymm2, %ymm1, %ymm2
325 ; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm1, %ymm1
326 ; AVX512VLBWDQ-NEXT: vpaddw %ymm2, %ymm1, %ymm1
327 ; AVX512VLBWDQ-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm2
328 ; AVX512VLBWDQ-NEXT: vpsrld $16, %ymm2, %ymm2
329 ; AVX512VLBWDQ-NEXT: vpand %ymm2, %ymm1, %ymm2
330 ; AVX512VLBWDQ-NEXT: vpsrld $16, %ymm1, %ymm1
331 ; AVX512VLBWDQ-NEXT: vpaddd %ymm2, %ymm1, %ymm1
332 ; AVX512VLBWDQ-NEXT: vpcmpeqd %ymm4, %ymm0, %ymm0
333 ; AVX512VLBWDQ-NEXT: vpsrlq $32, %ymm0, %ymm0
334 ; AVX512VLBWDQ-NEXT: vpand %ymm0, %ymm1, %ymm0
335 ; AVX512VLBWDQ-NEXT: vpsrlq $32, %ymm1, %ymm1
336 ; AVX512VLBWDQ-NEXT: vpaddq %ymm0, %ymm1, %ymm0
337 ; AVX512VLBWDQ-NEXT: retq
339 ; AVX512VLCD-LABEL: testv4i64u:
340 ; AVX512VLCD: # %bb.0:
341 ; AVX512VLCD-NEXT: vplzcntq %ymm0, %ymm0
342 ; AVX512VLCD-NEXT: retq
344 ; AVX512CD-LABEL: testv4i64u:
346 ; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
347 ; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0
348 ; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
349 ; AVX512CD-NEXT: retq
351 ; X32-AVX-LABEL: testv4i64u:
353 ; X32-AVX-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
354 ; X32-AVX-NEXT: # ymm1 = mem[0,1,0,1]
355 ; X32-AVX-NEXT: vpshufb %ymm0, %ymm1, %ymm2
356 ; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm3
357 ; X32-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm3, %ymm3
358 ; X32-AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
359 ; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
360 ; X32-AVX-NEXT: vpand %ymm5, %ymm2, %ymm2
361 ; X32-AVX-NEXT: vpshufb %ymm3, %ymm1, %ymm1
362 ; X32-AVX-NEXT: vpaddb %ymm1, %ymm2, %ymm1
363 ; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
364 ; X32-AVX-NEXT: vpsrlw $8, %ymm2, %ymm2
365 ; X32-AVX-NEXT: vpand %ymm2, %ymm1, %ymm2
366 ; X32-AVX-NEXT: vpsrlw $8, %ymm1, %ymm1
367 ; X32-AVX-NEXT: vpaddw %ymm2, %ymm1, %ymm1
368 ; X32-AVX-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm2
369 ; X32-AVX-NEXT: vpsrld $16, %ymm2, %ymm2
370 ; X32-AVX-NEXT: vpand %ymm2, %ymm1, %ymm2
371 ; X32-AVX-NEXT: vpsrld $16, %ymm1, %ymm1
372 ; X32-AVX-NEXT: vpaddd %ymm2, %ymm1, %ymm1
373 ; X32-AVX-NEXT: vpcmpeqd %ymm4, %ymm0, %ymm0
374 ; X32-AVX-NEXT: vpsrlq $32, %ymm0, %ymm0
375 ; X32-AVX-NEXT: vpand %ymm0, %ymm1, %ymm0
376 ; X32-AVX-NEXT: vpsrlq $32, %ymm1, %ymm1
377 ; X32-AVX-NEXT: vpaddq %ymm0, %ymm1, %ymm0
380 %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %in, i1 -1)
; testv8i32: passes the <8 x i32> argument to @llvm.ctlz.v8i32 with the i1
; flag set to 0 (per the LLVM LangRef this flag is is_zero_poison, so a zero
; lane has a defined result here).
; The CHECK lines are autogenerated, one expected sequence per RUN prefix:
; AVX512VLCD and AVX512CD expect a single vplzcntd, the other prefixes expect
; a vpshufb table-lookup sequence. Regenerate them with the script named in
; the NOTE at the top of the file rather than editing them by hand.
384 define <8 x i32> @testv8i32(<8 x i32> %in) nounwind {
385 ; AVX1-LABEL: testv8i32:
387 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
388 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
389 ; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm3
390 ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm4
391 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
392 ; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
393 ; AVX1-NEXT: vpxor %xmm6, %xmm6, %xmm6
394 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm4, %xmm7
395 ; AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3
396 ; AVX1-NEXT: vpshufb %xmm4, %xmm2, %xmm4
397 ; AVX1-NEXT: vpaddb %xmm4, %xmm3, %xmm3
398 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm1, %xmm4
399 ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
400 ; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm4
401 ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
402 ; AVX1-NEXT: vpaddw %xmm4, %xmm3, %xmm3
403 ; AVX1-NEXT: vpcmpeqw %xmm6, %xmm1, %xmm1
404 ; AVX1-NEXT: vpsrld $16, %xmm1, %xmm1
405 ; AVX1-NEXT: vpand %xmm1, %xmm3, %xmm1
406 ; AVX1-NEXT: vpsrld $16, %xmm3, %xmm3
407 ; AVX1-NEXT: vpaddd %xmm1, %xmm3, %xmm1
408 ; AVX1-NEXT: vpshufb %xmm0, %xmm2, %xmm3
409 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
410 ; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
411 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm4, %xmm5
412 ; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3
413 ; AVX1-NEXT: vpshufb %xmm4, %xmm2, %xmm2
414 ; AVX1-NEXT: vpaddb %xmm2, %xmm3, %xmm2
415 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm0, %xmm3
416 ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
417 ; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm3
418 ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
419 ; AVX1-NEXT: vpaddw %xmm3, %xmm2, %xmm2
420 ; AVX1-NEXT: vpcmpeqw %xmm6, %xmm0, %xmm0
421 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
422 ; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0
423 ; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2
424 ; AVX1-NEXT: vpaddd %xmm0, %xmm2, %xmm0
425 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
428 ; AVX2-LABEL: testv8i32:
430 ; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
431 ; AVX2-NEXT: # ymm1 = mem[0,1,0,1]
432 ; AVX2-NEXT: vpshufb %ymm0, %ymm1, %ymm2
433 ; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm3
434 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
435 ; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
436 ; AVX2-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
437 ; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
438 ; AVX2-NEXT: vpshufb %ymm3, %ymm1, %ymm1
439 ; AVX2-NEXT: vpaddb %ymm1, %ymm2, %ymm1
440 ; AVX2-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
441 ; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
442 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm2
443 ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
444 ; AVX2-NEXT: vpaddw %ymm2, %ymm1, %ymm1
445 ; AVX2-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
446 ; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
447 ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
448 ; AVX2-NEXT: vpsrld $16, %ymm1, %ymm1
449 ; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0
452 ; AVX512VL-LABEL: testv8i32:
454 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
455 ; AVX512VL-NEXT: # ymm1 = mem[0,1,0,1]
456 ; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm2
457 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
458 ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %ymm3
459 ; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
460 ; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
461 ; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
462 ; AVX512VL-NEXT: vpshufb %ymm3, %ymm1, %ymm1
463 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm2, %ymm1
464 ; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
465 ; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
466 ; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm2
467 ; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
468 ; AVX512VL-NEXT: vpaddw %ymm2, %ymm1, %ymm1
469 ; AVX512VL-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
470 ; AVX512VL-NEXT: vpsrld $16, %ymm0, %ymm0
471 ; AVX512VL-NEXT: vpand %ymm0, %ymm1, %ymm0
472 ; AVX512VL-NEXT: vpsrld $16, %ymm1, %ymm1
473 ; AVX512VL-NEXT: vpaddd %ymm0, %ymm1, %ymm0
474 ; AVX512VL-NEXT: retq
476 ; AVX512VLBWDQ-LABEL: testv8i32:
477 ; AVX512VLBWDQ: # %bb.0:
478 ; AVX512VLBWDQ-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
479 ; AVX512VLBWDQ-NEXT: # ymm1 = mem[0,1,0,1]
480 ; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm2
481 ; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm3
482 ; AVX512VLBWDQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %ymm3
483 ; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
484 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
485 ; AVX512VLBWDQ-NEXT: vpand %ymm5, %ymm2, %ymm2
486 ; AVX512VLBWDQ-NEXT: vpshufb %ymm3, %ymm1, %ymm1
487 ; AVX512VLBWDQ-NEXT: vpaddb %ymm1, %ymm2, %ymm1
488 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
489 ; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm2, %ymm2
490 ; AVX512VLBWDQ-NEXT: vpand %ymm2, %ymm1, %ymm2
491 ; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm1, %ymm1
492 ; AVX512VLBWDQ-NEXT: vpaddw %ymm2, %ymm1, %ymm1
493 ; AVX512VLBWDQ-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
494 ; AVX512VLBWDQ-NEXT: vpsrld $16, %ymm0, %ymm0
495 ; AVX512VLBWDQ-NEXT: vpand %ymm0, %ymm1, %ymm0
496 ; AVX512VLBWDQ-NEXT: vpsrld $16, %ymm1, %ymm1
497 ; AVX512VLBWDQ-NEXT: vpaddd %ymm0, %ymm1, %ymm0
498 ; AVX512VLBWDQ-NEXT: retq
500 ; AVX512VLCD-LABEL: testv8i32:
501 ; AVX512VLCD: # %bb.0:
502 ; AVX512VLCD-NEXT: vplzcntd %ymm0, %ymm0
503 ; AVX512VLCD-NEXT: retq
505 ; AVX512CD-LABEL: testv8i32:
507 ; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
508 ; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
509 ; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
510 ; AVX512CD-NEXT: retq
512 ; X32-AVX-LABEL: testv8i32:
514 ; X32-AVX-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
515 ; X32-AVX-NEXT: # ymm1 = mem[0,1,0,1]
516 ; X32-AVX-NEXT: vpshufb %ymm0, %ymm1, %ymm2
517 ; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm3
518 ; X32-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm3, %ymm3
519 ; X32-AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
520 ; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
521 ; X32-AVX-NEXT: vpand %ymm5, %ymm2, %ymm2
522 ; X32-AVX-NEXT: vpshufb %ymm3, %ymm1, %ymm1
523 ; X32-AVX-NEXT: vpaddb %ymm1, %ymm2, %ymm1
524 ; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
525 ; X32-AVX-NEXT: vpsrlw $8, %ymm2, %ymm2
526 ; X32-AVX-NEXT: vpand %ymm2, %ymm1, %ymm2
527 ; X32-AVX-NEXT: vpsrlw $8, %ymm1, %ymm1
528 ; X32-AVX-NEXT: vpaddw %ymm2, %ymm1, %ymm1
529 ; X32-AVX-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
530 ; X32-AVX-NEXT: vpsrld $16, %ymm0, %ymm0
531 ; X32-AVX-NEXT: vpand %ymm0, %ymm1, %ymm0
532 ; X32-AVX-NEXT: vpsrld $16, %ymm1, %ymm1
533 ; X32-AVX-NEXT: vpaddd %ymm0, %ymm1, %ymm0
536 %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %in, i1 0)
; testv8i32u: passes the <8 x i32> argument to @llvm.ctlz.v8i32 with the i1
; flag set to -1, i.e. true (per the LLVM LangRef this flag is is_zero_poison,
; so the result for a zero lane is poison here).
; The CHECK lines are autogenerated, one expected sequence per RUN prefix:
; AVX512VLCD and AVX512CD expect a single vplzcntd, the other prefixes expect
; a vpshufb table-lookup sequence. Regenerate them with the script named in
; the NOTE at the top of the file rather than editing them by hand.
540 define <8 x i32> @testv8i32u(<8 x i32> %in) nounwind {
541 ; AVX1-LABEL: testv8i32u:
543 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
544 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
545 ; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm3
546 ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm4
547 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
548 ; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
549 ; AVX1-NEXT: vpxor %xmm6, %xmm6, %xmm6
550 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm4, %xmm7
551 ; AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3
552 ; AVX1-NEXT: vpshufb %xmm4, %xmm2, %xmm4
553 ; AVX1-NEXT: vpaddb %xmm4, %xmm3, %xmm3
554 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm1, %xmm4
555 ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
556 ; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm4
557 ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
558 ; AVX1-NEXT: vpaddw %xmm4, %xmm3, %xmm3
559 ; AVX1-NEXT: vpcmpeqw %xmm6, %xmm1, %xmm1
560 ; AVX1-NEXT: vpsrld $16, %xmm1, %xmm1
561 ; AVX1-NEXT: vpand %xmm1, %xmm3, %xmm1
562 ; AVX1-NEXT: vpsrld $16, %xmm3, %xmm3
563 ; AVX1-NEXT: vpaddd %xmm1, %xmm3, %xmm1
564 ; AVX1-NEXT: vpshufb %xmm0, %xmm2, %xmm3
565 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
566 ; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
567 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm4, %xmm5
568 ; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3
569 ; AVX1-NEXT: vpshufb %xmm4, %xmm2, %xmm2
570 ; AVX1-NEXT: vpaddb %xmm2, %xmm3, %xmm2
571 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm0, %xmm3
572 ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
573 ; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm3
574 ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
575 ; AVX1-NEXT: vpaddw %xmm3, %xmm2, %xmm2
576 ; AVX1-NEXT: vpcmpeqw %xmm6, %xmm0, %xmm0
577 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
578 ; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0
579 ; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2
580 ; AVX1-NEXT: vpaddd %xmm0, %xmm2, %xmm0
581 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
584 ; AVX2-LABEL: testv8i32u:
586 ; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
587 ; AVX2-NEXT: # ymm1 = mem[0,1,0,1]
588 ; AVX2-NEXT: vpshufb %ymm0, %ymm1, %ymm2
589 ; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm3
590 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
591 ; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
592 ; AVX2-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
593 ; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
594 ; AVX2-NEXT: vpshufb %ymm3, %ymm1, %ymm1
595 ; AVX2-NEXT: vpaddb %ymm1, %ymm2, %ymm1
596 ; AVX2-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
597 ; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
598 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm2
599 ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
600 ; AVX2-NEXT: vpaddw %ymm2, %ymm1, %ymm1
601 ; AVX2-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
602 ; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
603 ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
604 ; AVX2-NEXT: vpsrld $16, %ymm1, %ymm1
605 ; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0
608 ; AVX512VL-LABEL: testv8i32u:
610 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
611 ; AVX512VL-NEXT: # ymm1 = mem[0,1,0,1]
612 ; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm2
613 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
614 ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %ymm3
615 ; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
616 ; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
617 ; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
618 ; AVX512VL-NEXT: vpshufb %ymm3, %ymm1, %ymm1
619 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm2, %ymm1
620 ; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
621 ; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2
622 ; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm2
623 ; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
624 ; AVX512VL-NEXT: vpaddw %ymm2, %ymm1, %ymm1
625 ; AVX512VL-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
626 ; AVX512VL-NEXT: vpsrld $16, %ymm0, %ymm0
627 ; AVX512VL-NEXT: vpand %ymm0, %ymm1, %ymm0
628 ; AVX512VL-NEXT: vpsrld $16, %ymm1, %ymm1
629 ; AVX512VL-NEXT: vpaddd %ymm0, %ymm1, %ymm0
630 ; AVX512VL-NEXT: retq
632 ; AVX512VLBWDQ-LABEL: testv8i32u:
633 ; AVX512VLBWDQ: # %bb.0:
634 ; AVX512VLBWDQ-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
635 ; AVX512VLBWDQ-NEXT: # ymm1 = mem[0,1,0,1]
636 ; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm2
637 ; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm3
638 ; AVX512VLBWDQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %ymm3
639 ; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
640 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
641 ; AVX512VLBWDQ-NEXT: vpand %ymm5, %ymm2, %ymm2
642 ; AVX512VLBWDQ-NEXT: vpshufb %ymm3, %ymm1, %ymm1
643 ; AVX512VLBWDQ-NEXT: vpaddb %ymm1, %ymm2, %ymm1
644 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
645 ; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm2, %ymm2
646 ; AVX512VLBWDQ-NEXT: vpand %ymm2, %ymm1, %ymm2
647 ; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm1, %ymm1
648 ; AVX512VLBWDQ-NEXT: vpaddw %ymm2, %ymm1, %ymm1
649 ; AVX512VLBWDQ-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
650 ; AVX512VLBWDQ-NEXT: vpsrld $16, %ymm0, %ymm0
651 ; AVX512VLBWDQ-NEXT: vpand %ymm0, %ymm1, %ymm0
652 ; AVX512VLBWDQ-NEXT: vpsrld $16, %ymm1, %ymm1
653 ; AVX512VLBWDQ-NEXT: vpaddd %ymm0, %ymm1, %ymm0
654 ; AVX512VLBWDQ-NEXT: retq
656 ; AVX512VLCD-LABEL: testv8i32u:
657 ; AVX512VLCD: # %bb.0:
658 ; AVX512VLCD-NEXT: vplzcntd %ymm0, %ymm0
659 ; AVX512VLCD-NEXT: retq
661 ; AVX512CD-LABEL: testv8i32u:
663 ; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
664 ; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
665 ; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
666 ; AVX512CD-NEXT: retq
668 ; X32-AVX-LABEL: testv8i32u:
670 ; X32-AVX-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
671 ; X32-AVX-NEXT: # ymm1 = mem[0,1,0,1]
672 ; X32-AVX-NEXT: vpshufb %ymm0, %ymm1, %ymm2
673 ; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm3
674 ; X32-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm3, %ymm3
675 ; X32-AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
676 ; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
677 ; X32-AVX-NEXT: vpand %ymm5, %ymm2, %ymm2
678 ; X32-AVX-NEXT: vpshufb %ymm3, %ymm1, %ymm1
679 ; X32-AVX-NEXT: vpaddb %ymm1, %ymm2, %ymm1
680 ; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm2
681 ; X32-AVX-NEXT: vpsrlw $8, %ymm2, %ymm2
682 ; X32-AVX-NEXT: vpand %ymm2, %ymm1, %ymm2
683 ; X32-AVX-NEXT: vpsrlw $8, %ymm1, %ymm1
684 ; X32-AVX-NEXT: vpaddw %ymm2, %ymm1, %ymm1
685 ; X32-AVX-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
686 ; X32-AVX-NEXT: vpsrld $16, %ymm0, %ymm0
687 ; X32-AVX-NEXT: vpand %ymm0, %ymm1, %ymm0
688 ; X32-AVX-NEXT: vpsrld $16, %ymm1, %ymm1
689 ; X32-AVX-NEXT: vpaddd %ymm0, %ymm1, %ymm0
692 %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %in, i1 -1)
; testv16i16 - llvm.ctlz on <16 x i16> with the trailing i1 flag set to 0.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
;
; Prefixes without vplzcntd (AVX1, AVX2, AVX512VL, AVX512VLBWDQ, X32-AVX) are
; checked for a vpshufb lookup into a per-nibble leading-zero table
; ([4,3,2,2,1,1,1,1,0,...]) applied to the low and high nibble of every byte,
; followed by a vpsrlw/vpand/vpaddw step that merges byte counts into 16-bit
; counts. AVX1 does this on two 128-bit halves and rejoins them with vinsertf128.
;
; The AVX512 prefix (the +avx512cd RUN lines) is instead checked for a zero
; extension to 32-bit lanes, vplzcntd, a vpmovdw truncation and a vpsubw of a
; constant-pool value (presumably the 16 extra leading zeros introduced by the
; widening; the constant is hidden behind the LCPI regex).
696 define <16 x i16> @testv16i16(<16 x i16> %in) nounwind {
697 ; AVX1-LABEL: testv16i16:
699 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
700 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
701 ; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm3
702 ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm4
703 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
704 ; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
705 ; AVX1-NEXT: vpxor %xmm6, %xmm6, %xmm6
706 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm4, %xmm7
707 ; AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3
708 ; AVX1-NEXT: vpshufb %xmm4, %xmm2, %xmm4
709 ; AVX1-NEXT: vpaddb %xmm4, %xmm3, %xmm3
710 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm1, %xmm1
711 ; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
712 ; AVX1-NEXT: vpand %xmm1, %xmm3, %xmm1
713 ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
714 ; AVX1-NEXT: vpaddw %xmm1, %xmm3, %xmm1
715 ; AVX1-NEXT: vpshufb %xmm0, %xmm2, %xmm3
716 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
717 ; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
718 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm4, %xmm5
719 ; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3
720 ; AVX1-NEXT: vpshufb %xmm4, %xmm2, %xmm2
721 ; AVX1-NEXT: vpaddb %xmm2, %xmm3, %xmm2
722 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm0, %xmm0
723 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
724 ; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0
725 ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
726 ; AVX1-NEXT: vpaddw %xmm0, %xmm2, %xmm0
727 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
730 ; AVX2-LABEL: testv16i16:
732 ; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
733 ; AVX2-NEXT: # ymm1 = mem[0,1,0,1]
734 ; AVX2-NEXT: vpshufb %ymm0, %ymm1, %ymm2
735 ; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm3
736 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
737 ; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
738 ; AVX2-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
739 ; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
740 ; AVX2-NEXT: vpshufb %ymm3, %ymm1, %ymm1
741 ; AVX2-NEXT: vpaddb %ymm1, %ymm2, %ymm1
742 ; AVX2-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm0
743 ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
744 ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
745 ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
746 ; AVX2-NEXT: vpaddw %ymm0, %ymm1, %ymm0
749 ; AVX512VL-LABEL: testv16i16:
751 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
752 ; AVX512VL-NEXT: # ymm1 = mem[0,1,0,1]
753 ; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm2
754 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
755 ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %ymm3
756 ; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
757 ; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
758 ; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
759 ; AVX512VL-NEXT: vpshufb %ymm3, %ymm1, %ymm1
760 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm2, %ymm1
761 ; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm0
762 ; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
763 ; AVX512VL-NEXT: vpand %ymm0, %ymm1, %ymm0
764 ; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
765 ; AVX512VL-NEXT: vpaddw %ymm0, %ymm1, %ymm0
766 ; AVX512VL-NEXT: retq
768 ; AVX512VLBWDQ-LABEL: testv16i16:
769 ; AVX512VLBWDQ: # %bb.0:
770 ; AVX512VLBWDQ-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
771 ; AVX512VLBWDQ-NEXT: # ymm1 = mem[0,1,0,1]
772 ; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm2
773 ; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm3
774 ; AVX512VLBWDQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %ymm3
775 ; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
776 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
777 ; AVX512VLBWDQ-NEXT: vpand %ymm5, %ymm2, %ymm2
778 ; AVX512VLBWDQ-NEXT: vpshufb %ymm3, %ymm1, %ymm1
779 ; AVX512VLBWDQ-NEXT: vpaddb %ymm1, %ymm2, %ymm1
780 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm0
781 ; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm0, %ymm0
782 ; AVX512VLBWDQ-NEXT: vpand %ymm0, %ymm1, %ymm0
783 ; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm1, %ymm1
784 ; AVX512VLBWDQ-NEXT: vpaddw %ymm0, %ymm1, %ymm0
785 ; AVX512VLBWDQ-NEXT: retq
787 ; AVX512-LABEL: testv16i16:
789 ; AVX512-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
790 ; AVX512-NEXT: vplzcntd %zmm0, %zmm0
791 ; AVX512-NEXT: vpmovdw %zmm0, %ymm0
792 ; AVX512-NEXT: vpsubw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
795 ; X32-AVX-LABEL: testv16i16:
797 ; X32-AVX-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
798 ; X32-AVX-NEXT: # ymm1 = mem[0,1,0,1]
799 ; X32-AVX-NEXT: vpshufb %ymm0, %ymm1, %ymm2
800 ; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm3
801 ; X32-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm3, %ymm3
802 ; X32-AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
803 ; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
804 ; X32-AVX-NEXT: vpand %ymm5, %ymm2, %ymm2
805 ; X32-AVX-NEXT: vpshufb %ymm3, %ymm1, %ymm1
806 ; X32-AVX-NEXT: vpaddb %ymm1, %ymm2, %ymm1
807 ; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm0
808 ; X32-AVX-NEXT: vpsrlw $8, %ymm0, %ymm0
809 ; X32-AVX-NEXT: vpand %ymm0, %ymm1, %ymm0
810 ; X32-AVX-NEXT: vpsrlw $8, %ymm1, %ymm1
811 ; X32-AVX-NEXT: vpaddw %ymm0, %ymm1, %ymm0
813 %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %in, i1 0)
; testv16i16u - same operation as testv16i16 (llvm.ctlz on <16 x i16>) but with
; the trailing i1 flag set to -1 (true). The instruction sequences asserted
; below are the same as the ones asserted for testv16i16 under every prefix:
; a vpshufb nibble-table lookup merged to 16-bit counts for the prefixes without
; vplzcntd, and widen / vplzcntd / vpmovdw / vpsubw for the AVX512 prefix.
; Assertions are autogenerated; regenerate with utils/update_llc_test_checks.py.
817 define <16 x i16> @testv16i16u(<16 x i16> %in) nounwind {
818 ; AVX1-LABEL: testv16i16u:
820 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
821 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
822 ; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm3
823 ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm4
824 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
825 ; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
826 ; AVX1-NEXT: vpxor %xmm6, %xmm6, %xmm6
827 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm4, %xmm7
828 ; AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3
829 ; AVX1-NEXT: vpshufb %xmm4, %xmm2, %xmm4
830 ; AVX1-NEXT: vpaddb %xmm4, %xmm3, %xmm3
831 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm1, %xmm1
832 ; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
833 ; AVX1-NEXT: vpand %xmm1, %xmm3, %xmm1
834 ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
835 ; AVX1-NEXT: vpaddw %xmm1, %xmm3, %xmm1
836 ; AVX1-NEXT: vpshufb %xmm0, %xmm2, %xmm3
837 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
838 ; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
839 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm4, %xmm5
840 ; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3
841 ; AVX1-NEXT: vpshufb %xmm4, %xmm2, %xmm2
842 ; AVX1-NEXT: vpaddb %xmm2, %xmm3, %xmm2
843 ; AVX1-NEXT: vpcmpeqb %xmm6, %xmm0, %xmm0
844 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
845 ; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0
846 ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
847 ; AVX1-NEXT: vpaddw %xmm0, %xmm2, %xmm0
848 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
851 ; AVX2-LABEL: testv16i16u:
853 ; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
854 ; AVX2-NEXT: # ymm1 = mem[0,1,0,1]
855 ; AVX2-NEXT: vpshufb %ymm0, %ymm1, %ymm2
856 ; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm3
857 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
858 ; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
859 ; AVX2-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
860 ; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
861 ; AVX2-NEXT: vpshufb %ymm3, %ymm1, %ymm1
862 ; AVX2-NEXT: vpaddb %ymm1, %ymm2, %ymm1
863 ; AVX2-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm0
864 ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
865 ; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
866 ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
867 ; AVX2-NEXT: vpaddw %ymm0, %ymm1, %ymm0
870 ; AVX512VL-LABEL: testv16i16u:
872 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
873 ; AVX512VL-NEXT: # ymm1 = mem[0,1,0,1]
874 ; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm2
875 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
876 ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %ymm3
877 ; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4
878 ; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
879 ; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2
880 ; AVX512VL-NEXT: vpshufb %ymm3, %ymm1, %ymm1
881 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm2, %ymm1
882 ; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm0
883 ; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
884 ; AVX512VL-NEXT: vpand %ymm0, %ymm1, %ymm0
885 ; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
886 ; AVX512VL-NEXT: vpaddw %ymm0, %ymm1, %ymm0
887 ; AVX512VL-NEXT: retq
889 ; AVX512VLBWDQ-LABEL: testv16i16u:
890 ; AVX512VLBWDQ: # %bb.0:
891 ; AVX512VLBWDQ-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
892 ; AVX512VLBWDQ-NEXT: # ymm1 = mem[0,1,0,1]
893 ; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm2
894 ; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm3
895 ; AVX512VLBWDQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm3, %ymm3
896 ; AVX512VLBWDQ-NEXT: vpxor %xmm4, %xmm4, %xmm4
897 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
898 ; AVX512VLBWDQ-NEXT: vpand %ymm5, %ymm2, %ymm2
899 ; AVX512VLBWDQ-NEXT: vpshufb %ymm3, %ymm1, %ymm1
900 ; AVX512VLBWDQ-NEXT: vpaddb %ymm1, %ymm2, %ymm1
901 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm0
902 ; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm0, %ymm0
903 ; AVX512VLBWDQ-NEXT: vpand %ymm0, %ymm1, %ymm0
904 ; AVX512VLBWDQ-NEXT: vpsrlw $8, %ymm1, %ymm1
905 ; AVX512VLBWDQ-NEXT: vpaddw %ymm0, %ymm1, %ymm0
906 ; AVX512VLBWDQ-NEXT: retq
908 ; AVX512-LABEL: testv16i16u:
910 ; AVX512-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
911 ; AVX512-NEXT: vplzcntd %zmm0, %zmm0
912 ; AVX512-NEXT: vpmovdw %zmm0, %ymm0
913 ; AVX512-NEXT: vpsubw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
916 ; X32-AVX-LABEL: testv16i16u:
918 ; X32-AVX-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
919 ; X32-AVX-NEXT: # ymm1 = mem[0,1,0,1]
920 ; X32-AVX-NEXT: vpshufb %ymm0, %ymm1, %ymm2
921 ; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm3
922 ; X32-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm3, %ymm3
923 ; X32-AVX-NEXT: vpxor %xmm4, %xmm4, %xmm4
924 ; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm5
925 ; X32-AVX-NEXT: vpand %ymm5, %ymm2, %ymm2
926 ; X32-AVX-NEXT: vpshufb %ymm3, %ymm1, %ymm1
927 ; X32-AVX-NEXT: vpaddb %ymm1, %ymm2, %ymm1
928 ; X32-AVX-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm0
929 ; X32-AVX-NEXT: vpsrlw $8, %ymm0, %ymm0
930 ; X32-AVX-NEXT: vpand %ymm0, %ymm1, %ymm0
931 ; X32-AVX-NEXT: vpsrlw $8, %ymm1, %ymm1
932 ; X32-AVX-NEXT: vpaddw %ymm0, %ymm1, %ymm0
934 %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %in, i1 -1)
; testv32i8 - llvm.ctlz on <32 x i8> with the trailing i1 flag set to 0.
; Assertions are autogenerated; regenerate with utils/update_llc_test_checks.py.
;
; For the prefixes without vplzcntd the per-byte count comes straight from the
; nibble table: vpshufb looks up both the low and the high nibble in
; [4,3,2,2,1,1,1,1,0,...], the low-nibble result is kept only where the high
; nibble compares equal to zero (vpcmpeqb + vpand), and the two are added with
; vpaddb. No further widening merge is asserted for this element size.
;
; The AVX512 prefix (the +avx512cd RUN lines) handles each 128-bit half
; separately: vpmovzxbd to 32-bit lanes, vplzcntd, vpmovdb back to bytes, then
; vinserti128 to rejoin the halves and a vpsubb of a constant-pool value
; (presumably the 24 extra leading zeros from widening; hidden by the LCPI regex).
938 define <32 x i8> @testv32i8(<32 x i8> %in) nounwind {
939 ; AVX1-LABEL: testv32i8:
941 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
942 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
943 ; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm3
944 ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
945 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
946 ; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
947 ; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
948 ; AVX1-NEXT: vpcmpeqb %xmm5, %xmm1, %xmm6
949 ; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
950 ; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm1
951 ; AVX1-NEXT: vpaddb %xmm1, %xmm3, %xmm1
952 ; AVX1-NEXT: vpshufb %xmm0, %xmm2, %xmm3
953 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
954 ; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
955 ; AVX1-NEXT: vpcmpeqb %xmm5, %xmm0, %xmm4
956 ; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
957 ; AVX1-NEXT: vpshufb %xmm0, %xmm2, %xmm0
958 ; AVX1-NEXT: vpaddb %xmm0, %xmm3, %xmm0
959 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
962 ; AVX2-LABEL: testv32i8:
964 ; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
965 ; AVX2-NEXT: # ymm1 = mem[0,1,0,1]
966 ; AVX2-NEXT: vpshufb %ymm0, %ymm1, %ymm2
967 ; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
968 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
969 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
970 ; AVX2-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm3
971 ; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
972 ; AVX2-NEXT: vpshufb %ymm0, %ymm1, %ymm0
973 ; AVX2-NEXT: vpaddb %ymm0, %ymm2, %ymm0
976 ; AVX512VL-LABEL: testv32i8:
978 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
979 ; AVX512VL-NEXT: # ymm1 = mem[0,1,0,1]
980 ; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm2
981 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
982 ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
983 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
984 ; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm3
985 ; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
986 ; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm0
987 ; AVX512VL-NEXT: vpaddb %ymm0, %ymm2, %ymm0
988 ; AVX512VL-NEXT: retq
990 ; AVX512VLBWDQ-LABEL: testv32i8:
991 ; AVX512VLBWDQ: # %bb.0:
992 ; AVX512VLBWDQ-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
993 ; AVX512VLBWDQ-NEXT: # ymm1 = mem[0,1,0,1]
994 ; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm2
995 ; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm0
996 ; AVX512VLBWDQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
997 ; AVX512VLBWDQ-NEXT: vpxor %xmm3, %xmm3, %xmm3
998 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm3
999 ; AVX512VLBWDQ-NEXT: vpand %ymm3, %ymm2, %ymm2
1000 ; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm0
1001 ; AVX512VLBWDQ-NEXT: vpaddb %ymm0, %ymm2, %ymm0
1002 ; AVX512VLBWDQ-NEXT: retq
1004 ; AVX512-LABEL: testv32i8:
1006 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1007 ; AVX512-NEXT: vplzcntd %zmm1, %zmm1
1008 ; AVX512-NEXT: vpmovdb %zmm1, %xmm1
1009 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
1010 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1011 ; AVX512-NEXT: vplzcntd %zmm0, %zmm0
1012 ; AVX512-NEXT: vpmovdb %zmm0, %xmm0
1013 ; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1014 ; AVX512-NEXT: vpsubb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1017 ; X32-AVX-LABEL: testv32i8:
1019 ; X32-AVX-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
1020 ; X32-AVX-NEXT: # ymm1 = mem[0,1,0,1]
1021 ; X32-AVX-NEXT: vpshufb %ymm0, %ymm1, %ymm2
1022 ; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm0
1023 ; X32-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
1024 ; X32-AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
1025 ; X32-AVX-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm3
1026 ; X32-AVX-NEXT: vpand %ymm3, %ymm2, %ymm2
1027 ; X32-AVX-NEXT: vpshufb %ymm0, %ymm1, %ymm0
1028 ; X32-AVX-NEXT: vpaddb %ymm0, %ymm2, %ymm0
1029 ; X32-AVX-NEXT: retl
1030 %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %in, i1 0)
; testv32i8u - same operation as testv32i8 (llvm.ctlz on <32 x i8>) but with
; the trailing i1 flag set to -1 (true). The instruction sequences asserted
; below are the same as the ones asserted for testv32i8 under every prefix:
; nibble-table vpshufb + vpaddb for the prefixes without vplzcntd, and per-half
; vpmovzxbd / vplzcntd / vpmovdb followed by vinserti128 and vpsubb for the
; AVX512 prefix.
; Assertions are autogenerated; regenerate with utils/update_llc_test_checks.py.
1034 define <32 x i8> @testv32i8u(<32 x i8> %in) nounwind {
1035 ; AVX1-LABEL: testv32i8u:
1037 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1038 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
1039 ; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm3
1040 ; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
1041 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1042 ; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
1043 ; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
1044 ; AVX1-NEXT: vpcmpeqb %xmm5, %xmm1, %xmm6
1045 ; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
1046 ; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm1
1047 ; AVX1-NEXT: vpaddb %xmm1, %xmm3, %xmm1
1048 ; AVX1-NEXT: vpshufb %xmm0, %xmm2, %xmm3
1049 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
1050 ; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
1051 ; AVX1-NEXT: vpcmpeqb %xmm5, %xmm0, %xmm4
1052 ; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
1053 ; AVX1-NEXT: vpshufb %xmm0, %xmm2, %xmm0
1054 ; AVX1-NEXT: vpaddb %xmm0, %xmm3, %xmm0
1055 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1058 ; AVX2-LABEL: testv32i8u:
1060 ; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
1061 ; AVX2-NEXT: # ymm1 = mem[0,1,0,1]
1062 ; AVX2-NEXT: vpshufb %ymm0, %ymm1, %ymm2
1063 ; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
1064 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1065 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
1066 ; AVX2-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm3
1067 ; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
1068 ; AVX2-NEXT: vpshufb %ymm0, %ymm1, %ymm0
1069 ; AVX2-NEXT: vpaddb %ymm0, %ymm2, %ymm0
1072 ; AVX512VL-LABEL: testv32i8u:
1073 ; AVX512VL: # %bb.0:
1074 ; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
1075 ; AVX512VL-NEXT: # ymm1 = mem[0,1,0,1]
1076 ; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm2
1077 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
1078 ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
1079 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
1080 ; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm3
1081 ; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2
1082 ; AVX512VL-NEXT: vpshufb %ymm0, %ymm1, %ymm0
1083 ; AVX512VL-NEXT: vpaddb %ymm0, %ymm2, %ymm0
1084 ; AVX512VL-NEXT: retq
1086 ; AVX512VLBWDQ-LABEL: testv32i8u:
1087 ; AVX512VLBWDQ: # %bb.0:
1088 ; AVX512VLBWDQ-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
1089 ; AVX512VLBWDQ-NEXT: # ymm1 = mem[0,1,0,1]
1090 ; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm2
1091 ; AVX512VLBWDQ-NEXT: vpsrlw $4, %ymm0, %ymm0
1092 ; AVX512VLBWDQ-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
1093 ; AVX512VLBWDQ-NEXT: vpxor %xmm3, %xmm3, %xmm3
1094 ; AVX512VLBWDQ-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm3
1095 ; AVX512VLBWDQ-NEXT: vpand %ymm3, %ymm2, %ymm2
1096 ; AVX512VLBWDQ-NEXT: vpshufb %ymm0, %ymm1, %ymm0
1097 ; AVX512VLBWDQ-NEXT: vpaddb %ymm0, %ymm2, %ymm0
1098 ; AVX512VLBWDQ-NEXT: retq
1100 ; AVX512-LABEL: testv32i8u:
1102 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1103 ; AVX512-NEXT: vplzcntd %zmm1, %zmm1
1104 ; AVX512-NEXT: vpmovdb %zmm1, %xmm1
1105 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
1106 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1107 ; AVX512-NEXT: vplzcntd %zmm0, %zmm0
1108 ; AVX512-NEXT: vpmovdb %zmm0, %xmm0
1109 ; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1110 ; AVX512-NEXT: vpsubb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1113 ; X32-AVX-LABEL: testv32i8u:
1115 ; X32-AVX-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
1116 ; X32-AVX-NEXT: # ymm1 = mem[0,1,0,1]
1117 ; X32-AVX-NEXT: vpshufb %ymm0, %ymm1, %ymm2
1118 ; X32-AVX-NEXT: vpsrlw $4, %ymm0, %ymm0
1119 ; X32-AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
1120 ; X32-AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
1121 ; X32-AVX-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm3
1122 ; X32-AVX-NEXT: vpand %ymm3, %ymm2, %ymm2
1123 ; X32-AVX-NEXT: vpshufb %ymm0, %ymm1, %ymm0
1124 ; X32-AVX-NEXT: vpaddb %ymm0, %ymm2, %ymm0
1125 ; X32-AVX-NEXT: retl
1126 %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %in, i1 -1)
; foldv4i64 - llvm.ctlz of the constant vector <256, -1, 0, 255> (i1 flag 0)
; is expected to be folded at compile time into one constant load:
; [55,0,64,56], i.e. the lane holding 0 yields the full bit width 64.
; The X32-AVX assertion shows the same constant printed as eight 32-bit
; elements, each count followed by a 0 for the upper half of its i64 lane.
1130 define <4 x i64> @foldv4i64() nounwind {
1131 ; X64-LABEL: foldv4i64:
1133 ; X64-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
1136 ; X32-AVX-LABEL: foldv4i64:
1138 ; X32-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,0,0,64,0,56,0]
1139 ; X32-AVX-NEXT: retl
1140 %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 0)
; foldv4i64u - same constant input as foldv4i64 but with the i1 flag set to -1
; (true). The asserted folded constant is unchanged: [55,0,64,56], including
; 64 for the lane whose input is 0. On X32-AVX it is printed as eight 32-bit
; elements, each count followed by a 0 for the upper half of its i64 lane.
1144 define <4 x i64> @foldv4i64u() nounwind {
1145 ; X64-LABEL: foldv4i64u:
1147 ; X64-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
1150 ; X32-AVX-LABEL: foldv4i64u:
1152 ; X32-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,0,0,64,0,56,0]
1153 ; X32-AVX-NEXT: retl
1154 %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 -1)
; foldv8i32 - llvm.ctlz of a constant <8 x i32> vector (i1 flag 0) is expected
; to fold into one constant load. Inputs <256,-1,0,255,-65536,7,24,88> map to
; [23,0,32,24,0,29,27,25]; the 0 lane yields the full bit width 32.
; The 64-bit and 32-bit targets assert the same constant.
1158 define <8 x i32> @foldv8i32() nounwind {
1159 ; X64-LABEL: foldv8i32:
1161 ; X64-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
1164 ; X32-AVX-LABEL: foldv8i32:
1166 ; X32-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
1167 ; X32-AVX-NEXT: retl
1168 %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 0)
; foldv8i32u - same constant input as foldv8i32 but with the i1 flag set to -1
; (true). The asserted folded constant is unchanged, [23,0,32,24,0,29,27,25],
; including 32 for the lane whose input is 0.
1172 define <8 x i32> @foldv8i32u() nounwind {
1173 ; X64-LABEL: foldv8i32u:
1175 ; X64-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
1178 ; X32-AVX-LABEL: foldv8i32u:
1180 ; X32-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
1181 ; X32-AVX-NEXT: retl
1182 %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 -1)
; foldv16i16 - llvm.ctlz of a constant <16 x i16> vector (i1 flag 0) is
; expected to fold into one constant load.
; NOTE(review): the literal -65536 in lane 4 does not fit in i16; the asserted
; result for that lane is 16, the same as for the explicit 0 in lane 2, so the
; test relies on that literal being treated as 0.
1186 define <16 x i16> @foldv16i16() nounwind {
1187 ; X64-LABEL: foldv16i16:
1189 ; X64-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
1192 ; X32-AVX-LABEL: foldv16i16:
1194 ; X32-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
1195 ; X32-AVX-NEXT: retl
1196 %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 0)
; foldv16i16u - same constant input as foldv16i16 but with the i1 flag set to
; -1 (true). The asserted folded constant is unchanged.
; NOTE(review): the literal -65536 in lane 4 does not fit in i16; the asserted
; result for that lane is 16, the same as for the explicit 0 in lane 2, so the
; test relies on that literal being treated as 0.
1200 define <16 x i16> @foldv16i16u() nounwind {
1201 ; X64-LABEL: foldv16i16u:
1203 ; X64-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
1206 ; X32-AVX-LABEL: foldv16i16u:
1208 ; X32-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
1209 ; X32-AVX-NEXT: retl
1210 %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 -1)
; foldv32i8 - llvm.ctlz of a constant <32 x i8> vector (i1 flag 0) is expected
; to fold into one constant load.
; NOTE(review): several literals do not fit in i8. The asserted results show
; how they are interpreted: 256, -65536 and -256 give 8 (same as the explicit
; 0), while 255, 254 and 128 give 0 (top bit set), so the test relies on the
; literals being reduced to their low 8 bits.
1214 define <32 x i8> @foldv32i8() nounwind {
1215 ; X64-LABEL: foldv32i8:
1217 ; X64-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
1220 ; X32-AVX-LABEL: foldv32i8:
1222 ; X32-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
1223 ; X32-AVX-NEXT: retl
1224 %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 0)
; foldv32i8u - same constant input as foldv32i8 but with the i1 flag set to -1
; (true). The asserted folded constant is unchanged.
; NOTE(review): several literals do not fit in i8. The asserted results show
; how they are interpreted: 256, -65536 and -256 give 8 (same as the explicit
; 0), while 255, 254 and 128 give 0 (top bit set), so the test relies on the
; literals being reduced to their low 8 bits.
1228 define <32 x i8> @foldv32i8u() nounwind {
1229 ; X64-LABEL: foldv32i8u:
1231 ; X64-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
1234 ; X32-AVX-LABEL: foldv32i8u:
1236 ; X32-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
1237 ; X32-AVX-NEXT: retl
1238 %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 -1)
; Declarations of the 256-bit llvm.ctlz overloads called by the tests in this
; file. Each takes the vector operand plus a trailing i1 flag; the tests pass
; that flag as 0 in the plain variants and as -1 in the "u" variants.
1242 declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1)
1243 declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1)
1244 declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1)
1245 declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1)