1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE
3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
4 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
5 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512
; OR reduction of a 2-lane floating-point compare mask.
; The IR compares %a0 and %a1 with fcmp ogt, sign-extends the <2 x i1> result
; to <2 x i64>, ORs lane 1 into lane 0 and extracts lane 0 (%3), so lane 0 is
; all-ones if any lane compared true and zero otherwise.
; For every RUN prefix the expected code stays in vector registers (compare,
; shuffle, or) and then moves the low 64 bits into %rax.
7 define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) {
8 ; SSE-LABEL: test_v2f64_sext:
10 ; SSE-NEXT: cmpltpd %xmm0, %xmm1
11 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
12 ; SSE-NEXT: por %xmm1, %xmm0
13 ; SSE-NEXT: movq %xmm0, %rax
16 ; AVX-LABEL: test_v2f64_sext:
18 ; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
19 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
20 ; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
21 ; AVX-NEXT: vmovq %xmm0, %rax
24 ; AVX512-LABEL: test_v2f64_sext:
26 ; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
27 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
28 ; AVX512-NEXT: vorpd %xmm1, %xmm0, %xmm0
29 ; AVX512-NEXT: vmovq %xmm0, %rax
31 %c = fcmp ogt <2 x double> %a0, %a1
32 %s = sext <2 x i1> %c to <2 x i64>
33 %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
34 %2 = or <2 x i64> %s, %1
35 %3 = extractelement <2 x i64> %2, i32 0
; OR reduction of a 4-lane floating-point compare mask.
; The IR compares with fcmp ogt, sign-extends the <4 x i1> result to
; <4 x i64>, then halves the vector twice with shufflevector + or (lanes 2,3
; into lanes 0,1, then lane 1 into lane 0) and extracts lane 0 (%5).
; The AVX prefix expects the sign bits to be gathered with vmovmskpd and the
; result rebuilt with cmpl/sbbq; the SSE and AVX512 prefixes expect the
; reduction to stay in vector registers (or + shuffle, then a move to %rax).
39 define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) {
40 ; SSE-LABEL: test_v4f64_sext:
42 ; SSE-NEXT: cmpltpd %xmm1, %xmm3
43 ; SSE-NEXT: cmpltpd %xmm0, %xmm2
44 ; SSE-NEXT: orpd %xmm3, %xmm2
45 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
46 ; SSE-NEXT: por %xmm2, %xmm0
47 ; SSE-NEXT: movq %xmm0, %rax
50 ; AVX-LABEL: test_v4f64_sext:
52 ; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
53 ; AVX-NEXT: vmovmskpd %ymm0, %ecx
54 ; AVX-NEXT: xorl %eax, %eax
55 ; AVX-NEXT: cmpl %ecx, %eax
56 ; AVX-NEXT: sbbq %rax, %rax
57 ; AVX-NEXT: vzeroupper
60 ; AVX512-LABEL: test_v4f64_sext:
62 ; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
63 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
64 ; AVX512-NEXT: vorpd %xmm1, %xmm0, %xmm0
65 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
66 ; AVX512-NEXT: vorpd %xmm1, %xmm0, %xmm0
67 ; AVX512-NEXT: vmovq %xmm0, %rax
68 ; AVX512-NEXT: vzeroupper
70 %c = fcmp ogt <4 x double> %a0, %a1
71 %s = sext <4 x i1> %c to <4 x i64>
72 %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
73 %2 = or <4 x i64> %s, %1
74 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
75 %4 = or <4 x i64> %2, %3
76 %5 = extractelement <4 x i64> %4, i64 0
; OR reduction of a 4-lane floating-point compare mask, performed on a
; narrower element type than the compared operands.
; The IR compares <4 x double> values with fcmp ogt but sign-extends the
; <4 x i1> result to <4 x i32>, reduces it with two shufflevector + or steps,
; extracts lane 0 (%5) and sign-extends that scalar to i64 (%6).
; The SSE and AVX prefixes expect a packssdw of the compare results followed
; by movmskps, cmpl/sbbl and movslq; the AVX512 prefix expects a compare into
; a mask register, a zero-masked vmovdqa32 and a shuffle/or sequence.
80 define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) {
81 ; SSE-LABEL: test_v4f64_legal_sext:
83 ; SSE-NEXT: cmpltpd %xmm1, %xmm3
84 ; SSE-NEXT: cmpltpd %xmm0, %xmm2
85 ; SSE-NEXT: packssdw %xmm3, %xmm2
86 ; SSE-NEXT: movmskps %xmm2, %eax
87 ; SSE-NEXT: xorl %ecx, %ecx
88 ; SSE-NEXT: cmpl %eax, %ecx
89 ; SSE-NEXT: sbbl %ecx, %ecx
90 ; SSE-NEXT: movslq %ecx, %rax
93 ; AVX-LABEL: test_v4f64_legal_sext:
95 ; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
96 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
97 ; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
98 ; AVX-NEXT: vmovmskps %xmm0, %eax
99 ; AVX-NEXT: xorl %ecx, %ecx
100 ; AVX-NEXT: cmpl %eax, %ecx
101 ; AVX-NEXT: sbbl %ecx, %ecx
102 ; AVX-NEXT: movslq %ecx, %rax
103 ; AVX-NEXT: vzeroupper
106 ; AVX512-LABEL: test_v4f64_legal_sext:
108 ; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1
109 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
110 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
111 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
112 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
113 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
114 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
115 ; AVX512-NEXT: vmovd %xmm0, %eax
117 ; AVX512-NEXT: vzeroupper
119 %c = fcmp ogt <4 x double> %a0, %a1
120 %s = sext <4 x i1> %c to <4 x i32>
121 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
122 %2 = or <4 x i32> %s, %1
123 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
124 %4 = or <4 x i32> %2, %3
125 %5 = extractelement <4 x i32> %4, i64 0
126 %6 = sext i32 %5 to i64
; OR reduction of a 4-lane single-precision compare mask.
; The IR compares with fcmp ogt, sign-extends the <4 x i1> result to
; <4 x i32>, folds lanes 2,3 into lanes 0,1 and then lane 1 into lane 0 with
; shufflevector + or, and extracts lane 0 (%5).
; The SSE and AVX prefixes expect movmskps followed by cmpl/sbbl; the AVX512
; prefix expects the shuffle/or sequence to remain in vector registers.
130 define i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) {
131 ; SSE-LABEL: test_v4f32_sext:
133 ; SSE-NEXT: cmpltps %xmm0, %xmm1
134 ; SSE-NEXT: movmskps %xmm1, %ecx
135 ; SSE-NEXT: xorl %eax, %eax
136 ; SSE-NEXT: cmpl %ecx, %eax
137 ; SSE-NEXT: sbbl %eax, %eax
140 ; AVX-LABEL: test_v4f32_sext:
142 ; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
143 ; AVX-NEXT: vmovmskps %xmm0, %ecx
144 ; AVX-NEXT: xorl %eax, %eax
145 ; AVX-NEXT: cmpl %ecx, %eax
146 ; AVX-NEXT: sbbl %eax, %eax
149 ; AVX512-LABEL: test_v4f32_sext:
151 ; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
152 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
153 ; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0
154 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
155 ; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0
156 ; AVX512-NEXT: vmovd %xmm0, %eax
158 %c = fcmp ogt <4 x float> %a0, %a1
159 %s = sext <4 x i1> %c to <4 x i32>
160 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
161 %2 = or <4 x i32> %s, %1
162 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
163 %4 = or <4 x i32> %2, %3
164 %5 = extractelement <4 x i32> %4, i32 0
; OR reduction of an 8-lane single-precision compare mask.
; The IR compares with fcmp ogt, sign-extends the <8 x i1> result to
; <8 x i32>, then halves the vector three times with shufflevector + or
; (4 lanes, 2 lanes, 1 lane) and extracts lane 0 (%7).
; The SSE prefix expects the two 128-bit compare results to be OR-ed before
; movmskps; the AVX prefix expects a 256-bit vmovmskps; both finish with
; cmpl/sbbl. The AVX512 prefix expects an extract/shuffle/or sequence.
168 define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) {
169 ; SSE-LABEL: test_v8f32_sext:
171 ; SSE-NEXT: cmpltps %xmm1, %xmm3
172 ; SSE-NEXT: cmpltps %xmm0, %xmm2
173 ; SSE-NEXT: orps %xmm3, %xmm2
174 ; SSE-NEXT: movmskps %xmm2, %ecx
175 ; SSE-NEXT: xorl %eax, %eax
176 ; SSE-NEXT: cmpl %ecx, %eax
177 ; SSE-NEXT: sbbl %eax, %eax
180 ; AVX-LABEL: test_v8f32_sext:
182 ; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
183 ; AVX-NEXT: vmovmskps %ymm0, %ecx
184 ; AVX-NEXT: xorl %eax, %eax
185 ; AVX-NEXT: cmpl %ecx, %eax
186 ; AVX-NEXT: sbbl %eax, %eax
187 ; AVX-NEXT: vzeroupper
190 ; AVX512-LABEL: test_v8f32_sext:
192 ; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
193 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
194 ; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0
195 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
196 ; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0
197 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
198 ; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0
199 ; AVX512-NEXT: vmovd %xmm0, %eax
200 ; AVX512-NEXT: vzeroupper
202 %c = fcmp ogt <8 x float> %a0, %a1
203 %s = sext <8 x i1> %c to <8 x i32>
204 %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
205 %2 = or <8 x i32> %s, %1
206 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
207 %4 = or <8 x i32> %2, %3
208 %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
209 %6 = or <8 x i32> %4, %5
210 %7 = extractelement <8 x i32> %6, i32 0
; OR reduction of an 8-lane single-precision compare mask, performed on a
; narrower element type than the compared operands.
; The IR compares <8 x float> values with fcmp ogt but sign-extends the
; <8 x i1> result to <8 x i16>, reduces it with three shufflevector + or
; steps, extracts lane 0 (%7) and sign-extends that scalar to i32 (%8).
; The SSE and AVX prefixes expect packssdw followed by pmovmskb and
; cmpl/sbbl; the AVX512 prefix expects a compare into a mask register,
; vpmovm2w, and a shuffle/shift/or sequence.
214 define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) {
215 ; SSE-LABEL: test_v8f32_legal_sext:
217 ; SSE-NEXT: cmpltps %xmm1, %xmm3
218 ; SSE-NEXT: cmpltps %xmm0, %xmm2
219 ; SSE-NEXT: packssdw %xmm3, %xmm2
220 ; SSE-NEXT: pmovmskb %xmm2, %ecx
221 ; SSE-NEXT: xorl %eax, %eax
222 ; SSE-NEXT: cmpl %ecx, %eax
223 ; SSE-NEXT: sbbl %eax, %eax
226 ; AVX-LABEL: test_v8f32_legal_sext:
228 ; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
229 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
230 ; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
231 ; AVX-NEXT: vpmovmskb %xmm0, %ecx
232 ; AVX-NEXT: xorl %eax, %eax
233 ; AVX-NEXT: cmpl %ecx, %eax
234 ; AVX-NEXT: sbbl %eax, %eax
235 ; AVX-NEXT: vzeroupper
238 ; AVX512-LABEL: test_v8f32_legal_sext:
240 ; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k0
241 ; AVX512-NEXT: vpmovm2w %k0, %xmm0
242 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
243 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
244 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
245 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
246 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
247 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
248 ; AVX512-NEXT: vmovd %xmm0, %eax
250 ; AVX512-NEXT: vzeroupper
252 %c = fcmp ogt <8 x float> %a0, %a1
253 %s = sext <8 x i1> %c to <8 x i16>
254 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
255 %2 = or <8 x i16> %s, %1
256 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
257 %4 = or <8 x i16> %2, %3
258 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
259 %6 = or <8 x i16> %4, %5
260 %7 = extractelement <8 x i16> %6, i32 0
261 %8 = sext i16 %7 to i32
; OR reduction of a 2-lane signed-integer compare mask.
; The IR compares %a0 and %a1 with icmp sgt, sign-extends the <2 x i1> result
; to <2 x i64>, ORs lane 1 into lane 0 and extracts lane 0 (%3), so lane 0 is
; all-ones if any lane compared true and zero otherwise.
; For every RUN prefix the expected code is a 64-bit compare, a shuffle, an
; or, and a move of the low 64 bits into %rax.
265 define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) {
266 ; SSE-LABEL: test_v2i64_sext:
268 ; SSE-NEXT: pcmpgtq %xmm1, %xmm0
269 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
270 ; SSE-NEXT: por %xmm0, %xmm1
271 ; SSE-NEXT: movq %xmm1, %rax
274 ; AVX-LABEL: test_v2i64_sext:
276 ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
277 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
278 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
279 ; AVX-NEXT: vmovq %xmm0, %rax
282 ; AVX512-LABEL: test_v2i64_sext:
284 ; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
285 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
286 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
287 ; AVX512-NEXT: vmovq %xmm0, %rax
289 %c = icmp sgt <2 x i64> %a0, %a1
290 %s = sext <2 x i1> %c to <2 x i64>
291 %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
292 %2 = or <2 x i64> %s, %1
293 %3 = extractelement <2 x i64> %2, i32 0
; OR reduction of a 4-lane signed 64-bit compare mask.
; The IR compares with icmp sgt, sign-extends the <4 x i1> result to
; <4 x i64>, folds lanes 2,3 into lanes 0,1 and then lane 1 into lane 0 with
; shufflevector + or, and extracts lane 0 (%5).
; The AVX1 prefix expects the compare to be done on two 128-bit halves that
; are re-joined with vinsertf128 before vmovmskpd; the AVX2 prefix expects a
; single 256-bit compare before vmovmskpd; both finish with cmpl/sbbq. The
; SSE and AVX512 prefixes expect an or/shuffle sequence in vector registers.
297 define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
298 ; SSE-LABEL: test_v4i64_sext:
300 ; SSE-NEXT: pcmpgtq %xmm3, %xmm1
301 ; SSE-NEXT: pcmpgtq %xmm2, %xmm0
302 ; SSE-NEXT: por %xmm1, %xmm0
303 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
304 ; SSE-NEXT: por %xmm0, %xmm1
305 ; SSE-NEXT: movq %xmm1, %rax
308 ; AVX1-LABEL: test_v4i64_sext:
310 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
311 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
312 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
313 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
314 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
315 ; AVX1-NEXT: vmovmskpd %ymm0, %ecx
316 ; AVX1-NEXT: xorl %eax, %eax
317 ; AVX1-NEXT: cmpl %ecx, %eax
318 ; AVX1-NEXT: sbbq %rax, %rax
319 ; AVX1-NEXT: vzeroupper
322 ; AVX2-LABEL: test_v4i64_sext:
324 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
325 ; AVX2-NEXT: vmovmskpd %ymm0, %ecx
326 ; AVX2-NEXT: xorl %eax, %eax
327 ; AVX2-NEXT: cmpl %ecx, %eax
328 ; AVX2-NEXT: sbbq %rax, %rax
329 ; AVX2-NEXT: vzeroupper
332 ; AVX512-LABEL: test_v4i64_sext:
334 ; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
335 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
336 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
337 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
338 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
339 ; AVX512-NEXT: vmovq %xmm0, %rax
340 ; AVX512-NEXT: vzeroupper
342 %c = icmp sgt <4 x i64> %a0, %a1
343 %s = sext <4 x i1> %c to <4 x i64>
344 %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
345 %2 = or <4 x i64> %s, %1
346 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
347 %4 = or <4 x i64> %2, %3
348 %5 = extractelement <4 x i64> %4, i64 0
; OR reduction of a 4-lane signed 64-bit compare mask, performed on a
; narrower element type than the compared operands.
; The IR compares <4 x i64> values with icmp sgt but sign-extends the
; <4 x i1> result to <4 x i32>, reduces it with two shufflevector + or steps,
; extracts lane 0 (%5) and sign-extends that scalar to i64 (%6).
; The SSE, AVX1 and AVX2 prefixes expect packssdw followed by movmskps,
; cmpl/sbbl and movslq; the AVX512 prefix expects a compare into a mask
; register, a zero-masked vmovdqa32 and a shuffle/or sequence.
352 define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
353 ; SSE-LABEL: test_v4i64_legal_sext:
355 ; SSE-NEXT: pcmpgtq %xmm3, %xmm1
356 ; SSE-NEXT: pcmpgtq %xmm2, %xmm0
357 ; SSE-NEXT: packssdw %xmm1, %xmm0
358 ; SSE-NEXT: movmskps %xmm0, %eax
359 ; SSE-NEXT: xorl %ecx, %ecx
360 ; SSE-NEXT: cmpl %eax, %ecx
361 ; SSE-NEXT: sbbl %ecx, %ecx
362 ; SSE-NEXT: movslq %ecx, %rax
365 ; AVX1-LABEL: test_v4i64_legal_sext:
367 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
368 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
369 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
370 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
371 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
372 ; AVX1-NEXT: vmovmskps %xmm0, %eax
373 ; AVX1-NEXT: xorl %ecx, %ecx
374 ; AVX1-NEXT: cmpl %eax, %ecx
375 ; AVX1-NEXT: sbbl %ecx, %ecx
376 ; AVX1-NEXT: movslq %ecx, %rax
377 ; AVX1-NEXT: vzeroupper
380 ; AVX2-LABEL: test_v4i64_legal_sext:
382 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
383 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
384 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
385 ; AVX2-NEXT: vmovmskps %xmm0, %eax
386 ; AVX2-NEXT: xorl %ecx, %ecx
387 ; AVX2-NEXT: cmpl %eax, %ecx
388 ; AVX2-NEXT: sbbl %ecx, %ecx
389 ; AVX2-NEXT: movslq %ecx, %rax
390 ; AVX2-NEXT: vzeroupper
393 ; AVX512-LABEL: test_v4i64_legal_sext:
395 ; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
396 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
397 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
398 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
399 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
400 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
401 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
402 ; AVX512-NEXT: vmovd %xmm0, %eax
404 ; AVX512-NEXT: vzeroupper
406 %c = icmp sgt <4 x i64> %a0, %a1
407 %s = sext <4 x i1> %c to <4 x i32>
408 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
409 %2 = or <4 x i32> %s, %1
410 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
411 %4 = or <4 x i32> %2, %3
412 %5 = extractelement <4 x i32> %4, i64 0
413 %6 = sext i32 %5 to i64
; OR reduction of a 4-lane signed 32-bit compare mask.
; The IR compares with icmp sgt, sign-extends the <4 x i1> result to
; <4 x i32>, folds lanes 2,3 into lanes 0,1 and then lane 1 into lane 0 with
; shufflevector + or, and extracts lane 0 (%5).
; The SSE and AVX prefixes expect movmskps followed by cmpl/sbbl; the AVX512
; prefix expects the shuffle/or sequence to remain in vector registers.
417 define i32 @test_v4i32_sext(<4 x i32> %a0, <4 x i32> %a1) {
418 ; SSE-LABEL: test_v4i32_sext:
420 ; SSE-NEXT: pcmpgtd %xmm1, %xmm0
421 ; SSE-NEXT: movmskps %xmm0, %ecx
422 ; SSE-NEXT: xorl %eax, %eax
423 ; SSE-NEXT: cmpl %ecx, %eax
424 ; SSE-NEXT: sbbl %eax, %eax
427 ; AVX-LABEL: test_v4i32_sext:
429 ; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
430 ; AVX-NEXT: vmovmskps %xmm0, %ecx
431 ; AVX-NEXT: xorl %eax, %eax
432 ; AVX-NEXT: cmpl %ecx, %eax
433 ; AVX-NEXT: sbbl %eax, %eax
436 ; AVX512-LABEL: test_v4i32_sext:
438 ; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
439 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
440 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
441 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
442 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
443 ; AVX512-NEXT: vmovd %xmm0, %eax
445 %c = icmp sgt <4 x i32> %a0, %a1
446 %s = sext <4 x i1> %c to <4 x i32>
447 %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
448 %2 = or <4 x i32> %s, %1
449 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
450 %4 = or <4 x i32> %2, %3
451 %5 = extractelement <4 x i32> %4, i32 0
; OR reduction of an 8-lane signed 32-bit compare mask.
; The IR compares with icmp sgt, sign-extends the <8 x i1> result to
; <8 x i32>, then halves the vector three times with shufflevector + or
; (4 lanes, 2 lanes, 1 lane) and extracts lane 0 (%7).
; The SSE prefix expects the two 128-bit compare results to be OR-ed before
; movmskps; the AVX1 prefix expects two 128-bit compares re-joined with
; vinsertf128 before a 256-bit vmovmskps; the AVX2 prefix expects a single
; 256-bit compare before vmovmskps. All three finish with cmpl/sbbl. The
; AVX512 prefix expects an extract/shuffle/or sequence.
455 define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
456 ; SSE-LABEL: test_v8i32_sext:
458 ; SSE-NEXT: pcmpgtd %xmm3, %xmm1
459 ; SSE-NEXT: pcmpgtd %xmm2, %xmm0
460 ; SSE-NEXT: por %xmm1, %xmm0
461 ; SSE-NEXT: movmskps %xmm0, %ecx
462 ; SSE-NEXT: xorl %eax, %eax
463 ; SSE-NEXT: cmpl %ecx, %eax
464 ; SSE-NEXT: sbbl %eax, %eax
467 ; AVX1-LABEL: test_v8i32_sext:
469 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
470 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
471 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
472 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
473 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
474 ; AVX1-NEXT: vmovmskps %ymm0, %ecx
475 ; AVX1-NEXT: xorl %eax, %eax
476 ; AVX1-NEXT: cmpl %ecx, %eax
477 ; AVX1-NEXT: sbbl %eax, %eax
478 ; AVX1-NEXT: vzeroupper
481 ; AVX2-LABEL: test_v8i32_sext:
483 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
484 ; AVX2-NEXT: vmovmskps %ymm0, %ecx
485 ; AVX2-NEXT: xorl %eax, %eax
486 ; AVX2-NEXT: cmpl %ecx, %eax
487 ; AVX2-NEXT: sbbl %eax, %eax
488 ; AVX2-NEXT: vzeroupper
491 ; AVX512-LABEL: test_v8i32_sext:
493 ; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
494 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
495 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
496 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
497 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
498 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
499 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
500 ; AVX512-NEXT: vmovd %xmm0, %eax
501 ; AVX512-NEXT: vzeroupper
503 %c = icmp sgt <8 x i32> %a0, %a1
504 %s = sext <8 x i1> %c to <8 x i32>
505 %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
506 %2 = or <8 x i32> %s, %1
507 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
508 %4 = or <8 x i32> %2, %3
509 %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
510 %6 = or <8 x i32> %4, %5
511 %7 = extractelement <8 x i32> %6, i32 0
; OR reduction of an 8-lane signed 32-bit compare mask, performed on a
; narrower element type than the compared operands.
; The IR compares <8 x i32> values with icmp sgt but sign-extends the
; <8 x i1> result to <8 x i16>, reduces it with three shufflevector + or
; steps, extracts lane 0 (%7) and sign-extends that scalar to i32 (%8).
; The SSE, AVX1 and AVX2 prefixes expect packssdw followed by pmovmskb and
; cmpl/sbbl; the AVX512 prefix expects a compare into a mask register,
; vpmovm2w, and a shuffle/shift/or sequence.
515 define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) {
516 ; SSE-LABEL: test_v8i32_legal_sext:
518 ; SSE-NEXT: pcmpgtd %xmm3, %xmm1
519 ; SSE-NEXT: pcmpgtd %xmm2, %xmm0
520 ; SSE-NEXT: packssdw %xmm1, %xmm0
521 ; SSE-NEXT: pmovmskb %xmm0, %ecx
522 ; SSE-NEXT: xorl %eax, %eax
523 ; SSE-NEXT: cmpl %ecx, %eax
524 ; SSE-NEXT: sbbl %eax, %eax
527 ; AVX1-LABEL: test_v8i32_legal_sext:
529 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
530 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
531 ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
532 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
533 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
534 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
535 ; AVX1-NEXT: xorl %eax, %eax
536 ; AVX1-NEXT: cmpl %ecx, %eax
537 ; AVX1-NEXT: sbbl %eax, %eax
538 ; AVX1-NEXT: vzeroupper
541 ; AVX2-LABEL: test_v8i32_legal_sext:
543 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
544 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
545 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
546 ; AVX2-NEXT: vpmovmskb %xmm0, %ecx
547 ; AVX2-NEXT: xorl %eax, %eax
548 ; AVX2-NEXT: cmpl %ecx, %eax
549 ; AVX2-NEXT: sbbl %eax, %eax
550 ; AVX2-NEXT: vzeroupper
553 ; AVX512-LABEL: test_v8i32_legal_sext:
555 ; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
556 ; AVX512-NEXT: vpmovm2w %k0, %xmm0
557 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
558 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
559 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
560 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
561 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
562 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
563 ; AVX512-NEXT: vmovd %xmm0, %eax
565 ; AVX512-NEXT: vzeroupper
567 %c = icmp sgt <8 x i32> %a0, %a1
568 %s = sext <8 x i1> %c to <8 x i16>
569 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
570 %2 = or <8 x i16> %s, %1
571 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
572 %4 = or <8 x i16> %2, %3
573 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
574 %6 = or <8 x i16> %4, %5
575 %7 = extractelement <8 x i16> %6, i32 0
576 %8 = sext i16 %7 to i32
; OR reduction of an 8-lane signed 16-bit compare mask.
; The IR compares with icmp sgt, sign-extends the <8 x i1> result to
; <8 x i16>, then halves the vector three times with shufflevector + or
; (4 lanes, 2 lanes, 1 lane) and extracts lane 0 (%7).
; The SSE and AVX prefixes expect pmovmskb followed by cmpl/sbbl; the AVX512
; prefix expects a shuffle/shift/or sequence followed by vmovd.
580 define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) {
581 ; SSE-LABEL: test_v8i16_sext:
583 ; SSE-NEXT: pcmpgtw %xmm1, %xmm0
584 ; SSE-NEXT: pmovmskb %xmm0, %ecx
585 ; SSE-NEXT: xorl %eax, %eax
586 ; SSE-NEXT: cmpl %ecx, %eax
587 ; SSE-NEXT: sbbl %eax, %eax
588 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
591 ; AVX-LABEL: test_v8i16_sext:
593 ; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
594 ; AVX-NEXT: vpmovmskb %xmm0, %ecx
595 ; AVX-NEXT: xorl %eax, %eax
596 ; AVX-NEXT: cmpl %ecx, %eax
597 ; AVX-NEXT: sbbl %eax, %eax
598 ; AVX-NEXT: # kill: def $ax killed $ax killed $eax
601 ; AVX512-LABEL: test_v8i16_sext:
603 ; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
604 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
605 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
606 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
607 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
608 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
609 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
610 ; AVX512-NEXT: vmovd %xmm0, %eax
611 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
613 %c = icmp sgt <8 x i16> %a0, %a1
614 %s = sext <8 x i1> %c to <8 x i16>
615 %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
616 %2 = or <8 x i16> %s, %1
617 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
618 %4 = or <8 x i16> %2, %3
619 %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
620 %6 = or <8 x i16> %4, %5
621 %7 = extractelement <8 x i16> %6, i32 0
; OR reduction of a 16-lane signed 16-bit compare mask.
; The IR compares with icmp sgt, sign-extends the <16 x i1> result to
; <16 x i16>, then halves the vector four times with shufflevector + or
; (8, 4, 2, 1 lanes) and extracts lane 0 (%9).
; The SSE prefix expects the two 128-bit compare results to be OR-ed before
; pmovmskb, and the AVX2 prefix expects a 256-bit vpmovmskb; both finish with
; cmpl/sbbl. The AVX1 and AVX512 prefixes expect the whole reduction to stay
; in vector registers as a shuffle/shift/or sequence ending in vmovd.
625 define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
626 ; SSE-LABEL: test_v16i16_sext:
628 ; SSE-NEXT: pcmpgtw %xmm3, %xmm1
629 ; SSE-NEXT: pcmpgtw %xmm2, %xmm0
630 ; SSE-NEXT: por %xmm1, %xmm0
631 ; SSE-NEXT: pmovmskb %xmm0, %ecx
632 ; SSE-NEXT: xorl %eax, %eax
633 ; SSE-NEXT: cmpl %ecx, %eax
634 ; SSE-NEXT: sbbl %eax, %eax
635 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
638 ; AVX1-LABEL: test_v16i16_sext:
640 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
641 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
642 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
643 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
644 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
645 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
646 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
647 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
648 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
649 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
650 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
651 ; AVX1-NEXT: vmovd %xmm0, %eax
652 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
653 ; AVX1-NEXT: vzeroupper
656 ; AVX2-LABEL: test_v16i16_sext:
658 ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
659 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
660 ; AVX2-NEXT: xorl %eax, %eax
661 ; AVX2-NEXT: cmpl %ecx, %eax
662 ; AVX2-NEXT: sbbl %eax, %eax
663 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
664 ; AVX2-NEXT: vzeroupper
667 ; AVX512-LABEL: test_v16i16_sext:
669 ; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
670 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
671 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
672 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
673 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
674 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
675 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
676 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
677 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
678 ; AVX512-NEXT: vmovd %xmm0, %eax
679 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
680 ; AVX512-NEXT: vzeroupper
682 %c = icmp sgt <16 x i16> %a0, %a1
683 %s = sext <16 x i1> %c to <16 x i16>
684 %1 = shufflevector <16 x i16> %s, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
685 %2 = or <16 x i16> %s, %1
686 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
687 %4 = or <16 x i16> %2, %3
688 %5 = shufflevector <16 x i16> %4, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
689 %6 = or <16 x i16> %4, %5
690 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
691 %8 = or <16 x i16> %6, %7
692 %9 = extractelement <16 x i16> %8, i32 0
; OR reduction of a 16-lane signed 16-bit compare mask, performed on a
; narrower element type than the compared operands.
; The IR compares <16 x i16> values with icmp sgt but sign-extends the
; <16 x i1> result to <16 x i8>, reduces it with four shufflevector + or
; steps, extracts lane 0 (%9) and sign-extends that scalar to i16 (%10).
; The SSE, AVX1 and AVX2 prefixes expect packsswb followed by pmovmskb and
; cmpl/sbbl; the AVX512 prefix expects a compare into a mask register,
; vpmovm2b, a shuffle/shift/or sequence, then vpextrb and movsbl.
696 define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
697 ; SSE-LABEL: test_v16i16_legal_sext:
699 ; SSE-NEXT: pcmpgtw %xmm3, %xmm1
700 ; SSE-NEXT: pcmpgtw %xmm2, %xmm0
701 ; SSE-NEXT: packsswb %xmm1, %xmm0
702 ; SSE-NEXT: pmovmskb %xmm0, %ecx
703 ; SSE-NEXT: xorl %eax, %eax
704 ; SSE-NEXT: cmpl %ecx, %eax
705 ; SSE-NEXT: sbbl %eax, %eax
706 ; SSE-NEXT: # kill: def $ax killed $ax killed $eax
709 ; AVX1-LABEL: test_v16i16_legal_sext:
711 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
712 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
713 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
714 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
715 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
716 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
717 ; AVX1-NEXT: xorl %eax, %eax
718 ; AVX1-NEXT: cmpl %ecx, %eax
719 ; AVX1-NEXT: sbbl %eax, %eax
720 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
721 ; AVX1-NEXT: vzeroupper
724 ; AVX2-LABEL: test_v16i16_legal_sext:
726 ; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
727 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
728 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
729 ; AVX2-NEXT: vpmovmskb %xmm0, %ecx
730 ; AVX2-NEXT: xorl %eax, %eax
731 ; AVX2-NEXT: cmpl %ecx, %eax
732 ; AVX2-NEXT: sbbl %eax, %eax
733 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
734 ; AVX2-NEXT: vzeroupper
737 ; AVX512-LABEL: test_v16i16_legal_sext:
739 ; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
740 ; AVX512-NEXT: vpmovm2b %k0, %xmm0
741 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
742 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
743 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
744 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
745 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
746 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
747 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
748 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
749 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax
750 ; AVX512-NEXT: movsbl %al, %eax
751 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
752 ; AVX512-NEXT: vzeroupper
754 %c = icmp sgt <16 x i16> %a0, %a1
755 %s = sext <16 x i1> %c to <16 x i8>
756 %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
757 %2 = or <16 x i8> %s, %1
758 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
759 %4 = or <16 x i8> %2, %3
760 %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
761 %6 = or <16 x i8> %4, %5
762 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
763 %8 = or <16 x i8> %6, %7
764 %9 = extractelement <16 x i8> %8, i32 0
765 %10 = sext i8 %9 to i16
; OR reduction of a 16-lane signed 8-bit compare mask.
; The IR compares with icmp sgt, sign-extends the <16 x i1> result to
; <16 x i8>, then halves the vector four times with shufflevector + or
; (8, 4, 2, 1 lanes) and extracts lane 0 (%9).
; The SSE and AVX prefixes expect pmovmskb followed by cmpl/sbbl; the AVX512
; prefix expects a shuffle/shift/or sequence followed by vpextrb.
769 define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
770 ; SSE-LABEL: test_v16i8_sext:
772 ; SSE-NEXT: pcmpgtb %xmm1, %xmm0
773 ; SSE-NEXT: pmovmskb %xmm0, %ecx
774 ; SSE-NEXT: xorl %eax, %eax
775 ; SSE-NEXT: cmpl %ecx, %eax
776 ; SSE-NEXT: sbbl %eax, %eax
777 ; SSE-NEXT: # kill: def $al killed $al killed $eax
780 ; AVX-LABEL: test_v16i8_sext:
782 ; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
783 ; AVX-NEXT: vpmovmskb %xmm0, %ecx
784 ; AVX-NEXT: xorl %eax, %eax
785 ; AVX-NEXT: cmpl %ecx, %eax
786 ; AVX-NEXT: sbbl %eax, %eax
787 ; AVX-NEXT: # kill: def $al killed $al killed $eax
790 ; AVX512-LABEL: test_v16i8_sext:
792 ; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
793 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
794 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
795 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
796 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
797 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
798 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
799 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
800 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
801 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax
802 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
804 %c = icmp sgt <16 x i8> %a0, %a1
805 %s = sext <16 x i1> %c to <16 x i8>
806 %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
807 %2 = or <16 x i8> %s, %1
808 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
809 %4 = or <16 x i8> %2, %3
810 %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
811 %6 = or <16 x i8> %4, %5
812 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
813 %8 = or <16 x i8> %6, %7
814 %9 = extractelement <16 x i8> %8, i32 0
818 define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
819 ; SSE-LABEL: test_v32i8_sext:
821 ; SSE-NEXT: pcmpgtb %xmm3, %xmm1
822 ; SSE-NEXT: pcmpgtb %xmm2, %xmm0
823 ; SSE-NEXT: por %xmm1, %xmm0
824 ; SSE-NEXT: pmovmskb %xmm0, %ecx
825 ; SSE-NEXT: xorl %eax, %eax
826 ; SSE-NEXT: cmpl %ecx, %eax
827 ; SSE-NEXT: sbbl %eax, %eax
828 ; SSE-NEXT: # kill: def $al killed $al killed $eax
831 ; AVX1-LABEL: test_v32i8_sext:
833 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
834 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
835 ; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
836 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
837 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
838 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
839 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
840 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
841 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
842 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
843 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
844 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
845 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
846 ; AVX1-NEXT: vpextrb $0, %xmm0, %eax
847 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
848 ; AVX1-NEXT: vzeroupper
851 ; AVX2-LABEL: test_v32i8_sext:
853 ; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
854 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
855 ; AVX2-NEXT: xorl %eax, %eax
856 ; AVX2-NEXT: cmpl %ecx, %eax
857 ; AVX2-NEXT: sbbl %eax, %eax
858 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
859 ; AVX2-NEXT: vzeroupper
862 ; AVX512-LABEL: test_v32i8_sext:
864 ; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
865 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
866 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
867 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
868 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
869 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
870 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
871 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
872 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
873 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
874 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
875 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax
876 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
877 ; AVX512-NEXT: vzeroupper
879 %c = icmp sgt <32 x i8> %a0, %a1
880 %s = sext <32 x i1> %c to <32 x i8>
881 %1 = shufflevector <32 x i8> %s, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
882 %2 = or <32 x i8> %s, %1
883 %3 = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
884 %4 = or <32 x i8> %2, %3
885 %5 = shufflevector <32 x i8> %4, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
886 %6 = or <32 x i8> %4, %5
887 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
888 %8 = or <32 x i8> %6, %7
889 %9 = shufflevector <32 x i8> %8, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
890 %10 = or <32 x i8> %8, %9
891 %11 = extractelement <32 x i8> %10, i32 0