1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE42
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
; Per-lane signed maximum of two 2 x i64 vectors, written as icmp sgt + select.
; Expected lowering per run line: SSE2 and SSE4.1 emulate the 64-bit compare with
; 32-bit pcmpgtd/pcmpeqd plus shuffles, SSE4.2/AVX1/AVX2 use pcmpgtq and a blend,
; and the AVX512 runs use one vpmaxsq on the zmm super-registers.
14 define <2 x i64> @max_gt_v2i64(<2 x i64> %a, <2 x i64> %b) {
15 ; SSE2-LABEL: max_gt_v2i64:
17 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
18 ; SSE2-NEXT: movdqa %xmm1, %xmm3
19 ; SSE2-NEXT: pxor %xmm2, %xmm3
20 ; SSE2-NEXT: pxor %xmm0, %xmm2
21 ; SSE2-NEXT: movdqa %xmm2, %xmm4
22 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
23 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
24 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
25 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
26 ; SSE2-NEXT: pand %xmm5, %xmm2
27 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
28 ; SSE2-NEXT: por %xmm2, %xmm3
29 ; SSE2-NEXT: pand %xmm3, %xmm0
30 ; SSE2-NEXT: pandn %xmm1, %xmm3
31 ; SSE2-NEXT: por %xmm3, %xmm0
34 ; SSE41-LABEL: max_gt_v2i64:
36 ; SSE41-NEXT: movdqa %xmm0, %xmm2
37 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
38 ; SSE41-NEXT: movdqa %xmm1, %xmm0
39 ; SSE41-NEXT: pxor %xmm3, %xmm0
40 ; SSE41-NEXT: pxor %xmm2, %xmm3
41 ; SSE41-NEXT: movdqa %xmm3, %xmm4
42 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
43 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
44 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
45 ; SSE41-NEXT: pand %xmm4, %xmm0
46 ; SSE41-NEXT: por %xmm3, %xmm0
47 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
48 ; SSE41-NEXT: movapd %xmm1, %xmm0
51 ; SSE42-LABEL: max_gt_v2i64:
53 ; SSE42-NEXT: movdqa %xmm0, %xmm2
54 ; SSE42-NEXT: pcmpgtq %xmm1, %xmm0
55 ; SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
56 ; SSE42-NEXT: movapd %xmm1, %xmm0
59 ; AVX1-LABEL: max_gt_v2i64:
61 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
62 ; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
65 ; AVX2-LABEL: max_gt_v2i64:
67 ; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
68 ; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
71 ; AVX512-LABEL: max_gt_v2i64:
73 ; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
74 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
75 ; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
76 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
77 ; AVX512-NEXT: vzeroupper
79 %1 = icmp sgt <2 x i64> %a, %b
80 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
; Per-lane signed maximum of two 4 x i64 vectors, written as icmp sgt + select.
; The SSE runs handle the value as two xmm halves (a in xmm0/xmm1, b in xmm2/xmm3).
; AVX1 compares the 128-bit halves separately and blends at ymm width, AVX2 uses a
; ymm vpcmpgtq plus blend, and the AVX512 runs use one vpmaxsq on zmm registers.
84 define <4 x i64> @max_gt_v4i64(<4 x i64> %a, <4 x i64> %b) {
85 ; SSE2-LABEL: max_gt_v4i64:
87 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
88 ; SSE2-NEXT: movdqa %xmm2, %xmm5
89 ; SSE2-NEXT: pxor %xmm4, %xmm5
90 ; SSE2-NEXT: movdqa %xmm0, %xmm6
91 ; SSE2-NEXT: pxor %xmm4, %xmm6
92 ; SSE2-NEXT: movdqa %xmm6, %xmm7
93 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
94 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
95 ; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
96 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
97 ; SSE2-NEXT: pand %xmm8, %xmm5
98 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
99 ; SSE2-NEXT: por %xmm5, %xmm6
100 ; SSE2-NEXT: pand %xmm6, %xmm0
101 ; SSE2-NEXT: pandn %xmm2, %xmm6
102 ; SSE2-NEXT: por %xmm6, %xmm0
103 ; SSE2-NEXT: movdqa %xmm3, %xmm2
104 ; SSE2-NEXT: pxor %xmm4, %xmm2
105 ; SSE2-NEXT: pxor %xmm1, %xmm4
106 ; SSE2-NEXT: movdqa %xmm4, %xmm5
107 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
108 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
109 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm4
110 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
111 ; SSE2-NEXT: pand %xmm6, %xmm2
112 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
113 ; SSE2-NEXT: por %xmm2, %xmm4
114 ; SSE2-NEXT: pand %xmm4, %xmm1
115 ; SSE2-NEXT: pandn %xmm3, %xmm4
116 ; SSE2-NEXT: por %xmm4, %xmm1
119 ; SSE41-LABEL: max_gt_v4i64:
121 ; SSE41-NEXT: movdqa %xmm0, %xmm4
122 ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648]
123 ; SSE41-NEXT: movdqa %xmm2, %xmm6
124 ; SSE41-NEXT: pxor %xmm5, %xmm6
125 ; SSE41-NEXT: movdqa %xmm0, %xmm7
126 ; SSE41-NEXT: pxor %xmm5, %xmm7
127 ; SSE41-NEXT: movdqa %xmm7, %xmm0
128 ; SSE41-NEXT: pcmpeqd %xmm6, %xmm0
129 ; SSE41-NEXT: pcmpgtd %xmm6, %xmm7
130 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm7[0,0,2,2]
131 ; SSE41-NEXT: pand %xmm6, %xmm0
132 ; SSE41-NEXT: por %xmm7, %xmm0
133 ; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2
134 ; SSE41-NEXT: movdqa %xmm3, %xmm0
135 ; SSE41-NEXT: pxor %xmm5, %xmm0
136 ; SSE41-NEXT: pxor %xmm1, %xmm5
137 ; SSE41-NEXT: movdqa %xmm5, %xmm4
138 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
139 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm5
140 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
141 ; SSE41-NEXT: pand %xmm4, %xmm0
142 ; SSE41-NEXT: por %xmm5, %xmm0
143 ; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3
144 ; SSE41-NEXT: movapd %xmm2, %xmm0
145 ; SSE41-NEXT: movapd %xmm3, %xmm1
148 ; SSE42-LABEL: max_gt_v4i64:
150 ; SSE42-NEXT: movdqa %xmm0, %xmm4
151 ; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
152 ; SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2
153 ; SSE42-NEXT: movdqa %xmm1, %xmm0
154 ; SSE42-NEXT: pcmpgtq %xmm3, %xmm0
155 ; SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
156 ; SSE42-NEXT: movapd %xmm2, %xmm0
157 ; SSE42-NEXT: movapd %xmm3, %xmm1
160 ; AVX1-LABEL: max_gt_v4i64:
162 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
163 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
164 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
165 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3
166 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
167 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
170 ; AVX2-LABEL: max_gt_v4i64:
172 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
173 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
176 ; AVX512-LABEL: max_gt_v4i64:
178 ; AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
179 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
180 ; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
181 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
183 %1 = icmp sgt <4 x i64> %a, %b
184 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
; Per-lane signed maximum of two 4 x i32 vectors, written as icmp sgt + select.
; The SSE2 run expects a pcmpgtd mask combined with pand/pandn/por; the SSE4.1,
; SSE4.2 and AVX-family runs expect a single pmaxsd/vpmaxsd.
188 define <4 x i32> @max_gt_v4i32(<4 x i32> %a, <4 x i32> %b) {
189 ; SSE2-LABEL: max_gt_v4i32:
191 ; SSE2-NEXT: movdqa %xmm0, %xmm2
192 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
193 ; SSE2-NEXT: pand %xmm2, %xmm0
194 ; SSE2-NEXT: pandn %xmm1, %xmm2
195 ; SSE2-NEXT: por %xmm0, %xmm2
196 ; SSE2-NEXT: movdqa %xmm2, %xmm0
199 ; SSE41-LABEL: max_gt_v4i32:
201 ; SSE41-NEXT: pmaxsd %xmm1, %xmm0
204 ; SSE42-LABEL: max_gt_v4i32:
206 ; SSE42-NEXT: pmaxsd %xmm1, %xmm0
209 ; AVX-LABEL: max_gt_v4i32:
211 ; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
213 %1 = icmp sgt <4 x i32> %a, %b
214 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
; Per-lane signed maximum of two 8 x i32 vectors, written as icmp sgt + select.
; The SSE runs process two xmm halves (compare-and-mask on SSE2, pmaxsd on SSE4.x).
; AVX1 extracts the upper halves, applies vpmaxsd to each half and reinserts;
; AVX2 and the AVX512 runs expect a single ymm vpmaxsd.
218 define <8 x i32> @max_gt_v8i32(<8 x i32> %a, <8 x i32> %b) {
219 ; SSE2-LABEL: max_gt_v8i32:
221 ; SSE2-NEXT: movdqa %xmm0, %xmm4
222 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
223 ; SSE2-NEXT: pand %xmm4, %xmm0
224 ; SSE2-NEXT: pandn %xmm2, %xmm4
225 ; SSE2-NEXT: por %xmm0, %xmm4
226 ; SSE2-NEXT: movdqa %xmm1, %xmm2
227 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm2
228 ; SSE2-NEXT: pand %xmm2, %xmm1
229 ; SSE2-NEXT: pandn %xmm3, %xmm2
230 ; SSE2-NEXT: por %xmm1, %xmm2
231 ; SSE2-NEXT: movdqa %xmm4, %xmm0
232 ; SSE2-NEXT: movdqa %xmm2, %xmm1
235 ; SSE41-LABEL: max_gt_v8i32:
237 ; SSE41-NEXT: pmaxsd %xmm2, %xmm0
238 ; SSE41-NEXT: pmaxsd %xmm3, %xmm1
241 ; SSE42-LABEL: max_gt_v8i32:
243 ; SSE42-NEXT: pmaxsd %xmm2, %xmm0
244 ; SSE42-NEXT: pmaxsd %xmm3, %xmm1
247 ; AVX1-LABEL: max_gt_v8i32:
249 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
250 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
251 ; AVX1-NEXT: vpmaxsd %xmm2, %xmm3, %xmm2
252 ; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
253 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
256 ; AVX2-LABEL: max_gt_v8i32:
258 ; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
261 ; AVX512-LABEL: max_gt_v8i32:
263 ; AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
265 %1 = icmp sgt <8 x i32> %a, %b
266 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
; Per-lane signed maximum of two 8 x i16 vectors, written as icmp sgt + select.
; Every SSE run (shared SSE prefix) expects a single pmaxsw, and every AVX-family
; run expects a single vpmaxsw.
270 define <8 x i16> @max_gt_v8i16(<8 x i16> %a, <8 x i16> %b) {
271 ; SSE-LABEL: max_gt_v8i16:
273 ; SSE-NEXT: pmaxsw %xmm1, %xmm0
276 ; AVX-LABEL: max_gt_v8i16:
278 ; AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
280 %1 = icmp sgt <8 x i16> %a, %b
281 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
; Per-lane signed maximum of two 16 x i16 vectors, written as icmp sgt + select.
; The SSE runs expect one pmaxsw per xmm half. AVX1 splits into 128-bit halves
; around two vpmaxsw; AVX2 and the AVX512 runs expect a single ymm vpmaxsw.
285 define <16 x i16> @max_gt_v16i16(<16 x i16> %a, <16 x i16> %b) {
286 ; SSE-LABEL: max_gt_v16i16:
288 ; SSE-NEXT: pmaxsw %xmm2, %xmm0
289 ; SSE-NEXT: pmaxsw %xmm3, %xmm1
292 ; AVX1-LABEL: max_gt_v16i16:
294 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
295 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
296 ; AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2
297 ; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
298 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
301 ; AVX2-LABEL: max_gt_v16i16:
303 ; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
306 ; AVX512-LABEL: max_gt_v16i16:
308 ; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
310 %1 = icmp sgt <16 x i16> %a, %b
311 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
; Per-lane signed maximum of two 16 x i8 vectors, written as icmp sgt + select.
; The SSE2 run expects a pcmpgtb mask combined with pand/pandn/por; the SSE4.1,
; SSE4.2 and AVX-family runs expect a single pmaxsb/vpmaxsb.
315 define <16 x i8> @max_gt_v16i8(<16 x i8> %a, <16 x i8> %b) {
316 ; SSE2-LABEL: max_gt_v16i8:
318 ; SSE2-NEXT: movdqa %xmm0, %xmm2
319 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
320 ; SSE2-NEXT: pand %xmm2, %xmm0
321 ; SSE2-NEXT: pandn %xmm1, %xmm2
322 ; SSE2-NEXT: por %xmm0, %xmm2
323 ; SSE2-NEXT: movdqa %xmm2, %xmm0
326 ; SSE41-LABEL: max_gt_v16i8:
328 ; SSE41-NEXT: pmaxsb %xmm1, %xmm0
331 ; SSE42-LABEL: max_gt_v16i8:
333 ; SSE42-NEXT: pmaxsb %xmm1, %xmm0
336 ; AVX-LABEL: max_gt_v16i8:
338 ; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
340 %1 = icmp sgt <16 x i8> %a, %b
341 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
; Per-lane signed maximum of two 32 x i8 vectors, written as icmp sgt + select.
; The SSE runs process two xmm halves (pcmpgtb mask-and-merge on SSE2, pmaxsb on
; SSE4.x). AVX1 splits into 128-bit halves around two vpmaxsb; AVX2 and the
; AVX512 runs expect a single ymm vpmaxsb.
345 define <32 x i8> @max_gt_v32i8(<32 x i8> %a, <32 x i8> %b) {
346 ; SSE2-LABEL: max_gt_v32i8:
348 ; SSE2-NEXT: movdqa %xmm0, %xmm4
349 ; SSE2-NEXT: pcmpgtb %xmm2, %xmm4
350 ; SSE2-NEXT: pand %xmm4, %xmm0
351 ; SSE2-NEXT: pandn %xmm2, %xmm4
352 ; SSE2-NEXT: por %xmm0, %xmm4
353 ; SSE2-NEXT: movdqa %xmm1, %xmm2
354 ; SSE2-NEXT: pcmpgtb %xmm3, %xmm2
355 ; SSE2-NEXT: pand %xmm2, %xmm1
356 ; SSE2-NEXT: pandn %xmm3, %xmm2
357 ; SSE2-NEXT: por %xmm1, %xmm2
358 ; SSE2-NEXT: movdqa %xmm4, %xmm0
359 ; SSE2-NEXT: movdqa %xmm2, %xmm1
362 ; SSE41-LABEL: max_gt_v32i8:
364 ; SSE41-NEXT: pmaxsb %xmm2, %xmm0
365 ; SSE41-NEXT: pmaxsb %xmm3, %xmm1
368 ; SSE42-LABEL: max_gt_v32i8:
370 ; SSE42-NEXT: pmaxsb %xmm2, %xmm0
371 ; SSE42-NEXT: pmaxsb %xmm3, %xmm1
374 ; AVX1-LABEL: max_gt_v32i8:
376 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
377 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
378 ; AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
379 ; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
380 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
383 ; AVX2-LABEL: max_gt_v32i8:
385 ; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
388 ; AVX512-LABEL: max_gt_v32i8:
390 ; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
392 %1 = icmp sgt <32 x i8> %a, %b
393 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
398 ; Signed Maximum (GE)
; Per-lane signed maximum of two 2 x i64 vectors, written as icmp sge + select.
; The expected sequences still use strict greater-than compares (pcmpgtd/pcmpgtq);
; when lanes are equal either operand yields the same value. SSE2 and SSE4.1
; emulate the 64-bit compare with 32-bit operations, SSE4.2/AVX1/AVX2 use pcmpgtq
; plus a blend, and the AVX512 runs use one vpmaxsq on the zmm super-registers.
401 define <2 x i64> @max_ge_v2i64(<2 x i64> %a, <2 x i64> %b) {
402 ; SSE2-LABEL: max_ge_v2i64:
404 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
405 ; SSE2-NEXT: movdqa %xmm1, %xmm3
406 ; SSE2-NEXT: pxor %xmm2, %xmm3
407 ; SSE2-NEXT: pxor %xmm0, %xmm2
408 ; SSE2-NEXT: movdqa %xmm2, %xmm4
409 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
410 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
411 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
412 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
413 ; SSE2-NEXT: pand %xmm5, %xmm2
414 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
415 ; SSE2-NEXT: por %xmm2, %xmm3
416 ; SSE2-NEXT: pand %xmm3, %xmm0
417 ; SSE2-NEXT: pandn %xmm1, %xmm3
418 ; SSE2-NEXT: por %xmm3, %xmm0
421 ; SSE41-LABEL: max_ge_v2i64:
423 ; SSE41-NEXT: movdqa %xmm0, %xmm2
424 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
425 ; SSE41-NEXT: movdqa %xmm1, %xmm0
426 ; SSE41-NEXT: pxor %xmm3, %xmm0
427 ; SSE41-NEXT: pxor %xmm2, %xmm3
428 ; SSE41-NEXT: movdqa %xmm3, %xmm4
429 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
430 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
431 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
432 ; SSE41-NEXT: pand %xmm4, %xmm0
433 ; SSE41-NEXT: por %xmm3, %xmm0
434 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
435 ; SSE41-NEXT: movapd %xmm1, %xmm0
438 ; SSE42-LABEL: max_ge_v2i64:
440 ; SSE42-NEXT: movdqa %xmm0, %xmm2
441 ; SSE42-NEXT: pcmpgtq %xmm1, %xmm0
442 ; SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
443 ; SSE42-NEXT: movapd %xmm1, %xmm0
446 ; AVX1-LABEL: max_ge_v2i64:
448 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
449 ; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
452 ; AVX2-LABEL: max_ge_v2i64:
454 ; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
455 ; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
458 ; AVX512-LABEL: max_ge_v2i64:
460 ; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
461 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
462 ; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
463 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
464 ; AVX512-NEXT: vzeroupper
466 %1 = icmp sge <2 x i64> %a, %b
467 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
; Per-lane signed maximum of two 4 x i64 vectors, written as icmp sge + select.
; The expected sequences use strict greater-than compares; equal lanes give the
; same result from either operand. The SSE runs work on two xmm halves, AVX1
; compares 128-bit halves and blends at ymm width, AVX2 uses a ymm vpcmpgtq plus
; blend, and the AVX512 runs use one vpmaxsq on zmm registers.
471 define <4 x i64> @max_ge_v4i64(<4 x i64> %a, <4 x i64> %b) {
472 ; SSE2-LABEL: max_ge_v4i64:
474 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
475 ; SSE2-NEXT: movdqa %xmm2, %xmm5
476 ; SSE2-NEXT: pxor %xmm4, %xmm5
477 ; SSE2-NEXT: movdqa %xmm0, %xmm6
478 ; SSE2-NEXT: pxor %xmm4, %xmm6
479 ; SSE2-NEXT: movdqa %xmm6, %xmm7
480 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
481 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
482 ; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
483 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
484 ; SSE2-NEXT: pand %xmm8, %xmm5
485 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
486 ; SSE2-NEXT: por %xmm5, %xmm6
487 ; SSE2-NEXT: pand %xmm6, %xmm0
488 ; SSE2-NEXT: pandn %xmm2, %xmm6
489 ; SSE2-NEXT: por %xmm6, %xmm0
490 ; SSE2-NEXT: movdqa %xmm3, %xmm2
491 ; SSE2-NEXT: pxor %xmm4, %xmm2
492 ; SSE2-NEXT: pxor %xmm1, %xmm4
493 ; SSE2-NEXT: movdqa %xmm4, %xmm5
494 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
495 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
496 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm4
497 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
498 ; SSE2-NEXT: pand %xmm6, %xmm2
499 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
500 ; SSE2-NEXT: por %xmm2, %xmm4
501 ; SSE2-NEXT: pand %xmm4, %xmm1
502 ; SSE2-NEXT: pandn %xmm3, %xmm4
503 ; SSE2-NEXT: por %xmm4, %xmm1
506 ; SSE41-LABEL: max_ge_v4i64:
508 ; SSE41-NEXT: movdqa %xmm0, %xmm4
509 ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648]
510 ; SSE41-NEXT: movdqa %xmm2, %xmm6
511 ; SSE41-NEXT: pxor %xmm5, %xmm6
512 ; SSE41-NEXT: movdqa %xmm0, %xmm7
513 ; SSE41-NEXT: pxor %xmm5, %xmm7
514 ; SSE41-NEXT: movdqa %xmm7, %xmm0
515 ; SSE41-NEXT: pcmpeqd %xmm6, %xmm0
516 ; SSE41-NEXT: pcmpgtd %xmm6, %xmm7
517 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm7[0,0,2,2]
518 ; SSE41-NEXT: pand %xmm6, %xmm0
519 ; SSE41-NEXT: por %xmm7, %xmm0
520 ; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2
521 ; SSE41-NEXT: movdqa %xmm3, %xmm0
522 ; SSE41-NEXT: pxor %xmm5, %xmm0
523 ; SSE41-NEXT: pxor %xmm1, %xmm5
524 ; SSE41-NEXT: movdqa %xmm5, %xmm4
525 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
526 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm5
527 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
528 ; SSE41-NEXT: pand %xmm4, %xmm0
529 ; SSE41-NEXT: por %xmm5, %xmm0
530 ; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3
531 ; SSE41-NEXT: movapd %xmm2, %xmm0
532 ; SSE41-NEXT: movapd %xmm3, %xmm1
535 ; SSE42-LABEL: max_ge_v4i64:
537 ; SSE42-NEXT: movdqa %xmm0, %xmm4
538 ; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
539 ; SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2
540 ; SSE42-NEXT: movdqa %xmm1, %xmm0
541 ; SSE42-NEXT: pcmpgtq %xmm3, %xmm0
542 ; SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
543 ; SSE42-NEXT: movapd %xmm2, %xmm0
544 ; SSE42-NEXT: movapd %xmm3, %xmm1
547 ; AVX1-LABEL: max_ge_v4i64:
549 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
550 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
551 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
552 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3
553 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
554 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
557 ; AVX2-LABEL: max_ge_v4i64:
559 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
560 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
563 ; AVX512-LABEL: max_ge_v4i64:
565 ; AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
566 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
567 ; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
568 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
570 %1 = icmp sge <4 x i64> %a, %b
571 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
; Per-lane signed maximum of two 4 x i32 vectors, written as icmp sge + select.
; The SSE2 run expects a strict pcmpgtd mask combined with pand/pandn/por (equal
; lanes give the same value either way); the SSE4.1, SSE4.2 and AVX-family runs
; expect a single pmaxsd/vpmaxsd.
575 define <4 x i32> @max_ge_v4i32(<4 x i32> %a, <4 x i32> %b) {
576 ; SSE2-LABEL: max_ge_v4i32:
578 ; SSE2-NEXT: movdqa %xmm0, %xmm2
579 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
580 ; SSE2-NEXT: pand %xmm2, %xmm0
581 ; SSE2-NEXT: pandn %xmm1, %xmm2
582 ; SSE2-NEXT: por %xmm0, %xmm2
583 ; SSE2-NEXT: movdqa %xmm2, %xmm0
586 ; SSE41-LABEL: max_ge_v4i32:
588 ; SSE41-NEXT: pmaxsd %xmm1, %xmm0
591 ; SSE42-LABEL: max_ge_v4i32:
593 ; SSE42-NEXT: pmaxsd %xmm1, %xmm0
596 ; AVX-LABEL: max_ge_v4i32:
598 ; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
600 %1 = icmp sge <4 x i32> %a, %b
601 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
; Per-lane signed maximum of two 8 x i32 vectors, written as icmp sge + select.
; The SSE runs process two xmm halves (strict pcmpgtd mask-and-merge on SSE2,
; pmaxsd on SSE4.x). AVX1 splits into 128-bit halves around two vpmaxsd; AVX2 and
; the AVX512 runs expect a single ymm vpmaxsd.
605 define <8 x i32> @max_ge_v8i32(<8 x i32> %a, <8 x i32> %b) {
606 ; SSE2-LABEL: max_ge_v8i32:
608 ; SSE2-NEXT: movdqa %xmm0, %xmm4
609 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
610 ; SSE2-NEXT: pand %xmm4, %xmm0
611 ; SSE2-NEXT: pandn %xmm2, %xmm4
612 ; SSE2-NEXT: por %xmm0, %xmm4
613 ; SSE2-NEXT: movdqa %xmm1, %xmm2
614 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm2
615 ; SSE2-NEXT: pand %xmm2, %xmm1
616 ; SSE2-NEXT: pandn %xmm3, %xmm2
617 ; SSE2-NEXT: por %xmm1, %xmm2
618 ; SSE2-NEXT: movdqa %xmm4, %xmm0
619 ; SSE2-NEXT: movdqa %xmm2, %xmm1
622 ; SSE41-LABEL: max_ge_v8i32:
624 ; SSE41-NEXT: pmaxsd %xmm2, %xmm0
625 ; SSE41-NEXT: pmaxsd %xmm3, %xmm1
628 ; SSE42-LABEL: max_ge_v8i32:
630 ; SSE42-NEXT: pmaxsd %xmm2, %xmm0
631 ; SSE42-NEXT: pmaxsd %xmm3, %xmm1
634 ; AVX1-LABEL: max_ge_v8i32:
636 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
637 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
638 ; AVX1-NEXT: vpmaxsd %xmm2, %xmm3, %xmm2
639 ; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
640 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
643 ; AVX2-LABEL: max_ge_v8i32:
645 ; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
648 ; AVX512-LABEL: max_ge_v8i32:
650 ; AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
652 %1 = icmp sge <8 x i32> %a, %b
653 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
; Per-lane signed maximum of two 8 x i16 vectors, written as icmp sge + select.
; Every SSE run (shared SSE prefix) expects a single pmaxsw, and every AVX-family
; run expects a single vpmaxsw.
657 define <8 x i16> @max_ge_v8i16(<8 x i16> %a, <8 x i16> %b) {
658 ; SSE-LABEL: max_ge_v8i16:
660 ; SSE-NEXT: pmaxsw %xmm1, %xmm0
663 ; AVX-LABEL: max_ge_v8i16:
665 ; AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
667 %1 = icmp sge <8 x i16> %a, %b
668 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
; Per-lane signed maximum of two 16 x i16 vectors, written as icmp sge + select.
; The SSE runs expect one pmaxsw per xmm half. AVX1 splits into 128-bit halves
; around two vpmaxsw; AVX2 and the AVX512 runs expect a single ymm vpmaxsw.
672 define <16 x i16> @max_ge_v16i16(<16 x i16> %a, <16 x i16> %b) {
673 ; SSE-LABEL: max_ge_v16i16:
675 ; SSE-NEXT: pmaxsw %xmm2, %xmm0
676 ; SSE-NEXT: pmaxsw %xmm3, %xmm1
679 ; AVX1-LABEL: max_ge_v16i16:
681 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
682 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
683 ; AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2
684 ; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
685 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
688 ; AVX2-LABEL: max_ge_v16i16:
690 ; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
693 ; AVX512-LABEL: max_ge_v16i16:
695 ; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
697 %1 = icmp sge <16 x i16> %a, %b
698 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
; Per-lane signed maximum of two 16 x i8 vectors, written as icmp sge + select.
; The SSE2 run expects a strict pcmpgtb mask combined with pand/pandn/por (equal
; lanes give the same value either way); the SSE4.1, SSE4.2 and AVX-family runs
; expect a single pmaxsb/vpmaxsb.
702 define <16 x i8> @max_ge_v16i8(<16 x i8> %a, <16 x i8> %b) {
703 ; SSE2-LABEL: max_ge_v16i8:
705 ; SSE2-NEXT: movdqa %xmm0, %xmm2
706 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
707 ; SSE2-NEXT: pand %xmm2, %xmm0
708 ; SSE2-NEXT: pandn %xmm1, %xmm2
709 ; SSE2-NEXT: por %xmm0, %xmm2
710 ; SSE2-NEXT: movdqa %xmm2, %xmm0
713 ; SSE41-LABEL: max_ge_v16i8:
715 ; SSE41-NEXT: pmaxsb %xmm1, %xmm0
718 ; SSE42-LABEL: max_ge_v16i8:
720 ; SSE42-NEXT: pmaxsb %xmm1, %xmm0
723 ; AVX-LABEL: max_ge_v16i8:
725 ; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
727 %1 = icmp sge <16 x i8> %a, %b
728 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
; Per-lane signed maximum of two 32 x i8 vectors, written as icmp sge + select.
; The SSE runs process two xmm halves (strict pcmpgtb mask-and-merge on SSE2,
; pmaxsb on SSE4.x). AVX1 splits into 128-bit halves around two vpmaxsb; AVX2 and
; the AVX512 runs expect a single ymm vpmaxsb.
732 define <32 x i8> @max_ge_v32i8(<32 x i8> %a, <32 x i8> %b) {
733 ; SSE2-LABEL: max_ge_v32i8:
735 ; SSE2-NEXT: movdqa %xmm0, %xmm4
736 ; SSE2-NEXT: pcmpgtb %xmm2, %xmm4
737 ; SSE2-NEXT: pand %xmm4, %xmm0
738 ; SSE2-NEXT: pandn %xmm2, %xmm4
739 ; SSE2-NEXT: por %xmm0, %xmm4
740 ; SSE2-NEXT: movdqa %xmm1, %xmm2
741 ; SSE2-NEXT: pcmpgtb %xmm3, %xmm2
742 ; SSE2-NEXT: pand %xmm2, %xmm1
743 ; SSE2-NEXT: pandn %xmm3, %xmm2
744 ; SSE2-NEXT: por %xmm1, %xmm2
745 ; SSE2-NEXT: movdqa %xmm4, %xmm0
746 ; SSE2-NEXT: movdqa %xmm2, %xmm1
749 ; SSE41-LABEL: max_ge_v32i8:
751 ; SSE41-NEXT: pmaxsb %xmm2, %xmm0
752 ; SSE41-NEXT: pmaxsb %xmm3, %xmm1
755 ; SSE42-LABEL: max_ge_v32i8:
757 ; SSE42-NEXT: pmaxsb %xmm2, %xmm0
758 ; SSE42-NEXT: pmaxsb %xmm3, %xmm1
761 ; AVX1-LABEL: max_ge_v32i8:
763 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
764 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
765 ; AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
766 ; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
767 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
770 ; AVX2-LABEL: max_ge_v32i8:
772 ; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
775 ; AVX512-LABEL: max_ge_v32i8:
777 ; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
779 %1 = icmp sge <32 x i8> %a, %b
780 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
785 ; Signed Minimum (LT)
; Per-lane signed minimum of two 2 x i64 vectors, written as icmp slt + select.
; The expected compares test b > a (operands swapped relative to the IR) and then
; keep a where the mask is set. SSE2 and SSE4.1 emulate the 64-bit compare with
; 32-bit operations, SSE4.2/AVX1/AVX2 use pcmpgtq plus a blend, and the AVX512
; runs use one vpminsq on the zmm super-registers.
788 define <2 x i64> @min_lt_v2i64(<2 x i64> %a, <2 x i64> %b) {
789 ; SSE2-LABEL: min_lt_v2i64:
791 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
792 ; SSE2-NEXT: movdqa %xmm0, %xmm3
793 ; SSE2-NEXT: pxor %xmm2, %xmm3
794 ; SSE2-NEXT: pxor %xmm1, %xmm2
795 ; SSE2-NEXT: movdqa %xmm2, %xmm4
796 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
797 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
798 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
799 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
800 ; SSE2-NEXT: pand %xmm5, %xmm2
801 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
802 ; SSE2-NEXT: por %xmm2, %xmm3
803 ; SSE2-NEXT: pand %xmm3, %xmm0
804 ; SSE2-NEXT: pandn %xmm1, %xmm3
805 ; SSE2-NEXT: por %xmm3, %xmm0
808 ; SSE41-LABEL: min_lt_v2i64:
810 ; SSE41-NEXT: movdqa %xmm0, %xmm2
811 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
812 ; SSE41-NEXT: pxor %xmm3, %xmm0
813 ; SSE41-NEXT: pxor %xmm1, %xmm3
814 ; SSE41-NEXT: movdqa %xmm3, %xmm4
815 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
816 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
817 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
818 ; SSE41-NEXT: pand %xmm4, %xmm0
819 ; SSE41-NEXT: por %xmm3, %xmm0
820 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
821 ; SSE41-NEXT: movapd %xmm1, %xmm0
824 ; SSE42-LABEL: min_lt_v2i64:
826 ; SSE42-NEXT: movdqa %xmm0, %xmm2
827 ; SSE42-NEXT: movdqa %xmm1, %xmm0
828 ; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
829 ; SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
830 ; SSE42-NEXT: movapd %xmm1, %xmm0
833 ; AVX1-LABEL: min_lt_v2i64:
835 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
836 ; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
839 ; AVX2-LABEL: min_lt_v2i64:
841 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
842 ; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
845 ; AVX512-LABEL: min_lt_v2i64:
847 ; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
848 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
849 ; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
850 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
851 ; AVX512-NEXT: vzeroupper
853 %1 = icmp slt <2 x i64> %a, %b
854 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
; Per-lane signed minimum of two 4 x i64 vectors, written as icmp slt + select.
; The expected compares test b > a and keep a where the mask is set. The SSE runs
; work on two xmm halves, AVX1 compares 128-bit halves and blends at ymm width,
; AVX2 uses a ymm vpcmpgtq plus blend, and the AVX512 runs use one vpminsq on zmm
; registers.
858 define <4 x i64> @min_lt_v4i64(<4 x i64> %a, <4 x i64> %b) {
859 ; SSE2-LABEL: min_lt_v4i64:
861 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
862 ; SSE2-NEXT: movdqa %xmm0, %xmm5
863 ; SSE2-NEXT: pxor %xmm4, %xmm5
864 ; SSE2-NEXT: movdqa %xmm2, %xmm6
865 ; SSE2-NEXT: pxor %xmm4, %xmm6
866 ; SSE2-NEXT: movdqa %xmm6, %xmm7
867 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
868 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
869 ; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
870 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
871 ; SSE2-NEXT: pand %xmm8, %xmm5
872 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
873 ; SSE2-NEXT: por %xmm5, %xmm6
874 ; SSE2-NEXT: pand %xmm6, %xmm0
875 ; SSE2-NEXT: pandn %xmm2, %xmm6
876 ; SSE2-NEXT: por %xmm6, %xmm0
877 ; SSE2-NEXT: movdqa %xmm1, %xmm2
878 ; SSE2-NEXT: pxor %xmm4, %xmm2
879 ; SSE2-NEXT: pxor %xmm3, %xmm4
880 ; SSE2-NEXT: movdqa %xmm4, %xmm5
881 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
882 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
883 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm4
884 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
885 ; SSE2-NEXT: pand %xmm6, %xmm2
886 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
887 ; SSE2-NEXT: por %xmm2, %xmm4
888 ; SSE2-NEXT: pand %xmm4, %xmm1
889 ; SSE2-NEXT: pandn %xmm3, %xmm4
890 ; SSE2-NEXT: por %xmm4, %xmm1
893 ; SSE41-LABEL: min_lt_v4i64:
895 ; SSE41-NEXT: movdqa %xmm0, %xmm4
896 ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648]
897 ; SSE41-NEXT: pxor %xmm5, %xmm0
898 ; SSE41-NEXT: movdqa %xmm2, %xmm6
899 ; SSE41-NEXT: pxor %xmm5, %xmm6
900 ; SSE41-NEXT: movdqa %xmm6, %xmm7
901 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm7
902 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6
903 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
904 ; SSE41-NEXT: pand %xmm7, %xmm0
905 ; SSE41-NEXT: por %xmm6, %xmm0
906 ; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2
907 ; SSE41-NEXT: movdqa %xmm1, %xmm0
908 ; SSE41-NEXT: pxor %xmm5, %xmm0
909 ; SSE41-NEXT: pxor %xmm3, %xmm5
910 ; SSE41-NEXT: movdqa %xmm5, %xmm4
911 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
912 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm5
913 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
914 ; SSE41-NEXT: pand %xmm4, %xmm0
915 ; SSE41-NEXT: por %xmm5, %xmm0
916 ; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3
917 ; SSE41-NEXT: movapd %xmm2, %xmm0
918 ; SSE41-NEXT: movapd %xmm3, %xmm1
921 ; SSE42-LABEL: min_lt_v4i64:
923 ; SSE42-NEXT: movdqa %xmm0, %xmm4
924 ; SSE42-NEXT: movdqa %xmm2, %xmm0
925 ; SSE42-NEXT: pcmpgtq %xmm4, %xmm0
926 ; SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2
927 ; SSE42-NEXT: movdqa %xmm3, %xmm0
928 ; SSE42-NEXT: pcmpgtq %xmm1, %xmm0
929 ; SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
930 ; SSE42-NEXT: movapd %xmm2, %xmm0
931 ; SSE42-NEXT: movapd %xmm3, %xmm1
934 ; AVX1-LABEL: min_lt_v4i64:
936 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
937 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
938 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
939 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3
940 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
941 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
944 ; AVX2-LABEL: min_lt_v4i64:
946 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
947 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
950 ; AVX512-LABEL: min_lt_v4i64:
952 ; AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
953 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
954 ; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
955 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
957 %1 = icmp slt <4 x i64> %a, %b
958 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
; Per-lane signed minimum of two 4 x i32 vectors, written as icmp slt + select.
; The SSE2 run expects a pcmpgtd mask for b > a that keeps a via pand and b via
; pandn; the SSE4.1, SSE4.2 and AVX-family runs expect a single pminsd/vpminsd.
962 define <4 x i32> @min_lt_v4i32(<4 x i32> %a, <4 x i32> %b) {
963 ; SSE2-LABEL: min_lt_v4i32:
965 ; SSE2-NEXT: movdqa %xmm1, %xmm2
966 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
967 ; SSE2-NEXT: pand %xmm2, %xmm0
968 ; SSE2-NEXT: pandn %xmm1, %xmm2
969 ; SSE2-NEXT: por %xmm2, %xmm0
972 ; SSE41-LABEL: min_lt_v4i32:
974 ; SSE41-NEXT: pminsd %xmm1, %xmm0
977 ; SSE42-LABEL: min_lt_v4i32:
979 ; SSE42-NEXT: pminsd %xmm1, %xmm0
982 ; AVX-LABEL: min_lt_v4i32:
984 ; AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
986 %1 = icmp slt <4 x i32> %a, %b
987 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
; Per-lane signed minimum of two 8 x i32 vectors, written as icmp slt + select.
; The SSE runs process two xmm halves (pcmpgtd mask-and-merge on SSE2, pminsd on
; SSE4.x). AVX1 splits into 128-bit halves around two vpminsd; AVX2 and the
; AVX512 runs expect a single ymm vpminsd.
991 define <8 x i32> @min_lt_v8i32(<8 x i32> %a, <8 x i32> %b) {
992 ; SSE2-LABEL: min_lt_v8i32:
994 ; SSE2-NEXT: movdqa %xmm2, %xmm4
995 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm4
996 ; SSE2-NEXT: pand %xmm4, %xmm0
997 ; SSE2-NEXT: pandn %xmm2, %xmm4
998 ; SSE2-NEXT: por %xmm4, %xmm0
999 ; SSE2-NEXT: movdqa %xmm3, %xmm2
1000 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
1001 ; SSE2-NEXT: pand %xmm2, %xmm1
1002 ; SSE2-NEXT: pandn %xmm3, %xmm2
1003 ; SSE2-NEXT: por %xmm2, %xmm1
1006 ; SSE41-LABEL: min_lt_v8i32:
1008 ; SSE41-NEXT: pminsd %xmm2, %xmm0
1009 ; SSE41-NEXT: pminsd %xmm3, %xmm1
1012 ; SSE42-LABEL: min_lt_v8i32:
1014 ; SSE42-NEXT: pminsd %xmm2, %xmm0
1015 ; SSE42-NEXT: pminsd %xmm3, %xmm1
1018 ; AVX1-LABEL: min_lt_v8i32:
1020 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1021 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1022 ; AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2
1023 ; AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
1024 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1027 ; AVX2-LABEL: min_lt_v8i32:
1029 ; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1032 ; AVX512-LABEL: min_lt_v8i32:
1034 ; AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1036 %1 = icmp slt <8 x i32> %a, %b
1037 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
; Per-lane signed minimum of two 8 x i16 vectors, written as icmp slt + select.
; Every SSE run (shared SSE prefix) expects a single pminsw, and every AVX-family
; run expects a single vpminsw.
1041 define <8 x i16> @min_lt_v8i16(<8 x i16> %a, <8 x i16> %b) {
1042 ; SSE-LABEL: min_lt_v8i16:
1044 ; SSE-NEXT: pminsw %xmm1, %xmm0
1047 ; AVX-LABEL: min_lt_v8i16:
1049 ; AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
1051 %1 = icmp slt <8 x i16> %a, %b
1052 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
; Per-lane signed minimum of two 16 x i16 vectors, written as icmp slt + select.
; The SSE runs expect one pminsw per xmm half. AVX1 splits into 128-bit halves
; around two vpminsw; AVX2 and the AVX512 runs expect a single ymm vpminsw.
1056 define <16 x i16> @min_lt_v16i16(<16 x i16> %a, <16 x i16> %b) {
1057 ; SSE-LABEL: min_lt_v16i16:
1059 ; SSE-NEXT: pminsw %xmm2, %xmm0
1060 ; SSE-NEXT: pminsw %xmm3, %xmm1
1063 ; AVX1-LABEL: min_lt_v16i16:
1065 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1066 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1067 ; AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
1068 ; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
1069 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1072 ; AVX2-LABEL: min_lt_v16i16:
1074 ; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
1077 ; AVX512-LABEL: min_lt_v16i16:
1079 ; AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
1081 %1 = icmp slt <16 x i16> %a, %b
1082 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
; Per-lane signed minimum of two 16 x i8 vectors, written as icmp slt + select.
; The SSE2 run expects a pcmpgtb mask for b > a that keeps a via pand and b via
; pandn; the SSE4.1, SSE4.2 and AVX-family runs expect a single pminsb/vpminsb.
1086 define <16 x i8> @min_lt_v16i8(<16 x i8> %a, <16 x i8> %b) {
1087 ; SSE2-LABEL: min_lt_v16i8:
1089 ; SSE2-NEXT: movdqa %xmm1, %xmm2
1090 ; SSE2-NEXT: pcmpgtb %xmm0, %xmm2
1091 ; SSE2-NEXT: pand %xmm2, %xmm0
1092 ; SSE2-NEXT: pandn %xmm1, %xmm2
1093 ; SSE2-NEXT: por %xmm2, %xmm0
1096 ; SSE41-LABEL: min_lt_v16i8:
1098 ; SSE41-NEXT: pminsb %xmm1, %xmm0
1101 ; SSE42-LABEL: min_lt_v16i8:
1103 ; SSE42-NEXT: pminsb %xmm1, %xmm0
1106 ; AVX-LABEL: min_lt_v16i8:
1108 ; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1110 %1 = icmp slt <16 x i8> %a, %b
1111 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
; Per-lane signed minimum of two 32 x i8 vectors, written as icmp slt + select.
; The SSE runs process two xmm halves (pcmpgtb mask-and-merge on SSE2, pminsb on
; SSE4.x). AVX1 splits into 128-bit halves around two vpminsb; AVX2 and the
; AVX512 runs expect a single ymm vpminsb.
1115 define <32 x i8> @min_lt_v32i8(<32 x i8> %a, <32 x i8> %b) {
1116 ; SSE2-LABEL: min_lt_v32i8:
1118 ; SSE2-NEXT: movdqa %xmm2, %xmm4
1119 ; SSE2-NEXT: pcmpgtb %xmm0, %xmm4
1120 ; SSE2-NEXT: pand %xmm4, %xmm0
1121 ; SSE2-NEXT: pandn %xmm2, %xmm4
1122 ; SSE2-NEXT: por %xmm4, %xmm0
1123 ; SSE2-NEXT: movdqa %xmm3, %xmm2
1124 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
1125 ; SSE2-NEXT: pand %xmm2, %xmm1
1126 ; SSE2-NEXT: pandn %xmm3, %xmm2
1127 ; SSE2-NEXT: por %xmm2, %xmm1
1130 ; SSE41-LABEL: min_lt_v32i8:
1132 ; SSE41-NEXT: pminsb %xmm2, %xmm0
1133 ; SSE41-NEXT: pminsb %xmm3, %xmm1
1136 ; SSE42-LABEL: min_lt_v32i8:
1138 ; SSE42-NEXT: pminsb %xmm2, %xmm0
1139 ; SSE42-NEXT: pminsb %xmm3, %xmm1
1142 ; AVX1-LABEL: min_lt_v32i8:
1144 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1145 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1146 ; AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
1147 ; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1148 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1151 ; AVX2-LABEL: min_lt_v32i8:
1153 ; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
1156 ; AVX512-LABEL: min_lt_v32i8:
1158 ; AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
1160 %1 = icmp slt <32 x i8> %a, %b
1161 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
1166 ; Signed Minimum (LE)
; Signed minimum of two <2 x i64> vectors in the "less than or equal" form
; (icmp sle + select).
; Expected lowering per the checks in this function -
;  * SSE2 and SSE4.1 xor both inputs with the constant [2147483648,2147483648]
;    and combine 32-bit pcmpgtd/pcmpeqd results to form the 64-bit compare
;    mask; SSE2 then blends with pand/pandn/por, SSE4.1 with blendvpd.
;  * SSE4.2, AVX1 and AVX2 use (v)pcmpgtq followed by (v)blendvpd.
;  * AVX512 promotes the xmm operands to zmm (the "kill" annotations), uses a
;    zmm vpminsq, and ends with vzeroupper.
1169 define <2 x i64> @min_le_v2i64(<2 x i64> %a, <2 x i64> %b) {
1170 ; SSE2-LABEL: min_le_v2i64:
1172 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
1173 ; SSE2-NEXT: movdqa %xmm0, %xmm3
1174 ; SSE2-NEXT: pxor %xmm2, %xmm3
1175 ; SSE2-NEXT: pxor %xmm1, %xmm2
1176 ; SSE2-NEXT: movdqa %xmm2, %xmm4
1177 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
1178 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1179 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
1180 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1181 ; SSE2-NEXT: pand %xmm5, %xmm2
1182 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
1183 ; SSE2-NEXT: por %xmm2, %xmm3
1184 ; SSE2-NEXT: pand %xmm3, %xmm0
1185 ; SSE2-NEXT: pandn %xmm1, %xmm3
1186 ; SSE2-NEXT: por %xmm3, %xmm0
1189 ; SSE41-LABEL: min_le_v2i64:
1191 ; SSE41-NEXT: movdqa %xmm0, %xmm2
1192 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648]
1193 ; SSE41-NEXT: pxor %xmm3, %xmm0
1194 ; SSE41-NEXT: pxor %xmm1, %xmm3
1195 ; SSE41-NEXT: movdqa %xmm3, %xmm4
1196 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
1197 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
1198 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
1199 ; SSE41-NEXT: pand %xmm4, %xmm0
1200 ; SSE41-NEXT: por %xmm3, %xmm0
1201 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
1202 ; SSE41-NEXT: movapd %xmm1, %xmm0
1205 ; SSE42-LABEL: min_le_v2i64:
1207 ; SSE42-NEXT: movdqa %xmm0, %xmm2
1208 ; SSE42-NEXT: movdqa %xmm1, %xmm0
1209 ; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
1210 ; SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
1211 ; SSE42-NEXT: movapd %xmm1, %xmm0
1214 ; AVX1-LABEL: min_le_v2i64:
1216 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
1217 ; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1220 ; AVX2-LABEL: min_le_v2i64:
1222 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
1223 ; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1226 ; AVX512-LABEL: min_le_v2i64:
1228 ; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1229 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1230 ; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
1231 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1232 ; AVX512-NEXT: vzeroupper
1234 %1 = icmp sle <2 x i64> %a, %b
1235 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
; Signed minimum of two <4 x i64> vectors (icmp sle + select).
; Expected lowering per the checks in this function -
;  * SSE2 and SSE4.1 process the two 128-bit halves one after the other, each
;    time xoring with [2147483648,2147483648] and combining pcmpgtd/pcmpeqd
;    into a 64-bit mask (SSE2 blends with pand/pandn/por, SSE4.1 with
;    blendvpd).
;  * SSE4.2 uses one pcmpgtq + blendvpd per half.
;  * AVX1 compares the halves separately with vpcmpgtq, rejoins the mask with
;    vinsertf128, then does one ymm vblendvpd; AVX2 compares and blends in ymm.
;  * AVX512 promotes the ymm operands to zmm and uses a zmm vpminsq.
1239 define <4 x i64> @min_le_v4i64(<4 x i64> %a, <4 x i64> %b) {
1240 ; SSE2-LABEL: min_le_v4i64:
1242 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
1243 ; SSE2-NEXT: movdqa %xmm0, %xmm5
1244 ; SSE2-NEXT: pxor %xmm4, %xmm5
1245 ; SSE2-NEXT: movdqa %xmm2, %xmm6
1246 ; SSE2-NEXT: pxor %xmm4, %xmm6
1247 ; SSE2-NEXT: movdqa %xmm6, %xmm7
1248 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
1249 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
1250 ; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
1251 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
1252 ; SSE2-NEXT: pand %xmm8, %xmm5
1253 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
1254 ; SSE2-NEXT: por %xmm5, %xmm6
1255 ; SSE2-NEXT: pand %xmm6, %xmm0
1256 ; SSE2-NEXT: pandn %xmm2, %xmm6
1257 ; SSE2-NEXT: por %xmm6, %xmm0
1258 ; SSE2-NEXT: movdqa %xmm1, %xmm2
1259 ; SSE2-NEXT: pxor %xmm4, %xmm2
1260 ; SSE2-NEXT: pxor %xmm3, %xmm4
1261 ; SSE2-NEXT: movdqa %xmm4, %xmm5
1262 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
1263 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
1264 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm4
1265 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
1266 ; SSE2-NEXT: pand %xmm6, %xmm2
1267 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
1268 ; SSE2-NEXT: por %xmm2, %xmm4
1269 ; SSE2-NEXT: pand %xmm4, %xmm1
1270 ; SSE2-NEXT: pandn %xmm3, %xmm4
1271 ; SSE2-NEXT: por %xmm4, %xmm1
1274 ; SSE41-LABEL: min_le_v4i64:
1276 ; SSE41-NEXT: movdqa %xmm0, %xmm4
1277 ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648]
1278 ; SSE41-NEXT: pxor %xmm5, %xmm0
1279 ; SSE41-NEXT: movdqa %xmm2, %xmm6
1280 ; SSE41-NEXT: pxor %xmm5, %xmm6
1281 ; SSE41-NEXT: movdqa %xmm6, %xmm7
1282 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm7
1283 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6
1284 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
1285 ; SSE41-NEXT: pand %xmm7, %xmm0
1286 ; SSE41-NEXT: por %xmm6, %xmm0
1287 ; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2
1288 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1289 ; SSE41-NEXT: pxor %xmm5, %xmm0
1290 ; SSE41-NEXT: pxor %xmm3, %xmm5
1291 ; SSE41-NEXT: movdqa %xmm5, %xmm4
1292 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
1293 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm5
1294 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
1295 ; SSE41-NEXT: pand %xmm4, %xmm0
1296 ; SSE41-NEXT: por %xmm5, %xmm0
1297 ; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3
1298 ; SSE41-NEXT: movapd %xmm2, %xmm0
1299 ; SSE41-NEXT: movapd %xmm3, %xmm1
1302 ; SSE42-LABEL: min_le_v4i64:
1304 ; SSE42-NEXT: movdqa %xmm0, %xmm4
1305 ; SSE42-NEXT: movdqa %xmm2, %xmm0
1306 ; SSE42-NEXT: pcmpgtq %xmm4, %xmm0
1307 ; SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2
1308 ; SSE42-NEXT: movdqa %xmm3, %xmm0
1309 ; SSE42-NEXT: pcmpgtq %xmm1, %xmm0
1310 ; SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
1311 ; SSE42-NEXT: movapd %xmm2, %xmm0
1312 ; SSE42-NEXT: movapd %xmm3, %xmm1
1315 ; AVX1-LABEL: min_le_v4i64:
1317 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1318 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1319 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
1320 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3
1321 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1322 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1325 ; AVX2-LABEL: min_le_v4i64:
1327 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
1328 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1331 ; AVX512-LABEL: min_le_v4i64:
1333 ; AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
1334 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1335 ; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
1336 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1338 %1 = icmp sle <4 x i64> %a, %b
1339 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
; Signed minimum of two <4 x i32> vectors (icmp sle + select).
; Per the checks, SSE2 uses pcmpgtd with a pand/pandn/por blend, while SSE4.1,
; SSE4.2 and AVX each emit a single (v)pminsd.
1343 define <4 x i32> @min_le_v4i32(<4 x i32> %a, <4 x i32> %b) {
1344 ; SSE2-LABEL: min_le_v4i32:
1346 ; SSE2-NEXT: movdqa %xmm1, %xmm2
1347 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
1348 ; SSE2-NEXT: pand %xmm2, %xmm0
1349 ; SSE2-NEXT: pandn %xmm1, %xmm2
1350 ; SSE2-NEXT: por %xmm2, %xmm0
1353 ; SSE41-LABEL: min_le_v4i32:
1355 ; SSE41-NEXT: pminsd %xmm1, %xmm0
1358 ; SSE42-LABEL: min_le_v4i32:
1360 ; SSE42-NEXT: pminsd %xmm1, %xmm0
1363 ; AVX-LABEL: min_le_v4i32:
1365 ; AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
1367 %1 = icmp sle <4 x i32> %a, %b
1368 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
; Signed minimum of two <8 x i32> vectors (icmp sle + select).
; Per the checks, the SSE targets work on two 128-bit halves (SSE2 with
; pcmpgtd + pand/pandn/por, SSE4.1 and SSE4.2 with pminsd), AVX1 splits with
; vextractf128 and rejoins with vinsertf128 around two vpminsd, and AVX2 and
; AVX512 emit one ymm vpminsd.
1372 define <8 x i32> @min_le_v8i32(<8 x i32> %a, <8 x i32> %b) {
1373 ; SSE2-LABEL: min_le_v8i32:
1375 ; SSE2-NEXT: movdqa %xmm2, %xmm4
1376 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm4
1377 ; SSE2-NEXT: pand %xmm4, %xmm0
1378 ; SSE2-NEXT: pandn %xmm2, %xmm4
1379 ; SSE2-NEXT: por %xmm4, %xmm0
1380 ; SSE2-NEXT: movdqa %xmm3, %xmm2
1381 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
1382 ; SSE2-NEXT: pand %xmm2, %xmm1
1383 ; SSE2-NEXT: pandn %xmm3, %xmm2
1384 ; SSE2-NEXT: por %xmm2, %xmm1
1387 ; SSE41-LABEL: min_le_v8i32:
1389 ; SSE41-NEXT: pminsd %xmm2, %xmm0
1390 ; SSE41-NEXT: pminsd %xmm3, %xmm1
1393 ; SSE42-LABEL: min_le_v8i32:
1395 ; SSE42-NEXT: pminsd %xmm2, %xmm0
1396 ; SSE42-NEXT: pminsd %xmm3, %xmm1
1399 ; AVX1-LABEL: min_le_v8i32:
1401 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1402 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1403 ; AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2
1404 ; AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
1405 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1408 ; AVX2-LABEL: min_le_v8i32:
1410 ; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1413 ; AVX512-LABEL: min_le_v8i32:
1415 ; AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1417 %1 = icmp sle <8 x i32> %a, %b
1418 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
; Signed minimum of two <8 x i16> vectors (icmp sle + select).
; Per the checks, every SSE level shares one expectation (a single pminsw) and
; every AVX level shares another (a single vpminsw).
1422 define <8 x i16> @min_le_v8i16(<8 x i16> %a, <8 x i16> %b) {
1423 ; SSE-LABEL: min_le_v8i16:
1425 ; SSE-NEXT: pminsw %xmm1, %xmm0
1428 ; AVX-LABEL: min_le_v8i16:
1430 ; AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
1432 %1 = icmp sle <8 x i16> %a, %b
1433 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
; Signed minimum of two <16 x i16> vectors (icmp sle + select).
; Per the checks, all SSE levels emit one pminsw per 128-bit half, AVX1 splits
; with vextractf128 and rejoins with vinsertf128 around two vpminsw, and AVX2
; and AVX512 emit one ymm vpminsw.
1437 define <16 x i16> @min_le_v16i16(<16 x i16> %a, <16 x i16> %b) {
1438 ; SSE-LABEL: min_le_v16i16:
1440 ; SSE-NEXT: pminsw %xmm2, %xmm0
1441 ; SSE-NEXT: pminsw %xmm3, %xmm1
1444 ; AVX1-LABEL: min_le_v16i16:
1446 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1447 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1448 ; AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
1449 ; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
1450 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1453 ; AVX2-LABEL: min_le_v16i16:
1455 ; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
1458 ; AVX512-LABEL: min_le_v16i16:
1460 ; AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
1462 %1 = icmp sle <16 x i16> %a, %b
1463 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
; Signed minimum of two <16 x i8> vectors (icmp sle + select).
; Per the checks, SSE2 uses pcmpgtb with a pand/pandn/por blend, while SSE4.1,
; SSE4.2 and AVX each emit a single (v)pminsb.
1467 define <16 x i8> @min_le_v16i8(<16 x i8> %a, <16 x i8> %b) {
1468 ; SSE2-LABEL: min_le_v16i8:
1470 ; SSE2-NEXT: movdqa %xmm1, %xmm2
1471 ; SSE2-NEXT: pcmpgtb %xmm0, %xmm2
1472 ; SSE2-NEXT: pand %xmm2, %xmm0
1473 ; SSE2-NEXT: pandn %xmm1, %xmm2
1474 ; SSE2-NEXT: por %xmm2, %xmm0
1477 ; SSE41-LABEL: min_le_v16i8:
1479 ; SSE41-NEXT: pminsb %xmm1, %xmm0
1482 ; SSE42-LABEL: min_le_v16i8:
1484 ; SSE42-NEXT: pminsb %xmm1, %xmm0
1487 ; AVX-LABEL: min_le_v16i8:
1489 ; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1491 %1 = icmp sle <16 x i8> %a, %b
1492 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
; Signed minimum of two <32 x i8> vectors (icmp sle + select).
; Per the checks, the SSE targets work on two 128-bit halves (SSE2 with
; pcmpgtb + pand/pandn/por, SSE4.1 and SSE4.2 with pminsb), AVX1 splits with
; vextractf128 and rejoins with vinsertf128 around two vpminsb, and AVX2 and
; AVX512 emit one ymm vpminsb.
1496 define <32 x i8> @min_le_v32i8(<32 x i8> %a, <32 x i8> %b) {
1497 ; SSE2-LABEL: min_le_v32i8:
1499 ; SSE2-NEXT: movdqa %xmm2, %xmm4
1500 ; SSE2-NEXT: pcmpgtb %xmm0, %xmm4
1501 ; SSE2-NEXT: pand %xmm4, %xmm0
1502 ; SSE2-NEXT: pandn %xmm2, %xmm4
1503 ; SSE2-NEXT: por %xmm4, %xmm0
1504 ; SSE2-NEXT: movdqa %xmm3, %xmm2
1505 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
1506 ; SSE2-NEXT: pand %xmm2, %xmm1
1507 ; SSE2-NEXT: pandn %xmm3, %xmm2
1508 ; SSE2-NEXT: por %xmm2, %xmm1
1511 ; SSE41-LABEL: min_le_v32i8:
1513 ; SSE41-NEXT: pminsb %xmm2, %xmm0
1514 ; SSE41-NEXT: pminsb %xmm3, %xmm1
1517 ; SSE42-LABEL: min_le_v32i8:
1519 ; SSE42-NEXT: pminsb %xmm2, %xmm0
1520 ; SSE42-NEXT: pminsb %xmm3, %xmm1
1523 ; AVX1-LABEL: min_le_v32i8:
1525 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1526 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1527 ; AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
1528 ; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1529 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1532 ; AVX2-LABEL: min_le_v32i8:
1534 ; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
1537 ; AVX512-LABEL: min_le_v32i8:
1539 ; AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
1541 %1 = icmp sle <32 x i8> %a, %b
1542 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
; Constant-input form of the signed max (icmp sgt + select) test for <2 x i64>.
; Each operand is a constant vector whose lane 0 is re-inserted with the value
; it already holds. The checks expect no compare or blend, only a load of the
; result <-1, 7>, which the asm comment prints unsigned as
; [18446744073709551615,7].
1550 define <2 x i64> @max_gt_v2i64c() {
1551 ; SSE-LABEL: max_gt_v2i64c:
1553 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551615,7]
1556 ; AVX-LABEL: max_gt_v2i64c:
1558 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551615,7]
1560 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
1561 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
1562 %3 = icmp sgt <2 x i64> %1, %2
1563 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
; Constant-input form of the signed max (icmp sgt + select) test for <4 x i64>.
; Each operand is a constant vector whose lane 0 is re-inserted with the value
; it already holds. The lane-wise max is <-1, -1, 7, 7>. On the SSE targets the
; checks fill xmm1 with [7,7] and produce the all-ones (-1) half in xmm0 via
; pcmpeqd of a register with itself; AVX loads the whole ymm constant.
1567 define <4 x i64> @max_gt_v4i64c() {
1568 ; SSE-LABEL: max_gt_v4i64c:
1570 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,7]
1571 ; SSE-NEXT: pcmpeqd %xmm0, %xmm0
1574 ; AVX-LABEL: max_gt_v4i64c:
1576 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,7,7]
1578 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
1579 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
1580 %3 = icmp sgt <4 x i64> %1, %2
1581 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
; Constant-input form of the signed max (icmp sgt + select) test for <4 x i32>.
; Each operand is a constant vector whose lane 0 is re-inserted with the value
; it already holds. The checks expect a single constant load of the result
; <-1, -1, 7, 7>, with -1 printed unsigned as 4294967295.
1585 define <4 x i32> @max_gt_v4i32c() {
1586 ; SSE-LABEL: max_gt_v4i32c:
1588 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
1591 ; AVX-LABEL: max_gt_v4i32c:
1593 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
1595 %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
1596 %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
1597 %3 = icmp sgt <4 x i32> %1, %2
1598 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
; Constant-input form of the signed max (icmp sgt + select) test for <8 x i32>.
; Each operand is a constant vector whose lane 0 is re-inserted with the value
; it already holds. The lane-wise max is <-1, -3, -3, -1, 7, 5, 5, 7>; the
; checks expect it as two xmm constant loads on SSE and one ymm load on AVX,
; with negative lanes printed unsigned (-1 as 4294967295, -3 as 4294967293).
1602 define <8 x i32> @max_gt_v8i32c() {
1603 ; SSE-LABEL: max_gt_v8i32c:
1605 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295]
1606 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7]
1609 ; AVX-LABEL: max_gt_v8i32c:
1611 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7]
1613 %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
1614 %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
1615 %3 = icmp sgt <8 x i32> %1, %2
1616 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
; Constant-input form of the signed max (icmp sgt + select) test for <8 x i16>.
; Each operand is a constant vector whose lane 0 is re-inserted with the value
; it already holds. The checks expect a single constant load of the result
; <-1, -3, -3, -1, 7, 5, 5, 7>, with negative lanes printed unsigned (-1 as
; 65535, -3 as 65533).
1620 define <8 x i16> @max_gt_v8i16c() {
1621 ; SSE-LABEL: max_gt_v8i16c:
1623 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
1626 ; AVX-LABEL: max_gt_v8i16c:
1628 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
1630 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
1631 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
1632 %3 = icmp sgt <8 x i16> %1, %2
1633 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
; Constant-input form of the signed max (icmp sgt + select) test for
; <16 x i16>. Each operand is a constant vector whose lane 0 is re-inserted
; with the value it already holds. The lane-wise max is
; <-1,-2,-3,-4,-3,-2,-1,0, 7,6,5,4,5,6,7,8>; the checks expect it as two xmm
; constant loads on SSE and one ymm load on AVX, with negative lanes printed
; as unsigned 16-bit values.
1637 define <16 x i16> @max_gt_v16i16c() {
1638 ; SSE-LABEL: max_gt_v16i16c:
1640 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0]
1641 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8]
1644 ; AVX-LABEL: max_gt_v16i16c:
1646 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8]
1648 %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
1649 %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
1650 %3 = icmp sgt <16 x i16> %1, %2
1651 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
; Constant-input form of the signed max (icmp sgt + select) test for <16 x i8>.
; Each operand is a constant vector whose lane 0 is re-inserted with the value
; it already holds. The checks expect a single constant load of the result
; <-1,-2,-3,-4,-3,-2,-1,0, 7,6,5,4,5,6,7,8>, with negative lanes printed as
; unsigned bytes (-1 as 255, and so on).
1655 define <16 x i8> @max_gt_v16i8c() {
1656 ; SSE-LABEL: max_gt_v16i8c:
1658 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
1661 ; AVX-LABEL: max_gt_v16i8c:
1663 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
1665 %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
1666 %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
1667 %3 = icmp sgt <16 x i8> %1, %2
1668 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
; Constant-input form of the signed max test for <2 x i64>, using the
; "greater than or equal" predicate (icmp sge + select). Each operand is a
; constant vector whose lane 0 is re-inserted with the value it already holds.
; The checks expect only a constant load of the result <-1, 7>, printed
; unsigned as [18446744073709551615,7].
1672 define <2 x i64> @max_ge_v2i64c() {
1673 ; SSE-LABEL: max_ge_v2i64c:
1675 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551615,7]
1678 ; AVX-LABEL: max_ge_v2i64c:
1680 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551615,7]
1682 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
1683 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
1684 %3 = icmp sge <2 x i64> %1, %2
1685 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
; Constant-input form of the signed max test for <4 x i64> (icmp sge +
; select). Each operand is a constant vector whose lane 0 is re-inserted with
; the value it already holds. The lane-wise max is <-1, -1, 7, 7>. On the SSE
; targets the checks fill xmm1 with [7,7] and produce the all-ones (-1) half
; in xmm0 via pcmpeqd of a register with itself; AVX loads the whole ymm
; constant.
1689 define <4 x i64> @max_ge_v4i64c() {
1690 ; SSE-LABEL: max_ge_v4i64c:
1692 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,7]
1693 ; SSE-NEXT: pcmpeqd %xmm0, %xmm0
1696 ; AVX-LABEL: max_ge_v4i64c:
1698 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,7,7]
1700 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
1701 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
1702 %3 = icmp sge <4 x i64> %1, %2
1703 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
; Constant-input form of the signed max test for <4 x i32> (icmp sge +
; select). Each operand is a constant vector whose lane 0 is re-inserted with
; the value it already holds. The checks expect a single constant load of the
; result <-1, -1, 7, 7>, with -1 printed unsigned as 4294967295.
1707 define <4 x i32> @max_ge_v4i32c() {
1708 ; SSE-LABEL: max_ge_v4i32c:
1710 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
1713 ; AVX-LABEL: max_ge_v4i32c:
1715 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
1717 %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
1718 %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
1719 %3 = icmp sge <4 x i32> %1, %2
1720 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
; Constant-input form of the signed max test for <8 x i32> (icmp sge +
; select). Each operand is a constant vector whose lane 0 is re-inserted with
; the value it already holds. The lane-wise max is
; <-1, -3, -3, -1, 7, 5, 5, 7>; the checks expect it as two xmm constant loads
; on SSE and one ymm load on AVX, with negative lanes printed unsigned.
1724 define <8 x i32> @max_ge_v8i32c() {
1725 ; SSE-LABEL: max_ge_v8i32c:
1727 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295]
1728 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7]
1731 ; AVX-LABEL: max_ge_v8i32c:
1733 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7]
1735 %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
1736 %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
1737 %3 = icmp sge <8 x i32> %1, %2
1738 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
; Constant-input form of the signed max test for <8 x i16> (icmp sge +
; select). Each operand is a constant vector whose lane 0 is re-inserted with
; the value it already holds. The checks expect a single constant load of the
; result <-1, -3, -3, -1, 7, 5, 5, 7>, with negative lanes printed unsigned
; (-1 as 65535, -3 as 65533).
1742 define <8 x i16> @max_ge_v8i16c() {
1743 ; SSE-LABEL: max_ge_v8i16c:
1745 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
1748 ; AVX-LABEL: max_ge_v8i16c:
1750 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
1752 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
1753 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
1754 %3 = icmp sge <8 x i16> %1, %2
1755 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
; Constant-input form of the signed max test for <16 x i16> (icmp sge +
; select). Each operand is a constant vector whose lane 0 is re-inserted with
; the value it already holds. The lane-wise max is
; <-1,-2,-3,-4,-3,-2,-1,0, 7,6,5,4,5,6,7,8>; the checks expect it as two xmm
; constant loads on SSE and one ymm load on AVX, with negative lanes printed
; as unsigned 16-bit values.
1759 define <16 x i16> @max_ge_v16i16c() {
1760 ; SSE-LABEL: max_ge_v16i16c:
1762 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0]
1763 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8]
1766 ; AVX-LABEL: max_ge_v16i16c:
1768 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8]
1770 %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
1771 %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
1772 %3 = icmp sge <16 x i16> %1, %2
1773 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
; Constant-input form of the signed max test for <16 x i8> (icmp sge +
; select). Each operand is a constant vector whose lane 0 is re-inserted with
; the value it already holds. The checks expect a single constant load of the
; result <-1,-2,-3,-4,-3,-2,-1,0, 7,6,5,4,5,6,7,8>, with negative lanes
; printed as unsigned bytes.
1777 define <16 x i8> @max_ge_v16i8c() {
1778 ; SSE-LABEL: max_ge_v16i8c:
1780 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
1783 ; AVX-LABEL: max_ge_v16i8c:
1785 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
1787 %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
1788 %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
1789 %3 = icmp sge <16 x i8> %1, %2
1790 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
; Constant-input form of the signed min (icmp slt + select) test for <2 x i64>.
; Each operand is a constant vector whose lane 0 is re-inserted with the value
; it already holds. The checks expect only a constant load of the result
; <-7, 1>, printed unsigned as [18446744073709551609,1].
1794 define <2 x i64> @min_lt_v2i64c() {
1795 ; SSE-LABEL: min_lt_v2i64c:
1797 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,1]
1800 ; AVX-LABEL: min_lt_v2i64c:
1802 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551609,1]
1804 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
1805 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
1806 %3 = icmp slt <2 x i64> %1, %2
1807 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
; Constant-input form of the signed min (icmp slt + select) test for <4 x i64>.
; Each operand is a constant vector whose lane 0 is re-inserted with the value
; it already holds. The lane-wise min is <-7, -7, 1, 1>; the checks expect it
; as two xmm constant loads on SSE and one ymm load on AVX, with -7 printed
; unsigned as 18446744073709551609.
1811 define <4 x i64> @min_lt_v4i64c() {
1812 ; SSE-LABEL: min_lt_v4i64c:
1814 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,18446744073709551609]
1815 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1]
1818 ; AVX-LABEL: min_lt_v4i64c:
1820 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551609,18446744073709551609,1,1]
1822 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
1823 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
1824 %3 = icmp slt <4 x i64> %1, %2
1825 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
; Constant-input form of the signed min (icmp slt + select) test for <4 x i32>.
; Each operand is a constant vector whose lane 0 is re-inserted with the value
; it already holds. The checks expect a single constant load of the result
; <-7, -7, 1, 1>, with -7 printed unsigned as 4294967289.
1829 define <4 x i32> @min_lt_v4i32c() {
1830 ; SSE-LABEL: min_lt_v4i32c:
1832 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
1835 ; AVX-LABEL: min_lt_v4i32c:
1837 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
1839 %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
1840 %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
1841 %3 = icmp slt <4 x i32> %1, %2
1842 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
; Constant-input form of the signed min (icmp slt + select) test for <8 x i32>.
; Each operand is a constant vector whose lane 0 is re-inserted with the value
; it already holds. The lane-wise min is <-7, -5, -5, -7, 1, 3, 3, 1>; the
; checks expect it as two xmm constant loads on SSE and one ymm load on AVX,
; with negative lanes printed unsigned (-7 as 4294967289, -5 as 4294967291).
1846 define <8 x i32> @min_lt_v8i32c() {
1847 ; SSE-LABEL: min_lt_v8i32c:
1849 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
1850 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1]
1853 ; AVX-LABEL: min_lt_v8i32c:
1855 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
1857 %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
1858 %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
1859 %3 = icmp slt <8 x i32> %1, %2
1860 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
; Constant-input form of the signed min (icmp slt + select) test for <8 x i16>.
; Each operand is a constant vector whose lane 0 is re-inserted with the value
; it already holds. The checks expect a single constant load of the result
; <-7, -5, -5, -7, 1, 3, 3, 1>, with negative lanes printed unsigned (-7 as
; 65529, -5 as 65531).
1864 define <8 x i16> @min_lt_v8i16c() {
1865 ; SSE-LABEL: min_lt_v8i16c:
1867 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
1870 ; AVX-LABEL: min_lt_v8i16c:
1872 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
1874 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
1875 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
1876 %3 = icmp slt <8 x i16> %1, %2
1877 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
; Constant-input form of the signed min (icmp slt + select) test for
; <16 x i16>. Each operand is a constant vector whose lane 0 is re-inserted
; with the value it already holds. The lane-wise min is
; <-7,-6,-5,-4,-5,-6,-7,0, 1,2,3,4,3,2,1,0>; the checks expect it as two xmm
; constant loads on SSE and one ymm load on AVX, with negative lanes printed
; as unsigned 16-bit values.
1881 define <16 x i16> @min_lt_v16i16c() {
1882 ; SSE-LABEL: min_lt_v16i16c:
1884 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65530,65531,65532,65531,65530,65529,0]
1885 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0]
1888 ; AVX-LABEL: min_lt_v16i16c:
1890 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65529,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0]
1892 %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
1893 %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
1894 %3 = icmp slt <16 x i16> %1, %2
1895 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
; Constant-input form of the signed min (icmp slt + select) test for <16 x i8>.
; Each operand is a constant vector whose lane 0 is re-inserted with the value
; it already holds. The checks expect a single constant load of the result
; <-7,-6,-5,-4,-5,-6,-7,0, 1,2,3,4,3,2,1,0>, with negative lanes printed as
; unsigned bytes (-7 as 249, and so on).
1899 define <16 x i8> @min_lt_v16i8c() {
1900 ; SSE-LABEL: min_lt_v16i8c:
1902 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
1905 ; AVX-LABEL: min_lt_v16i8c:
1907 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
1909 %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
1910 %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
1911 %3 = icmp slt <16 x i8> %1, %2
1912 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
; Constant-input form of the signed min test for <2 x i64>, using the "less
; than or equal" predicate (icmp sle + select). Each operand is a constant
; vector whose lane 0 is re-inserted with the value it already holds. The
; checks expect only a constant load of the result <-7, 1>, printed unsigned
; as [18446744073709551609,1].
1916 define <2 x i64> @min_le_v2i64c() {
1917 ; SSE-LABEL: min_le_v2i64c:
1919 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,1]
1922 ; AVX-LABEL: min_le_v2i64c:
1924 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551609,1]
1926 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
1927 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
1928 %3 = icmp sle <2 x i64> %1, %2
1929 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
; Constant-input form of the signed min test for <4 x i64> (icmp sle +
; select). Each operand is a constant vector whose lane 0 is re-inserted with
; the value it already holds. The lane-wise min is <-7, -7, 1, 1>; the checks
; expect it as two xmm constant loads on SSE and one ymm load on AVX, with -7
; printed unsigned as 18446744073709551609.
1933 define <4 x i64> @min_le_v4i64c() {
1934 ; SSE-LABEL: min_le_v4i64c:
1936 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,18446744073709551609]
1937 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1]
1940 ; AVX-LABEL: min_le_v4i64c:
1942 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551609,18446744073709551609,1,1]
1944 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
1945 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
1946 %3 = icmp sle <4 x i64> %1, %2
1947 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
; Constant-input form of the signed min test for <4 x i32> (icmp sle +
; select). Each operand is a constant vector whose lane 0 is re-inserted with
; the value it already holds. The checks expect a single constant load of the
; result <-7, -7, 1, 1>, with -7 printed unsigned as 4294967289.
1951 define <4 x i32> @min_le_v4i32c() {
1952 ; SSE-LABEL: min_le_v4i32c:
1954 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
1957 ; AVX-LABEL: min_le_v4i32c:
1959 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
1961 %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
1962 %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
1963 %3 = icmp sle <4 x i32> %1, %2
1964 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
; Constant-input form of the signed min test for <8 x i32> (icmp sle +
; select). Each operand is a constant vector whose lane 0 is re-inserted with
; the value it already holds. The lane-wise min is
; <-7, -5, -5, -7, 1, 3, 3, 1>; the checks expect it as two xmm constant loads
; on SSE and one ymm load on AVX, with negative lanes printed unsigned.
1968 define <8 x i32> @min_le_v8i32c() {
1969 ; SSE-LABEL: min_le_v8i32c:
1971 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
1972 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1]
1975 ; AVX-LABEL: min_le_v8i32c:
1977 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
1979 %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
1980 %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
1981 %3 = icmp sle <8 x i32> %1, %2
1982 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
; Constant-input form of the signed min test for <8 x i16> (icmp sle +
; select). Each operand is a constant vector whose lane 0 is re-inserted with
; the value it already holds. The checks expect a single constant load of the
; result <-7, -5, -5, -7, 1, 3, 3, 1>, with negative lanes printed unsigned
; (-7 as 65529, -5 as 65531).
1986 define <8 x i16> @min_le_v8i16c() {
1987 ; SSE-LABEL: min_le_v8i16c:
1989 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
1992 ; AVX-LABEL: min_le_v8i16c:
1994 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
1996 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
1997 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
1998 %3 = icmp sle <8 x i16> %1, %2
1999 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
; Constant-input form of the signed min test for <16 x i16> (icmp sle +
; select). Each operand is a constant vector whose lane 0 is re-inserted with
; the value it already holds. The lane-wise min is
; <-7,-6,-5,-4,-5,-6,-7,0, 1,2,3,4,3,2,1,0>; the checks expect it as two xmm
; constant loads on SSE and one ymm load on AVX, with negative lanes printed
; as unsigned 16-bit values.
2003 define <16 x i16> @min_le_v16i16c() {
2004 ; SSE-LABEL: min_le_v16i16c:
2006 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65530,65531,65532,65531,65530,65529,0]
2007 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0]
2010 ; AVX-LABEL: min_le_v16i16c:
2012 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65529,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0]
2014 %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
2015 %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
2016 %3 = icmp sle <16 x i16> %1, %2
2017 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
; Constant-input form of the signed min test for <16 x i8> (icmp sle +
; select). Each operand is a constant vector whose lane 0 is re-inserted with
; the value it already holds. The checks expect a single constant load of the
; result <-7,-6,-5,-4,-5,-6,-7,0, 1,2,3,4,3,2,1,0>, with negative lanes
; printed as unsigned bytes (-7 as 249, and so on).
2021 define <16 x i8> @min_le_v16i8c() {
2022 ; SSE-LABEL: min_le_v16i8c:
2024 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
2027 ; AVX-LABEL: min_le_v16i8c:
2029 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
2031 %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
2032 %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
2033 %3 = icmp sle <16 x i8> %1, %2
2034 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2