1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE42
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
; Per-lane signed maximum of two <2 x i64> vectors, written as
; icmp sgt + select (takes %a where %a > %b, otherwise %b).
; Expected lowering per the autogenerated checks below -
;   SSE2 and SSE4.1 build the mask with a pxor/pcmpgtd/pcmpeqd/pshufd
;   sequence (SSE2 then merges with pand/pandn/por, SSE4.1 with blendvpd),
;   SSE4.2 uses pcmpgtq + blendvpd, AVX1 and AVX2 use vpcmpgtq + vblendvpd,
;   and the AVX512 runs use vpmaxsq on zmm registers followed by vzeroupper.
14 define <2 x i64> @max_gt_v2i64(<2 x i64> %a, <2 x i64> %b) {
15 ; SSE2-LABEL: max_gt_v2i64:
17 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
18 ; SSE2-NEXT: movdqa %xmm1, %xmm3
19 ; SSE2-NEXT: pxor %xmm2, %xmm3
20 ; SSE2-NEXT: pxor %xmm0, %xmm2
21 ; SSE2-NEXT: movdqa %xmm2, %xmm4
22 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
23 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
24 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
25 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
26 ; SSE2-NEXT: pand %xmm5, %xmm2
27 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
28 ; SSE2-NEXT: por %xmm2, %xmm3
29 ; SSE2-NEXT: pand %xmm3, %xmm0
30 ; SSE2-NEXT: pandn %xmm1, %xmm3
31 ; SSE2-NEXT: por %xmm3, %xmm0
34 ; SSE41-LABEL: max_gt_v2i64:
36 ; SSE41-NEXT: movdqa %xmm0, %xmm2
37 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
38 ; SSE41-NEXT: movdqa %xmm1, %xmm3
39 ; SSE41-NEXT: pxor %xmm0, %xmm3
40 ; SSE41-NEXT: pxor %xmm2, %xmm0
41 ; SSE41-NEXT: movdqa %xmm0, %xmm4
42 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
43 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
44 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
45 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
46 ; SSE41-NEXT: pand %xmm5, %xmm3
47 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
48 ; SSE41-NEXT: por %xmm3, %xmm0
49 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
50 ; SSE41-NEXT: movapd %xmm1, %xmm0
53 ; SSE42-LABEL: max_gt_v2i64:
55 ; SSE42-NEXT: movdqa %xmm0, %xmm2
56 ; SSE42-NEXT: pcmpgtq %xmm1, %xmm0
57 ; SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
58 ; SSE42-NEXT: movapd %xmm1, %xmm0
61 ; AVX1-LABEL: max_gt_v2i64:
63 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
64 ; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
67 ; AVX2-LABEL: max_gt_v2i64:
69 ; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
70 ; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
73 ; AVX512-LABEL: max_gt_v2i64:
75 ; AVX512-NEXT: # kill: %xmm1<def> %xmm1<kill> %zmm1<def>
76 ; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<def>
77 ; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
78 ; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<kill>
79 ; AVX512-NEXT: vzeroupper
81 %1 = icmp sgt <2 x i64> %a, %b
82 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
; Per-lane signed maximum of two <4 x i64> vectors, written as
; icmp sgt + select (takes %a where %a > %b, otherwise %b).
; Expected lowering per the autogenerated checks below -
;   the SSE runs work on two xmm register pairs; SSE2 and SSE4.1 build each
;   mask with pxor/pcmpgtd/pcmpeqd/pshufd, SSE4.2 uses pcmpgtq + blendvpd,
;   AVX1 compares the vextractf128 halves with vpcmpgtq, rejoins them with
;   vinsertf128 and blends on ymm, AVX2 uses ymm vpcmpgtq + vblendvpd, and
;   the AVX512 runs use vpmaxsq on zmm registers.
86 define <4 x i64> @max_gt_v4i64(<4 x i64> %a, <4 x i64> %b) {
87 ; SSE2-LABEL: max_gt_v4i64:
89 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
90 ; SSE2-NEXT: movdqa %xmm3, %xmm5
91 ; SSE2-NEXT: pxor %xmm4, %xmm5
92 ; SSE2-NEXT: movdqa %xmm1, %xmm6
93 ; SSE2-NEXT: pxor %xmm4, %xmm6
94 ; SSE2-NEXT: movdqa %xmm6, %xmm7
95 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
96 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
97 ; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
98 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
99 ; SSE2-NEXT: pand %xmm8, %xmm5
100 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
101 ; SSE2-NEXT: por %xmm5, %xmm6
102 ; SSE2-NEXT: movdqa %xmm2, %xmm5
103 ; SSE2-NEXT: pxor %xmm4, %xmm5
104 ; SSE2-NEXT: pxor %xmm0, %xmm4
105 ; SSE2-NEXT: movdqa %xmm4, %xmm7
106 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
107 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
108 ; SSE2-NEXT: pcmpeqd %xmm5, %xmm4
109 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
110 ; SSE2-NEXT: pand %xmm8, %xmm4
111 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
112 ; SSE2-NEXT: por %xmm4, %xmm5
113 ; SSE2-NEXT: pand %xmm5, %xmm0
114 ; SSE2-NEXT: pandn %xmm2, %xmm5
115 ; SSE2-NEXT: por %xmm5, %xmm0
116 ; SSE2-NEXT: pand %xmm6, %xmm1
117 ; SSE2-NEXT: pandn %xmm3, %xmm6
118 ; SSE2-NEXT: por %xmm6, %xmm1
121 ; SSE41-LABEL: max_gt_v4i64:
123 ; SSE41-NEXT: movdqa %xmm0, %xmm8
124 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
125 ; SSE41-NEXT: movdqa %xmm3, %xmm5
126 ; SSE41-NEXT: pxor %xmm0, %xmm5
127 ; SSE41-NEXT: movdqa %xmm1, %xmm6
128 ; SSE41-NEXT: pxor %xmm0, %xmm6
129 ; SSE41-NEXT: movdqa %xmm6, %xmm7
130 ; SSE41-NEXT: pcmpgtd %xmm5, %xmm7
131 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
132 ; SSE41-NEXT: pcmpeqd %xmm5, %xmm6
133 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
134 ; SSE41-NEXT: pand %xmm4, %xmm6
135 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
136 ; SSE41-NEXT: por %xmm6, %xmm5
137 ; SSE41-NEXT: movdqa %xmm2, %xmm4
138 ; SSE41-NEXT: pxor %xmm0, %xmm4
139 ; SSE41-NEXT: pxor %xmm8, %xmm0
140 ; SSE41-NEXT: movdqa %xmm0, %xmm6
141 ; SSE41-NEXT: pcmpgtd %xmm4, %xmm6
142 ; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
143 ; SSE41-NEXT: pcmpeqd %xmm4, %xmm0
144 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
145 ; SSE41-NEXT: pand %xmm7, %xmm4
146 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
147 ; SSE41-NEXT: por %xmm4, %xmm0
148 ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm2
149 ; SSE41-NEXT: movdqa %xmm5, %xmm0
150 ; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3
151 ; SSE41-NEXT: movapd %xmm2, %xmm0
152 ; SSE41-NEXT: movapd %xmm3, %xmm1
155 ; SSE42-LABEL: max_gt_v4i64:
157 ; SSE42-NEXT: movdqa %xmm0, %xmm4
158 ; SSE42-NEXT: movdqa %xmm1, %xmm5
159 ; SSE42-NEXT: pcmpgtq %xmm3, %xmm5
160 ; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
161 ; SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2
162 ; SSE42-NEXT: movdqa %xmm5, %xmm0
163 ; SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
164 ; SSE42-NEXT: movapd %xmm2, %xmm0
165 ; SSE42-NEXT: movapd %xmm3, %xmm1
168 ; AVX1-LABEL: max_gt_v4i64:
170 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
171 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
172 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
173 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3
174 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
175 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
178 ; AVX2-LABEL: max_gt_v4i64:
180 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
181 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
184 ; AVX512-LABEL: max_gt_v4i64:
186 ; AVX512-NEXT: # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
187 ; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
188 ; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
189 ; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<kill>
191 %1 = icmp sgt <4 x i64> %a, %b
192 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
; Per-lane signed maximum of two <4 x i32> vectors, written as
; icmp sgt + select (takes %a where %a > %b, otherwise %b).
; Per the checks below, SSE2 is expected to use pcmpgtd with a
; pand/pandn/por merge, SSE4.1 and SSE4.2 a single pmaxsd, and every
; AVX-family run a single vpmaxsd.
196 define <4 x i32> @max_gt_v4i32(<4 x i32> %a, <4 x i32> %b) {
197 ; SSE2-LABEL: max_gt_v4i32:
199 ; SSE2-NEXT: movdqa %xmm0, %xmm2
200 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
201 ; SSE2-NEXT: pand %xmm2, %xmm0
202 ; SSE2-NEXT: pandn %xmm1, %xmm2
203 ; SSE2-NEXT: por %xmm0, %xmm2
204 ; SSE2-NEXT: movdqa %xmm2, %xmm0
207 ; SSE41-LABEL: max_gt_v4i32:
209 ; SSE41-NEXT: pmaxsd %xmm1, %xmm0
212 ; SSE42-LABEL: max_gt_v4i32:
214 ; SSE42-NEXT: pmaxsd %xmm1, %xmm0
217 ; AVX-LABEL: max_gt_v4i32:
219 ; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
221 %1 = icmp sgt <4 x i32> %a, %b
222 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
; Per-lane signed maximum of two <8 x i32> vectors, written as
; icmp sgt + select (takes %a where %a > %b, otherwise %b).
; Per the checks below, SSE2 is expected to use pcmpgtd with a
; pand/pandn/por merge on each xmm pair, SSE4.1 and SSE4.2 two pmaxsd,
; AVX1 two xmm vpmaxsd around vextractf128/vinsertf128, and AVX2 and
; the AVX512 runs a single ymm vpmaxsd.
226 define <8 x i32> @max_gt_v8i32(<8 x i32> %a, <8 x i32> %b) {
227 ; SSE2-LABEL: max_gt_v8i32:
229 ; SSE2-NEXT: movdqa %xmm1, %xmm4
230 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
231 ; SSE2-NEXT: movdqa %xmm0, %xmm5
232 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
233 ; SSE2-NEXT: pand %xmm5, %xmm0
234 ; SSE2-NEXT: pandn %xmm2, %xmm5
235 ; SSE2-NEXT: por %xmm0, %xmm5
236 ; SSE2-NEXT: pand %xmm4, %xmm1
237 ; SSE2-NEXT: pandn %xmm3, %xmm4
238 ; SSE2-NEXT: por %xmm1, %xmm4
239 ; SSE2-NEXT: movdqa %xmm5, %xmm0
240 ; SSE2-NEXT: movdqa %xmm4, %xmm1
243 ; SSE41-LABEL: max_gt_v8i32:
245 ; SSE41-NEXT: pmaxsd %xmm2, %xmm0
246 ; SSE41-NEXT: pmaxsd %xmm3, %xmm1
249 ; SSE42-LABEL: max_gt_v8i32:
251 ; SSE42-NEXT: pmaxsd %xmm2, %xmm0
252 ; SSE42-NEXT: pmaxsd %xmm3, %xmm1
255 ; AVX1-LABEL: max_gt_v8i32:
257 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
258 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
259 ; AVX1-NEXT: vpmaxsd %xmm2, %xmm3, %xmm2
260 ; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
261 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
264 ; AVX2-LABEL: max_gt_v8i32:
266 ; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
269 ; AVX512-LABEL: max_gt_v8i32:
271 ; AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
273 %1 = icmp sgt <8 x i32> %a, %b
274 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
; Per-lane signed maximum of two <8 x i16> vectors, written as
; icmp sgt + select (takes %a where %a > %b, otherwise %b).
; Per the checks below, every SSE run is expected to emit a single pmaxsw
; and every AVX-family run a single vpmaxsw.
278 define <8 x i16> @max_gt_v8i16(<8 x i16> %a, <8 x i16> %b) {
279 ; SSE-LABEL: max_gt_v8i16:
281 ; SSE-NEXT: pmaxsw %xmm1, %xmm0
284 ; AVX-LABEL: max_gt_v8i16:
286 ; AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
288 %1 = icmp sgt <8 x i16> %a, %b
289 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
; Per-lane signed maximum of two <16 x i16> vectors, written as
; icmp sgt + select (takes %a where %a > %b, otherwise %b).
; Per the checks below, the SSE runs are expected to emit two pmaxsw,
; AVX1 two xmm vpmaxsw around vextractf128/vinsertf128, and AVX2 and the
; AVX512 runs a single ymm vpmaxsw.
293 define <16 x i16> @max_gt_v16i16(<16 x i16> %a, <16 x i16> %b) {
294 ; SSE-LABEL: max_gt_v16i16:
296 ; SSE-NEXT: pmaxsw %xmm2, %xmm0
297 ; SSE-NEXT: pmaxsw %xmm3, %xmm1
300 ; AVX1-LABEL: max_gt_v16i16:
302 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
303 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
304 ; AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2
305 ; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
306 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
309 ; AVX2-LABEL: max_gt_v16i16:
311 ; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
314 ; AVX512-LABEL: max_gt_v16i16:
316 ; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
318 %1 = icmp sgt <16 x i16> %a, %b
319 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
; Per-lane signed maximum of two <16 x i8> vectors, written as
; icmp sgt + select (takes %a where %a > %b, otherwise %b).
; Per the checks below, SSE2 is expected to use pcmpgtb with a
; pand/pandn/por merge, SSE4.1 and SSE4.2 a single pmaxsb, and every
; AVX-family run a single vpmaxsb.
323 define <16 x i8> @max_gt_v16i8(<16 x i8> %a, <16 x i8> %b) {
324 ; SSE2-LABEL: max_gt_v16i8:
326 ; SSE2-NEXT: movdqa %xmm0, %xmm2
327 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
328 ; SSE2-NEXT: pand %xmm2, %xmm0
329 ; SSE2-NEXT: pandn %xmm1, %xmm2
330 ; SSE2-NEXT: por %xmm0, %xmm2
331 ; SSE2-NEXT: movdqa %xmm2, %xmm0
334 ; SSE41-LABEL: max_gt_v16i8:
336 ; SSE41-NEXT: pmaxsb %xmm1, %xmm0
339 ; SSE42-LABEL: max_gt_v16i8:
341 ; SSE42-NEXT: pmaxsb %xmm1, %xmm0
344 ; AVX-LABEL: max_gt_v16i8:
346 ; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
348 %1 = icmp sgt <16 x i8> %a, %b
349 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
; Per-lane signed maximum of two <32 x i8> vectors, written as
; icmp sgt + select (takes %a where %a > %b, otherwise %b).
; Per the checks below, SSE2 is expected to use pcmpgtb with a
; pand/pandn/por merge on each xmm pair, SSE4.1 and SSE4.2 two pmaxsb,
; AVX1 two xmm vpmaxsb around vextractf128/vinsertf128, and AVX2 and
; the AVX512 runs a single ymm vpmaxsb.
353 define <32 x i8> @max_gt_v32i8(<32 x i8> %a, <32 x i8> %b) {
354 ; SSE2-LABEL: max_gt_v32i8:
356 ; SSE2-NEXT: movdqa %xmm1, %xmm4
357 ; SSE2-NEXT: pcmpgtb %xmm3, %xmm4
358 ; SSE2-NEXT: movdqa %xmm0, %xmm5
359 ; SSE2-NEXT: pcmpgtb %xmm2, %xmm5
360 ; SSE2-NEXT: pand %xmm5, %xmm0
361 ; SSE2-NEXT: pandn %xmm2, %xmm5
362 ; SSE2-NEXT: por %xmm0, %xmm5
363 ; SSE2-NEXT: pand %xmm4, %xmm1
364 ; SSE2-NEXT: pandn %xmm3, %xmm4
365 ; SSE2-NEXT: por %xmm1, %xmm4
366 ; SSE2-NEXT: movdqa %xmm5, %xmm0
367 ; SSE2-NEXT: movdqa %xmm4, %xmm1
370 ; SSE41-LABEL: max_gt_v32i8:
372 ; SSE41-NEXT: pmaxsb %xmm2, %xmm0
373 ; SSE41-NEXT: pmaxsb %xmm3, %xmm1
376 ; SSE42-LABEL: max_gt_v32i8:
378 ; SSE42-NEXT: pmaxsb %xmm2, %xmm0
379 ; SSE42-NEXT: pmaxsb %xmm3, %xmm1
382 ; AVX1-LABEL: max_gt_v32i8:
384 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
385 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
386 ; AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
387 ; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
388 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
391 ; AVX2-LABEL: max_gt_v32i8:
393 ; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
396 ; AVX512-LABEL: max_gt_v32i8:
398 ; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
400 %1 = icmp sgt <32 x i8> %a, %b
401 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
406 ; Signed Maximum (GE)
; Per-lane signed maximum of two <2 x i64> vectors, written as
; icmp sge + select (takes %a where %a >= %b, otherwise %b).
; Expected lowering per the autogenerated checks below -
;   every run from SSE2 through AVX2 applies an extra pcmpeqd (a register
;   with itself) + pxor pair to the compare mask before merging;
;   SSE2 and SSE4.1 build that mask with pxor/pcmpgtd/pcmpeqd/pshufd,
;   SSE4.2 with pcmpgtq, AVX1 and AVX2 with vpcmpgtq, and the AVX512 runs
;   use vpmaxsq on zmm registers followed by vzeroupper.
409 define <2 x i64> @max_ge_v2i64(<2 x i64> %a, <2 x i64> %b) {
410 ; SSE2-LABEL: max_ge_v2i64:
412 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
413 ; SSE2-NEXT: movdqa %xmm0, %xmm3
414 ; SSE2-NEXT: pxor %xmm2, %xmm3
415 ; SSE2-NEXT: pxor %xmm1, %xmm2
416 ; SSE2-NEXT: movdqa %xmm2, %xmm4
417 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
418 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
419 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
420 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
421 ; SSE2-NEXT: pand %xmm5, %xmm2
422 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
423 ; SSE2-NEXT: por %xmm2, %xmm3
424 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
425 ; SSE2-NEXT: pxor %xmm3, %xmm2
426 ; SSE2-NEXT: pandn %xmm0, %xmm3
427 ; SSE2-NEXT: pandn %xmm1, %xmm2
428 ; SSE2-NEXT: por %xmm3, %xmm2
429 ; SSE2-NEXT: movdqa %xmm2, %xmm0
432 ; SSE41-LABEL: max_ge_v2i64:
434 ; SSE41-NEXT: movdqa %xmm0, %xmm2
435 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
436 ; SSE41-NEXT: movdqa %xmm2, %xmm3
437 ; SSE41-NEXT: pxor %xmm0, %xmm3
438 ; SSE41-NEXT: pxor %xmm1, %xmm0
439 ; SSE41-NEXT: movdqa %xmm0, %xmm4
440 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
441 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
442 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
443 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
444 ; SSE41-NEXT: pand %xmm5, %xmm0
445 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
446 ; SSE41-NEXT: por %xmm0, %xmm3
447 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
448 ; SSE41-NEXT: pxor %xmm3, %xmm0
449 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
450 ; SSE41-NEXT: movapd %xmm1, %xmm0
453 ; SSE42-LABEL: max_ge_v2i64:
455 ; SSE42-NEXT: movdqa %xmm0, %xmm2
456 ; SSE42-NEXT: movdqa %xmm1, %xmm3
457 ; SSE42-NEXT: pcmpgtq %xmm2, %xmm3
458 ; SSE42-NEXT: pcmpeqd %xmm0, %xmm0
459 ; SSE42-NEXT: pxor %xmm3, %xmm0
460 ; SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
461 ; SSE42-NEXT: movapd %xmm1, %xmm0
464 ; AVX1-LABEL: max_ge_v2i64:
466 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
467 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
468 ; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
469 ; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
472 ; AVX2-LABEL: max_ge_v2i64:
474 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
475 ; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
476 ; AVX2-NEXT: vpxor %xmm3, %xmm2, %xmm2
477 ; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
480 ; AVX512-LABEL: max_ge_v2i64:
482 ; AVX512-NEXT: # kill: %xmm1<def> %xmm1<kill> %zmm1<def>
483 ; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<def>
484 ; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
485 ; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<kill>
486 ; AVX512-NEXT: vzeroupper
488 %1 = icmp sge <2 x i64> %a, %b
489 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
; Per-lane signed maximum of two <4 x i64> vectors, written as
; icmp sge + select (takes %a where %a >= %b, otherwise %b).
; Expected lowering per the autogenerated checks below -
;   the SSE runs work on two xmm register pairs, and every run from SSE2
;   through AVX2 applies a pcmpeqd (a register with itself) + pxor step to
;   each compare mask before merging; SSE2 and SSE4.1 build the masks with
;   pxor/pcmpgtd/pcmpeqd/pshufd, SSE4.2 with pcmpgtq, AVX1 with xmm
;   vpcmpgtq around vextractf128/vinsertf128, AVX2 with ymm vpcmpgtq, and
;   the AVX512 runs use vpmaxsq on zmm registers.
493 define <4 x i64> @max_ge_v4i64(<4 x i64> %a, <4 x i64> %b) {
494 ; SSE2-LABEL: max_ge_v4i64:
496 ; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,0,2147483648,0]
497 ; SSE2-NEXT: movdqa %xmm1, %xmm4
498 ; SSE2-NEXT: pxor %xmm7, %xmm4
499 ; SSE2-NEXT: movdqa %xmm3, %xmm5
500 ; SSE2-NEXT: pxor %xmm7, %xmm5
501 ; SSE2-NEXT: movdqa %xmm5, %xmm6
502 ; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
503 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2]
504 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm5
505 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
506 ; SSE2-NEXT: pand %xmm8, %xmm4
507 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[1,1,3,3]
508 ; SSE2-NEXT: por %xmm4, %xmm8
509 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
510 ; SSE2-NEXT: movdqa %xmm8, %xmm9
511 ; SSE2-NEXT: pxor %xmm4, %xmm9
512 ; SSE2-NEXT: movdqa %xmm0, %xmm6
513 ; SSE2-NEXT: pxor %xmm7, %xmm6
514 ; SSE2-NEXT: pxor %xmm2, %xmm7
515 ; SSE2-NEXT: movdqa %xmm7, %xmm5
516 ; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
517 ; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm5[0,0,2,2]
518 ; SSE2-NEXT: pcmpeqd %xmm6, %xmm7
519 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
520 ; SSE2-NEXT: pand %xmm10, %xmm6
521 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
522 ; SSE2-NEXT: por %xmm6, %xmm5
523 ; SSE2-NEXT: pxor %xmm5, %xmm4
524 ; SSE2-NEXT: pandn %xmm0, %xmm5
525 ; SSE2-NEXT: pandn %xmm2, %xmm4
526 ; SSE2-NEXT: por %xmm5, %xmm4
527 ; SSE2-NEXT: pandn %xmm1, %xmm8
528 ; SSE2-NEXT: pandn %xmm3, %xmm9
529 ; SSE2-NEXT: por %xmm8, %xmm9
530 ; SSE2-NEXT: movdqa %xmm4, %xmm0
531 ; SSE2-NEXT: movdqa %xmm9, %xmm1
534 ; SSE41-LABEL: max_ge_v4i64:
536 ; SSE41-NEXT: movdqa %xmm0, %xmm8
537 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
538 ; SSE41-NEXT: movdqa %xmm1, %xmm5
539 ; SSE41-NEXT: pxor %xmm0, %xmm5
540 ; SSE41-NEXT: movdqa %xmm3, %xmm6
541 ; SSE41-NEXT: pxor %xmm0, %xmm6
542 ; SSE41-NEXT: movdqa %xmm6, %xmm7
543 ; SSE41-NEXT: pcmpgtd %xmm5, %xmm7
544 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
545 ; SSE41-NEXT: pcmpeqd %xmm5, %xmm6
546 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
547 ; SSE41-NEXT: pand %xmm4, %xmm6
548 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
549 ; SSE41-NEXT: por %xmm6, %xmm5
550 ; SSE41-NEXT: pcmpeqd %xmm9, %xmm9
551 ; SSE41-NEXT: pxor %xmm9, %xmm5
552 ; SSE41-NEXT: movdqa %xmm8, %xmm6
553 ; SSE41-NEXT: pxor %xmm0, %xmm6
554 ; SSE41-NEXT: pxor %xmm2, %xmm0
555 ; SSE41-NEXT: movdqa %xmm0, %xmm7
556 ; SSE41-NEXT: pcmpgtd %xmm6, %xmm7
557 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
558 ; SSE41-NEXT: pcmpeqd %xmm6, %xmm0
559 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
560 ; SSE41-NEXT: pand %xmm4, %xmm6
561 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3]
562 ; SSE41-NEXT: por %xmm6, %xmm0
563 ; SSE41-NEXT: pxor %xmm9, %xmm0
564 ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm2
565 ; SSE41-NEXT: movdqa %xmm5, %xmm0
566 ; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3
567 ; SSE41-NEXT: movapd %xmm2, %xmm0
568 ; SSE41-NEXT: movapd %xmm3, %xmm1
571 ; SSE42-LABEL: max_ge_v4i64:
573 ; SSE42-NEXT: movdqa %xmm0, %xmm4
574 ; SSE42-NEXT: movdqa %xmm3, %xmm5
575 ; SSE42-NEXT: pcmpgtq %xmm1, %xmm5
576 ; SSE42-NEXT: pcmpeqd %xmm0, %xmm0
577 ; SSE42-NEXT: pxor %xmm0, %xmm5
578 ; SSE42-NEXT: movdqa %xmm2, %xmm6
579 ; SSE42-NEXT: pcmpgtq %xmm4, %xmm6
580 ; SSE42-NEXT: pxor %xmm6, %xmm0
581 ; SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2
582 ; SSE42-NEXT: movdqa %xmm5, %xmm0
583 ; SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
584 ; SSE42-NEXT: movapd %xmm2, %xmm0
585 ; SSE42-NEXT: movapd %xmm3, %xmm1
588 ; AVX1-LABEL: max_ge_v4i64:
590 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
591 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
592 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
593 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
594 ; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
595 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm4
596 ; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
597 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
598 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
601 ; AVX2-LABEL: max_ge_v4i64:
603 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
604 ; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
605 ; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2
606 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
609 ; AVX512-LABEL: max_ge_v4i64:
611 ; AVX512-NEXT: # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
612 ; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
613 ; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
614 ; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<kill>
616 %1 = icmp sge <4 x i64> %a, %b
617 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
; Per-lane signed maximum of two <4 x i32> vectors, written as
; icmp sge + select (takes %a where %a >= %b, otherwise %b).
; Per the checks below, SSE2 is expected to use pcmpgtd, a pcmpeqd/pxor
; pair on the mask and a pandn/pandn/por merge, SSE4.1 and SSE4.2 a
; single pmaxsd, and every AVX-family run a single vpmaxsd.
621 define <4 x i32> @max_ge_v4i32(<4 x i32> %a, <4 x i32> %b) {
622 ; SSE2-LABEL: max_ge_v4i32:
624 ; SSE2-NEXT: movdqa %xmm1, %xmm3
625 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
626 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
627 ; SSE2-NEXT: pxor %xmm3, %xmm2
628 ; SSE2-NEXT: pandn %xmm0, %xmm3
629 ; SSE2-NEXT: pandn %xmm1, %xmm2
630 ; SSE2-NEXT: por %xmm3, %xmm2
631 ; SSE2-NEXT: movdqa %xmm2, %xmm0
634 ; SSE41-LABEL: max_ge_v4i32:
636 ; SSE41-NEXT: pmaxsd %xmm1, %xmm0
639 ; SSE42-LABEL: max_ge_v4i32:
641 ; SSE42-NEXT: pmaxsd %xmm1, %xmm0
644 ; AVX-LABEL: max_ge_v4i32:
646 ; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
648 %1 = icmp sge <4 x i32> %a, %b
649 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
; Per-lane signed maximum of two <8 x i32> vectors, written as
; icmp sge + select (takes %a where %a >= %b, otherwise %b).
; Per the checks below, SSE2 is expected to use pcmpgtd, one pcmpeqd with
; pxor applied to each mask, and a pandn/pandn/por merge per xmm pair;
; SSE4.1 and SSE4.2 two pmaxsd, AVX1 two xmm vpmaxsd around
; vextractf128/vinsertf128, and AVX2 and the AVX512 runs a single ymm
; vpmaxsd.
653 define <8 x i32> @max_ge_v8i32(<8 x i32> %a, <8 x i32> %b) {
654 ; SSE2-LABEL: max_ge_v8i32:
656 ; SSE2-NEXT: movdqa %xmm3, %xmm6
657 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm6
658 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
659 ; SSE2-NEXT: movdqa %xmm6, %xmm5
660 ; SSE2-NEXT: pxor %xmm4, %xmm5
661 ; SSE2-NEXT: movdqa %xmm2, %xmm7
662 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm7
663 ; SSE2-NEXT: pxor %xmm7, %xmm4
664 ; SSE2-NEXT: pandn %xmm0, %xmm7
665 ; SSE2-NEXT: pandn %xmm2, %xmm4
666 ; SSE2-NEXT: por %xmm7, %xmm4
667 ; SSE2-NEXT: pandn %xmm1, %xmm6
668 ; SSE2-NEXT: pandn %xmm3, %xmm5
669 ; SSE2-NEXT: por %xmm6, %xmm5
670 ; SSE2-NEXT: movdqa %xmm4, %xmm0
671 ; SSE2-NEXT: movdqa %xmm5, %xmm1
674 ; SSE41-LABEL: max_ge_v8i32:
676 ; SSE41-NEXT: pmaxsd %xmm2, %xmm0
677 ; SSE41-NEXT: pmaxsd %xmm3, %xmm1
680 ; SSE42-LABEL: max_ge_v8i32:
682 ; SSE42-NEXT: pmaxsd %xmm2, %xmm0
683 ; SSE42-NEXT: pmaxsd %xmm3, %xmm1
686 ; AVX1-LABEL: max_ge_v8i32:
688 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
689 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
690 ; AVX1-NEXT: vpmaxsd %xmm2, %xmm3, %xmm2
691 ; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
692 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
695 ; AVX2-LABEL: max_ge_v8i32:
697 ; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
700 ; AVX512-LABEL: max_ge_v8i32:
702 ; AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
704 %1 = icmp sge <8 x i32> %a, %b
705 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
; Per-lane signed maximum of two <8 x i16> vectors, written as
; icmp sge + select (takes %a where %a >= %b, otherwise %b).
; Per the checks below, every SSE run is expected to emit a single pmaxsw
; and every AVX-family run a single vpmaxsw.
709 define <8 x i16> @max_ge_v8i16(<8 x i16> %a, <8 x i16> %b) {
710 ; SSE-LABEL: max_ge_v8i16:
712 ; SSE-NEXT: pmaxsw %xmm1, %xmm0
715 ; AVX-LABEL: max_ge_v8i16:
717 ; AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
719 %1 = icmp sge <8 x i16> %a, %b
720 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
; Per-lane signed maximum of two <16 x i16> vectors, written as
; icmp sge + select (takes %a where %a >= %b, otherwise %b).
; Per the checks below, the SSE runs are expected to emit two pmaxsw,
; AVX1 two xmm vpmaxsw around vextractf128/vinsertf128, and AVX2 and the
; AVX512 runs a single ymm vpmaxsw.
724 define <16 x i16> @max_ge_v16i16(<16 x i16> %a, <16 x i16> %b) {
725 ; SSE-LABEL: max_ge_v16i16:
727 ; SSE-NEXT: pmaxsw %xmm2, %xmm0
728 ; SSE-NEXT: pmaxsw %xmm3, %xmm1
731 ; AVX1-LABEL: max_ge_v16i16:
733 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
734 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
735 ; AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2
736 ; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
737 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
740 ; AVX2-LABEL: max_ge_v16i16:
742 ; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
745 ; AVX512-LABEL: max_ge_v16i16:
747 ; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
749 %1 = icmp sge <16 x i16> %a, %b
750 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
; Per-lane signed maximum of two <16 x i8> vectors, written as
; icmp sge + select (takes %a where %a >= %b, otherwise %b).
; Per the checks below, SSE2 is expected to use pcmpgtb, a pcmpeqd/pxor
; pair on the mask and a pandn/pandn/por merge, SSE4.1 and SSE4.2 a
; single pmaxsb, and every AVX-family run a single vpmaxsb.
754 define <16 x i8> @max_ge_v16i8(<16 x i8> %a, <16 x i8> %b) {
755 ; SSE2-LABEL: max_ge_v16i8:
757 ; SSE2-NEXT: movdqa %xmm1, %xmm3
758 ; SSE2-NEXT: pcmpgtb %xmm0, %xmm3
759 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
760 ; SSE2-NEXT: pxor %xmm3, %xmm2
761 ; SSE2-NEXT: pandn %xmm0, %xmm3
762 ; SSE2-NEXT: pandn %xmm1, %xmm2
763 ; SSE2-NEXT: por %xmm3, %xmm2
764 ; SSE2-NEXT: movdqa %xmm2, %xmm0
767 ; SSE41-LABEL: max_ge_v16i8:
769 ; SSE41-NEXT: pmaxsb %xmm1, %xmm0
772 ; SSE42-LABEL: max_ge_v16i8:
774 ; SSE42-NEXT: pmaxsb %xmm1, %xmm0
777 ; AVX-LABEL: max_ge_v16i8:
779 ; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
781 %1 = icmp sge <16 x i8> %a, %b
782 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
; Per-lane signed maximum of two <32 x i8> vectors, written as
; icmp sge + select (takes %a where %a >= %b, otherwise %b).
; Per the checks below, SSE2 is expected to use pcmpgtb, one pcmpeqd with
; pxor applied to each mask, and a pandn/pandn/por merge per xmm pair;
; SSE4.1 and SSE4.2 two pmaxsb, AVX1 two xmm vpmaxsb around
; vextractf128/vinsertf128, and AVX2 and the AVX512 runs a single ymm
; vpmaxsb.
786 define <32 x i8> @max_ge_v32i8(<32 x i8> %a, <32 x i8> %b) {
787 ; SSE2-LABEL: max_ge_v32i8:
789 ; SSE2-NEXT: movdqa %xmm3, %xmm6
790 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm6
791 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
792 ; SSE2-NEXT: movdqa %xmm6, %xmm5
793 ; SSE2-NEXT: pxor %xmm4, %xmm5
794 ; SSE2-NEXT: movdqa %xmm2, %xmm7
795 ; SSE2-NEXT: pcmpgtb %xmm0, %xmm7
796 ; SSE2-NEXT: pxor %xmm7, %xmm4
797 ; SSE2-NEXT: pandn %xmm0, %xmm7
798 ; SSE2-NEXT: pandn %xmm2, %xmm4
799 ; SSE2-NEXT: por %xmm7, %xmm4
800 ; SSE2-NEXT: pandn %xmm1, %xmm6
801 ; SSE2-NEXT: pandn %xmm3, %xmm5
802 ; SSE2-NEXT: por %xmm6, %xmm5
803 ; SSE2-NEXT: movdqa %xmm4, %xmm0
804 ; SSE2-NEXT: movdqa %xmm5, %xmm1
807 ; SSE41-LABEL: max_ge_v32i8:
809 ; SSE41-NEXT: pmaxsb %xmm2, %xmm0
810 ; SSE41-NEXT: pmaxsb %xmm3, %xmm1
813 ; SSE42-LABEL: max_ge_v32i8:
815 ; SSE42-NEXT: pmaxsb %xmm2, %xmm0
816 ; SSE42-NEXT: pmaxsb %xmm3, %xmm1
819 ; AVX1-LABEL: max_ge_v32i8:
821 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
822 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
823 ; AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
824 ; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
825 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
828 ; AVX2-LABEL: max_ge_v32i8:
830 ; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
833 ; AVX512-LABEL: max_ge_v32i8:
835 ; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
837 %1 = icmp sge <32 x i8> %a, %b
838 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
843 ; Signed Minimum (LT)
; Per-lane signed minimum of two <2 x i64> vectors, written as
; icmp slt + select (takes %a where %a < %b, otherwise %b).
; Expected lowering per the autogenerated checks below -
;   SSE2 and SSE4.1 build the mask with a pxor/pcmpgtd/pcmpeqd/pshufd
;   sequence (SSE2 then merges with pand/pandn/por, SSE4.1 with blendvpd),
;   SSE4.2 uses pcmpgtq + blendvpd, AVX1 and AVX2 use vpcmpgtq + vblendvpd,
;   and the AVX512 runs use vpminsq on zmm registers followed by vzeroupper.
846 define <2 x i64> @min_lt_v2i64(<2 x i64> %a, <2 x i64> %b) {
847 ; SSE2-LABEL: min_lt_v2i64:
849 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
850 ; SSE2-NEXT: movdqa %xmm0, %xmm3
851 ; SSE2-NEXT: pxor %xmm2, %xmm3
852 ; SSE2-NEXT: pxor %xmm1, %xmm2
853 ; SSE2-NEXT: movdqa %xmm2, %xmm4
854 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
855 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
856 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
857 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
858 ; SSE2-NEXT: pand %xmm5, %xmm2
859 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
860 ; SSE2-NEXT: por %xmm2, %xmm3
861 ; SSE2-NEXT: pand %xmm3, %xmm0
862 ; SSE2-NEXT: pandn %xmm1, %xmm3
863 ; SSE2-NEXT: por %xmm3, %xmm0
866 ; SSE41-LABEL: min_lt_v2i64:
868 ; SSE41-NEXT: movdqa %xmm0, %xmm2
869 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
870 ; SSE41-NEXT: movdqa %xmm2, %xmm3
871 ; SSE41-NEXT: pxor %xmm0, %xmm3
872 ; SSE41-NEXT: pxor %xmm1, %xmm0
873 ; SSE41-NEXT: movdqa %xmm0, %xmm4
874 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
875 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
876 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
877 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
878 ; SSE41-NEXT: pand %xmm5, %xmm3
879 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
880 ; SSE41-NEXT: por %xmm3, %xmm0
881 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
882 ; SSE41-NEXT: movapd %xmm1, %xmm0
885 ; SSE42-LABEL: min_lt_v2i64:
887 ; SSE42-NEXT: movdqa %xmm0, %xmm2
888 ; SSE42-NEXT: movdqa %xmm1, %xmm0
889 ; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
890 ; SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
891 ; SSE42-NEXT: movapd %xmm1, %xmm0
894 ; AVX1-LABEL: min_lt_v2i64:
896 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
897 ; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
900 ; AVX2-LABEL: min_lt_v2i64:
902 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
903 ; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
906 ; AVX512-LABEL: min_lt_v2i64:
908 ; AVX512-NEXT: # kill: %xmm1<def> %xmm1<kill> %zmm1<def>
909 ; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<def>
910 ; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
911 ; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<kill>
912 ; AVX512-NEXT: vzeroupper
914 %1 = icmp slt <2 x i64> %a, %b
915 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
; Per-lane signed minimum of two <4 x i64> vectors, written as
; icmp slt + select (takes %a where %a < %b, otherwise %b).
; Expected lowering per the autogenerated checks below -
;   the SSE runs work on two xmm register pairs; SSE2 and SSE4.1 build each
;   mask with pxor/pcmpgtd/pcmpeqd/pshufd, SSE4.2 uses pcmpgtq + blendvpd,
;   AVX1 compares the vextractf128 halves with vpcmpgtq, rejoins them with
;   vinsertf128 and blends on ymm, AVX2 uses ymm vpcmpgtq + vblendvpd, and
;   the AVX512 runs use vpminsq on zmm registers.
919 define <4 x i64> @min_lt_v4i64(<4 x i64> %a, <4 x i64> %b) {
920 ; SSE2-LABEL: min_lt_v4i64:
922 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
923 ; SSE2-NEXT: movdqa %xmm1, %xmm5
924 ; SSE2-NEXT: pxor %xmm4, %xmm5
925 ; SSE2-NEXT: movdqa %xmm3, %xmm6
926 ; SSE2-NEXT: pxor %xmm4, %xmm6
927 ; SSE2-NEXT: movdqa %xmm6, %xmm7
928 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
929 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
930 ; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
931 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
932 ; SSE2-NEXT: pand %xmm8, %xmm5
933 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
934 ; SSE2-NEXT: por %xmm5, %xmm6
935 ; SSE2-NEXT: movdqa %xmm0, %xmm5
936 ; SSE2-NEXT: pxor %xmm4, %xmm5
937 ; SSE2-NEXT: pxor %xmm2, %xmm4
938 ; SSE2-NEXT: movdqa %xmm4, %xmm7
939 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
940 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
941 ; SSE2-NEXT: pcmpeqd %xmm5, %xmm4
942 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
943 ; SSE2-NEXT: pand %xmm8, %xmm4
944 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
945 ; SSE2-NEXT: por %xmm4, %xmm5
946 ; SSE2-NEXT: pand %xmm5, %xmm0
947 ; SSE2-NEXT: pandn %xmm2, %xmm5
948 ; SSE2-NEXT: por %xmm5, %xmm0
949 ; SSE2-NEXT: pand %xmm6, %xmm1
950 ; SSE2-NEXT: pandn %xmm3, %xmm6
951 ; SSE2-NEXT: por %xmm6, %xmm1
954 ; SSE41-LABEL: min_lt_v4i64:
956 ; SSE41-NEXT: movdqa %xmm0, %xmm8
957 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
958 ; SSE41-NEXT: movdqa %xmm1, %xmm5
959 ; SSE41-NEXT: pxor %xmm0, %xmm5
960 ; SSE41-NEXT: movdqa %xmm3, %xmm6
961 ; SSE41-NEXT: pxor %xmm0, %xmm6
962 ; SSE41-NEXT: movdqa %xmm6, %xmm7
963 ; SSE41-NEXT: pcmpgtd %xmm5, %xmm7
964 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
965 ; SSE41-NEXT: pcmpeqd %xmm5, %xmm6
966 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
967 ; SSE41-NEXT: pand %xmm4, %xmm6
968 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
969 ; SSE41-NEXT: por %xmm6, %xmm5
970 ; SSE41-NEXT: movdqa %xmm8, %xmm4
971 ; SSE41-NEXT: pxor %xmm0, %xmm4
972 ; SSE41-NEXT: pxor %xmm2, %xmm0
973 ; SSE41-NEXT: movdqa %xmm0, %xmm6
974 ; SSE41-NEXT: pcmpgtd %xmm4, %xmm6
975 ; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
976 ; SSE41-NEXT: pcmpeqd %xmm4, %xmm0
977 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
978 ; SSE41-NEXT: pand %xmm7, %xmm4
979 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
980 ; SSE41-NEXT: por %xmm4, %xmm0
981 ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm2
982 ; SSE41-NEXT: movdqa %xmm5, %xmm0
983 ; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3
984 ; SSE41-NEXT: movapd %xmm2, %xmm0
985 ; SSE41-NEXT: movapd %xmm3, %xmm1
988 ; SSE42-LABEL: min_lt_v4i64:
990 ; SSE42-NEXT: movdqa %xmm0, %xmm4
991 ; SSE42-NEXT: movdqa %xmm3, %xmm5
992 ; SSE42-NEXT: pcmpgtq %xmm1, %xmm5
993 ; SSE42-NEXT: movdqa %xmm2, %xmm0
994 ; SSE42-NEXT: pcmpgtq %xmm4, %xmm0
995 ; SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2
996 ; SSE42-NEXT: movdqa %xmm5, %xmm0
997 ; SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
998 ; SSE42-NEXT: movapd %xmm2, %xmm0
999 ; SSE42-NEXT: movapd %xmm3, %xmm1
1002 ; AVX1-LABEL: min_lt_v4i64:
1004 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1005 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1006 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
1007 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3
1008 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1009 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1012 ; AVX2-LABEL: min_lt_v4i64:
1014 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
1015 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1018 ; AVX512-LABEL: min_lt_v4i64:
1020 ; AVX512-NEXT: # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
1021 ; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
1022 ; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
1023 ; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<kill>
1025 %1 = icmp slt <4 x i64> %a, %b
1026 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
; Per-lane signed minimum of two <4 x i32> vectors, written as
; icmp slt + select (takes %a where %a < %b, otherwise %b).
; Per the checks below, SSE2 is expected to use pcmpgtd with a
; pand/pandn/por merge, SSE4.1 and SSE4.2 a single pminsd, and every
; AVX-family run a single vpminsd.
1030 define <4 x i32> @min_lt_v4i32(<4 x i32> %a, <4 x i32> %b) {
1031 ; SSE2-LABEL: min_lt_v4i32:
1033 ; SSE2-NEXT: movdqa %xmm1, %xmm2
1034 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
1035 ; SSE2-NEXT: pand %xmm2, %xmm0
1036 ; SSE2-NEXT: pandn %xmm1, %xmm2
1037 ; SSE2-NEXT: por %xmm2, %xmm0
1040 ; SSE41-LABEL: min_lt_v4i32:
1042 ; SSE41-NEXT: pminsd %xmm1, %xmm0
1045 ; SSE42-LABEL: min_lt_v4i32:
1047 ; SSE42-NEXT: pminsd %xmm1, %xmm0
1050 ; AVX-LABEL: min_lt_v4i32:
1052 ; AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
1054 %1 = icmp slt <4 x i32> %a, %b
1055 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
; Per-lane signed minimum of two <8 x i32> vectors, written as
; icmp slt + select (takes %a where %a < %b, otherwise %b).
; Per the checks below, SSE2 is expected to use pcmpgtd with a
; pand/pandn/por merge on each xmm pair, SSE4.1 and SSE4.2 two pminsd,
; AVX1 two xmm vpminsd around vextractf128/vinsertf128, and AVX2 and
; the AVX512 runs a single ymm vpminsd.
1059 define <8 x i32> @min_lt_v8i32(<8 x i32> %a, <8 x i32> %b) {
1060 ; SSE2-LABEL: min_lt_v8i32:
1062 ; SSE2-NEXT: movdqa %xmm3, %xmm4
1063 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
1064 ; SSE2-NEXT: movdqa %xmm2, %xmm5
1065 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm5
1066 ; SSE2-NEXT: pand %xmm5, %xmm0
1067 ; SSE2-NEXT: pandn %xmm2, %xmm5
1068 ; SSE2-NEXT: por %xmm5, %xmm0
1069 ; SSE2-NEXT: pand %xmm4, %xmm1
1070 ; SSE2-NEXT: pandn %xmm3, %xmm4
1071 ; SSE2-NEXT: por %xmm4, %xmm1
1074 ; SSE41-LABEL: min_lt_v8i32:
1076 ; SSE41-NEXT: pminsd %xmm2, %xmm0
1077 ; SSE41-NEXT: pminsd %xmm3, %xmm1
1080 ; SSE42-LABEL: min_lt_v8i32:
1082 ; SSE42-NEXT: pminsd %xmm2, %xmm0
1083 ; SSE42-NEXT: pminsd %xmm3, %xmm1
1086 ; AVX1-LABEL: min_lt_v8i32:
1088 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1089 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1090 ; AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2
1091 ; AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
1092 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1095 ; AVX2-LABEL: min_lt_v8i32:
1097 ; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1100 ; AVX512-LABEL: min_lt_v8i32:
1102 ; AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1104 %1 = icmp slt <8 x i32> %a, %b
1105 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
; Signed minimum of two <8 x i16> vectors (icmp slt + select).
; Every SSE level is checked for a single pminsw and every AVX level for a
; single vpminsw.
1109 define <8 x i16> @min_lt_v8i16(<8 x i16> %a, <8 x i16> %b) {
1110 ; SSE-LABEL: min_lt_v8i16:
1112 ; SSE-NEXT: pminsw %xmm1, %xmm0
1115 ; AVX-LABEL: min_lt_v8i16:
1117 ; AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
1119 %1 = icmp slt <8 x i16> %a, %b
1120 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
; Signed minimum of two <16 x i16> vectors (icmp slt + select).
; The SSE checks expect two pminsw (one per xmm register pair); the AVX1
; checks extract the upper 128 bits, use two xmm vpminsw and reinsert; the
; AVX2 and AVX512 checks expect one ymm vpminsw.
1124 define <16 x i16> @min_lt_v16i16(<16 x i16> %a, <16 x i16> %b) {
1125 ; SSE-LABEL: min_lt_v16i16:
1127 ; SSE-NEXT: pminsw %xmm2, %xmm0
1128 ; SSE-NEXT: pminsw %xmm3, %xmm1
1131 ; AVX1-LABEL: min_lt_v16i16:
1133 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1134 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1135 ; AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
1136 ; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
1137 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1140 ; AVX2-LABEL: min_lt_v16i16:
1142 ; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
1145 ; AVX512-LABEL: min_lt_v16i16:
1147 ; AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
1149 %1 = icmp slt <16 x i16> %a, %b
1150 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
; Signed minimum of two <16 x i8> vectors (icmp slt + select).
; The SSE2 checks expect a pcmpgtb mask combined with pand/pandn/por; the
; SSE4.1, SSE4.2 and AVX checks expect a single (v)pminsb.
1154 define <16 x i8> @min_lt_v16i8(<16 x i8> %a, <16 x i8> %b) {
1155 ; SSE2-LABEL: min_lt_v16i8:
1157 ; SSE2-NEXT: movdqa %xmm1, %xmm2
1158 ; SSE2-NEXT: pcmpgtb %xmm0, %xmm2
1159 ; SSE2-NEXT: pand %xmm2, %xmm0
1160 ; SSE2-NEXT: pandn %xmm1, %xmm2
1161 ; SSE2-NEXT: por %xmm2, %xmm0
1164 ; SSE41-LABEL: min_lt_v16i8:
1166 ; SSE41-NEXT: pminsb %xmm1, %xmm0
1169 ; SSE42-LABEL: min_lt_v16i8:
1171 ; SSE42-NEXT: pminsb %xmm1, %xmm0
1174 ; AVX-LABEL: min_lt_v16i8:
1176 ; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1178 %1 = icmp slt <16 x i8> %a, %b
1179 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
; Signed minimum of two <32 x i8> vectors (icmp slt + select).
; The SSE checks handle the xmm0/xmm2 and xmm1/xmm3 register pairs
; separately - pcmpgtb + pand/pandn/por on SSE2, two pminsb on SSE4.1/4.2.
; The AVX1 checks extract the upper 128 bits, use two xmm vpminsb and
; reinsert; the AVX2 and AVX512 checks expect one ymm vpminsb.
1183 define <32 x i8> @min_lt_v32i8(<32 x i8> %a, <32 x i8> %b) {
1184 ; SSE2-LABEL: min_lt_v32i8:
1186 ; SSE2-NEXT: movdqa %xmm3, %xmm4
1187 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm4
1188 ; SSE2-NEXT: movdqa %xmm2, %xmm5
1189 ; SSE2-NEXT: pcmpgtb %xmm0, %xmm5
1190 ; SSE2-NEXT: pand %xmm5, %xmm0
1191 ; SSE2-NEXT: pandn %xmm2, %xmm5
1192 ; SSE2-NEXT: por %xmm5, %xmm0
1193 ; SSE2-NEXT: pand %xmm4, %xmm1
1194 ; SSE2-NEXT: pandn %xmm3, %xmm4
1195 ; SSE2-NEXT: por %xmm4, %xmm1
1198 ; SSE41-LABEL: min_lt_v32i8:
1200 ; SSE41-NEXT: pminsb %xmm2, %xmm0
1201 ; SSE41-NEXT: pminsb %xmm3, %xmm1
1204 ; SSE42-LABEL: min_lt_v32i8:
1206 ; SSE42-NEXT: pminsb %xmm2, %xmm0
1207 ; SSE42-NEXT: pminsb %xmm3, %xmm1
1210 ; AVX1-LABEL: min_lt_v32i8:
1212 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1213 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1214 ; AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
1215 ; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1216 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1219 ; AVX2-LABEL: min_lt_v32i8:
1221 ; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
1224 ; AVX512-LABEL: min_lt_v32i8:
1226 ; AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
1228 %1 = icmp slt <32 x i8> %a, %b
1229 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
1234 ; Signed Minimum (LE)
; Signed minimum of two <2 x i64> vectors, written as icmp sle + select.
; The SSE2 and SSE4.1 checks contain no 64-bit compare instruction; they xor
; both inputs with the constant [2147483648,0,2147483648,0] and assemble the
; comparison from pcmpgtd, pcmpeqd and pshufd, then invert the mask by
; xor-ing with an all-ones register (pcmpeqd reg,reg) before selecting.
; The SSE4.2, AVX1 and AVX2 checks use (v)pcmpgtq, the same all-ones
; inversion, and (v)blendvpd. The AVX512 checks expect a zmm vpminsq
; followed by vzeroupper.
1237 define <2 x i64> @min_le_v2i64(<2 x i64> %a, <2 x i64> %b) {
1238 ; SSE2-LABEL: min_le_v2i64:
1240 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
1241 ; SSE2-NEXT: movdqa %xmm1, %xmm3
1242 ; SSE2-NEXT: pxor %xmm2, %xmm3
1243 ; SSE2-NEXT: pxor %xmm0, %xmm2
1244 ; SSE2-NEXT: movdqa %xmm2, %xmm4
1245 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
1246 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1247 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
1248 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1249 ; SSE2-NEXT: pand %xmm5, %xmm2
1250 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
1251 ; SSE2-NEXT: por %xmm2, %xmm3
1252 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
1253 ; SSE2-NEXT: pxor %xmm3, %xmm2
1254 ; SSE2-NEXT: pandn %xmm0, %xmm3
1255 ; SSE2-NEXT: pandn %xmm1, %xmm2
1256 ; SSE2-NEXT: por %xmm3, %xmm2
1257 ; SSE2-NEXT: movdqa %xmm2, %xmm0
1260 ; SSE41-LABEL: min_le_v2i64:
1262 ; SSE41-NEXT: movdqa %xmm0, %xmm2
1263 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
1264 ; SSE41-NEXT: movdqa %xmm1, %xmm3
1265 ; SSE41-NEXT: pxor %xmm0, %xmm3
1266 ; SSE41-NEXT: pxor %xmm2, %xmm0
1267 ; SSE41-NEXT: movdqa %xmm0, %xmm4
1268 ; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
1269 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1270 ; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
1271 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1272 ; SSE41-NEXT: pand %xmm5, %xmm0
1273 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
1274 ; SSE41-NEXT: por %xmm0, %xmm3
1275 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
1276 ; SSE41-NEXT: pxor %xmm3, %xmm0
1277 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
1278 ; SSE41-NEXT: movapd %xmm1, %xmm0
1281 ; SSE42-LABEL: min_le_v2i64:
1283 ; SSE42-NEXT: movdqa %xmm0, %xmm2
1284 ; SSE42-NEXT: pcmpgtq %xmm1, %xmm0
1285 ; SSE42-NEXT: pcmpeqd %xmm3, %xmm3
1286 ; SSE42-NEXT: pxor %xmm3, %xmm0
1287 ; SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
1288 ; SSE42-NEXT: movapd %xmm1, %xmm0
1291 ; AVX1-LABEL: min_le_v2i64:
1293 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
1294 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
1295 ; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
1296 ; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1299 ; AVX2-LABEL: min_le_v2i64:
1301 ; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
1302 ; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
1303 ; AVX2-NEXT: vpxor %xmm3, %xmm2, %xmm2
1304 ; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1307 ; AVX512-LABEL: min_le_v2i64:
1309 ; AVX512-NEXT: # kill: %xmm1<def> %xmm1<kill> %zmm1<def>
1310 ; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<def>
1311 ; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
1312 ; AVX512-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<kill>
1313 ; AVX512-NEXT: vzeroupper
1315 %1 = icmp sle <2 x i64> %a, %b
1316 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
; Signed minimum of two <4 x i64> vectors (icmp sle + select).
; The SSE2 and SSE4.1 checks build each 64-bit compare from pcmpgtd,
; pcmpeqd and pshufd after xor-ing the inputs with
; [2147483648,0,2147483648,0], once per xmm register pair, and invert the
; masks with an all-ones register. The SSE4.2 checks use two pcmpgtq plus
; blendvpd. The AVX1 checks compare the two 128-bit halves separately and
; blend once on ymm; the AVX2 checks use a ymm vpcmpgtq, inversion and
; vblendvpd. The AVX512 checks expect a single zmm vpminsq.
1320 define <4 x i64> @min_le_v4i64(<4 x i64> %a, <4 x i64> %b) {
1321 ; SSE2-LABEL: min_le_v4i64:
1323 ; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,0,2147483648,0]
1324 ; SSE2-NEXT: movdqa %xmm3, %xmm4
1325 ; SSE2-NEXT: pxor %xmm7, %xmm4
1326 ; SSE2-NEXT: movdqa %xmm1, %xmm5
1327 ; SSE2-NEXT: pxor %xmm7, %xmm5
1328 ; SSE2-NEXT: movdqa %xmm5, %xmm6
1329 ; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
1330 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2]
1331 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm5
1332 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
1333 ; SSE2-NEXT: pand %xmm8, %xmm4
1334 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[1,1,3,3]
1335 ; SSE2-NEXT: por %xmm4, %xmm8
1336 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
1337 ; SSE2-NEXT: movdqa %xmm8, %xmm9
1338 ; SSE2-NEXT: pxor %xmm4, %xmm9
1339 ; SSE2-NEXT: movdqa %xmm2, %xmm6
1340 ; SSE2-NEXT: pxor %xmm7, %xmm6
1341 ; SSE2-NEXT: pxor %xmm0, %xmm7
1342 ; SSE2-NEXT: movdqa %xmm7, %xmm5
1343 ; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
1344 ; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm5[0,0,2,2]
1345 ; SSE2-NEXT: pcmpeqd %xmm6, %xmm7
1346 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
1347 ; SSE2-NEXT: pand %xmm10, %xmm6
1348 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
1349 ; SSE2-NEXT: por %xmm6, %xmm5
1350 ; SSE2-NEXT: pxor %xmm5, %xmm4
1351 ; SSE2-NEXT: pandn %xmm0, %xmm5
1352 ; SSE2-NEXT: pandn %xmm2, %xmm4
1353 ; SSE2-NEXT: por %xmm5, %xmm4
1354 ; SSE2-NEXT: pandn %xmm1, %xmm8
1355 ; SSE2-NEXT: pandn %xmm3, %xmm9
1356 ; SSE2-NEXT: por %xmm8, %xmm9
1357 ; SSE2-NEXT: movdqa %xmm4, %xmm0
1358 ; SSE2-NEXT: movdqa %xmm9, %xmm1
1361 ; SSE41-LABEL: min_le_v4i64:
1363 ; SSE41-NEXT: movdqa %xmm0, %xmm8
1364 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
1365 ; SSE41-NEXT: movdqa %xmm3, %xmm5
1366 ; SSE41-NEXT: pxor %xmm0, %xmm5
1367 ; SSE41-NEXT: movdqa %xmm1, %xmm6
1368 ; SSE41-NEXT: pxor %xmm0, %xmm6
1369 ; SSE41-NEXT: movdqa %xmm6, %xmm7
1370 ; SSE41-NEXT: pcmpgtd %xmm5, %xmm7
1371 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
1372 ; SSE41-NEXT: pcmpeqd %xmm5, %xmm6
1373 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1374 ; SSE41-NEXT: pand %xmm4, %xmm6
1375 ; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
1376 ; SSE41-NEXT: por %xmm6, %xmm5
1377 ; SSE41-NEXT: pcmpeqd %xmm9, %xmm9
1378 ; SSE41-NEXT: pxor %xmm9, %xmm5
1379 ; SSE41-NEXT: movdqa %xmm2, %xmm6
1380 ; SSE41-NEXT: pxor %xmm0, %xmm6
1381 ; SSE41-NEXT: pxor %xmm8, %xmm0
1382 ; SSE41-NEXT: movdqa %xmm0, %xmm7
1383 ; SSE41-NEXT: pcmpgtd %xmm6, %xmm7
1384 ; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
1385 ; SSE41-NEXT: pcmpeqd %xmm6, %xmm0
1386 ; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
1387 ; SSE41-NEXT: pand %xmm4, %xmm6
1388 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3]
1389 ; SSE41-NEXT: por %xmm6, %xmm0
1390 ; SSE41-NEXT: pxor %xmm9, %xmm0
1391 ; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm2
1392 ; SSE41-NEXT: movdqa %xmm5, %xmm0
1393 ; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3
1394 ; SSE41-NEXT: movapd %xmm2, %xmm0
1395 ; SSE41-NEXT: movapd %xmm3, %xmm1
1398 ; SSE42-LABEL: min_le_v4i64:
1400 ; SSE42-NEXT: movdqa %xmm0, %xmm4
1401 ; SSE42-NEXT: movdqa %xmm1, %xmm5
1402 ; SSE42-NEXT: pcmpgtq %xmm3, %xmm5
1403 ; SSE42-NEXT: pcmpeqd %xmm6, %xmm6
1404 ; SSE42-NEXT: pxor %xmm6, %xmm5
1405 ; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
1406 ; SSE42-NEXT: pxor %xmm6, %xmm0
1407 ; SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2
1408 ; SSE42-NEXT: movdqa %xmm5, %xmm0
1409 ; SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
1410 ; SSE42-NEXT: movapd %xmm2, %xmm0
1411 ; SSE42-NEXT: movapd %xmm3, %xmm1
1414 ; AVX1-LABEL: min_le_v4i64:
1416 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1417 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1418 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
1419 ; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
1420 ; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
1421 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm4
1422 ; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
1423 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1424 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1427 ; AVX2-LABEL: min_le_v4i64:
1429 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
1430 ; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
1431 ; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2
1432 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1435 ; AVX512-LABEL: min_le_v4i64:
1437 ; AVX512-NEXT: # kill: %ymm1<def> %ymm1<kill> %zmm1<def>
1438 ; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
1439 ; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
1440 ; AVX512-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<kill>
1442 %1 = icmp sle <4 x i64> %a, %b
1443 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
; Signed minimum of two <4 x i32> vectors, written as icmp sle + select.
; The SSE2 checks compute a pcmpgtd mask, invert it by xor-ing with an
; all-ones register, and select with pandn/pandn/por; the SSE4.1, SSE4.2 and
; AVX checks expect a single (v)pminsd.
1447 define <4 x i32> @min_le_v4i32(<4 x i32> %a, <4 x i32> %b) {
1448 ; SSE2-LABEL: min_le_v4i32:
1450 ; SSE2-NEXT: movdqa %xmm0, %xmm2
1451 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
1452 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
1453 ; SSE2-NEXT: pxor %xmm2, %xmm3
1454 ; SSE2-NEXT: pandn %xmm0, %xmm2
1455 ; SSE2-NEXT: pandn %xmm1, %xmm3
1456 ; SSE2-NEXT: por %xmm3, %xmm2
1457 ; SSE2-NEXT: movdqa %xmm2, %xmm0
1460 ; SSE41-LABEL: min_le_v4i32:
1462 ; SSE41-NEXT: pminsd %xmm1, %xmm0
1465 ; SSE42-LABEL: min_le_v4i32:
1467 ; SSE42-NEXT: pminsd %xmm1, %xmm0
1470 ; AVX-LABEL: min_le_v4i32:
1472 ; AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
1474 %1 = icmp sle <4 x i32> %a, %b
1475 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
; Signed minimum of two <8 x i32> vectors (icmp sle + select).
; The SSE2 checks build a pcmpgtd mask per xmm register pair, invert it with
; a shared all-ones register, and select with pandn/pandn/por. The SSE4.1
; and SSE4.2 checks expect two pminsd; the AVX1 checks extract the upper 128
; bits, use two xmm vpminsd and reinsert; the AVX2 and AVX512 checks expect
; one ymm vpminsd.
1479 define <8 x i32> @min_le_v8i32(<8 x i32> %a, <8 x i32> %b) {
1480 ; SSE2-LABEL: min_le_v8i32:
1482 ; SSE2-NEXT: movdqa %xmm1, %xmm6
1483 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm6
1484 ; SSE2-NEXT: pcmpeqd %xmm7, %xmm7
1485 ; SSE2-NEXT: movdqa %xmm6, %xmm4
1486 ; SSE2-NEXT: pxor %xmm7, %xmm4
1487 ; SSE2-NEXT: movdqa %xmm0, %xmm5
1488 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
1489 ; SSE2-NEXT: pxor %xmm5, %xmm7
1490 ; SSE2-NEXT: pandn %xmm0, %xmm5
1491 ; SSE2-NEXT: pandn %xmm2, %xmm7
1492 ; SSE2-NEXT: por %xmm7, %xmm5
1493 ; SSE2-NEXT: pandn %xmm1, %xmm6
1494 ; SSE2-NEXT: pandn %xmm3, %xmm4
1495 ; SSE2-NEXT: por %xmm6, %xmm4
1496 ; SSE2-NEXT: movdqa %xmm5, %xmm0
1497 ; SSE2-NEXT: movdqa %xmm4, %xmm1
1500 ; SSE41-LABEL: min_le_v8i32:
1502 ; SSE41-NEXT: pminsd %xmm2, %xmm0
1503 ; SSE41-NEXT: pminsd %xmm3, %xmm1
1506 ; SSE42-LABEL: min_le_v8i32:
1508 ; SSE42-NEXT: pminsd %xmm2, %xmm0
1509 ; SSE42-NEXT: pminsd %xmm3, %xmm1
1512 ; AVX1-LABEL: min_le_v8i32:
1514 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1515 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1516 ; AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2
1517 ; AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
1518 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1521 ; AVX2-LABEL: min_le_v8i32:
1523 ; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1526 ; AVX512-LABEL: min_le_v8i32:
1528 ; AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1530 %1 = icmp sle <8 x i32> %a, %b
1531 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
; Signed minimum of two <8 x i16> vectors (icmp sle + select).
; Every SSE level is checked for a single pminsw and every AVX level for a
; single vpminsw.
1535 define <8 x i16> @min_le_v8i16(<8 x i16> %a, <8 x i16> %b) {
1536 ; SSE-LABEL: min_le_v8i16:
1538 ; SSE-NEXT: pminsw %xmm1, %xmm0
1541 ; AVX-LABEL: min_le_v8i16:
1543 ; AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
1545 %1 = icmp sle <8 x i16> %a, %b
1546 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
; Signed minimum of two <16 x i16> vectors (icmp sle + select).
; The SSE checks expect two pminsw (one per xmm register pair); the AVX1
; checks extract the upper 128 bits, use two xmm vpminsw and reinsert; the
; AVX2 and AVX512 checks expect one ymm vpminsw.
1550 define <16 x i16> @min_le_v16i16(<16 x i16> %a, <16 x i16> %b) {
1551 ; SSE-LABEL: min_le_v16i16:
1553 ; SSE-NEXT: pminsw %xmm2, %xmm0
1554 ; SSE-NEXT: pminsw %xmm3, %xmm1
1557 ; AVX1-LABEL: min_le_v16i16:
1559 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1560 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1561 ; AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
1562 ; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
1563 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1566 ; AVX2-LABEL: min_le_v16i16:
1568 ; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
1571 ; AVX512-LABEL: min_le_v16i16:
1573 ; AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
1575 %1 = icmp sle <16 x i16> %a, %b
1576 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
; Signed minimum of two <16 x i8> vectors (icmp sle + select).
; The SSE2 checks compute a pcmpgtb mask, invert it by xor-ing with an
; all-ones register, and select with pandn/pandn/por; the SSE4.1, SSE4.2 and
; AVX checks expect a single (v)pminsb.
1580 define <16 x i8> @min_le_v16i8(<16 x i8> %a, <16 x i8> %b) {
1581 ; SSE2-LABEL: min_le_v16i8:
1583 ; SSE2-NEXT: movdqa %xmm0, %xmm2
1584 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
1585 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
1586 ; SSE2-NEXT: pxor %xmm2, %xmm3
1587 ; SSE2-NEXT: pandn %xmm0, %xmm2
1588 ; SSE2-NEXT: pandn %xmm1, %xmm3
1589 ; SSE2-NEXT: por %xmm3, %xmm2
1590 ; SSE2-NEXT: movdqa %xmm2, %xmm0
1593 ; SSE41-LABEL: min_le_v16i8:
1595 ; SSE41-NEXT: pminsb %xmm1, %xmm0
1598 ; SSE42-LABEL: min_le_v16i8:
1600 ; SSE42-NEXT: pminsb %xmm1, %xmm0
1603 ; AVX-LABEL: min_le_v16i8:
1605 ; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1607 %1 = icmp sle <16 x i8> %a, %b
1608 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
; Signed minimum of two <32 x i8> vectors (icmp sle + select).
; The SSE2 checks build a pcmpgtb mask per xmm register pair, invert it with
; a shared all-ones register, and select with pandn/pandn/por. The SSE4.1
; and SSE4.2 checks expect two pminsb; the AVX1 checks extract the upper 128
; bits, use two xmm vpminsb and reinsert; the AVX2 and AVX512 checks expect
; one ymm vpminsb.
1612 define <32 x i8> @min_le_v32i8(<32 x i8> %a, <32 x i8> %b) {
1613 ; SSE2-LABEL: min_le_v32i8:
1615 ; SSE2-NEXT: movdqa %xmm1, %xmm6
1616 ; SSE2-NEXT: pcmpgtb %xmm3, %xmm6
1617 ; SSE2-NEXT: pcmpeqd %xmm7, %xmm7
1618 ; SSE2-NEXT: movdqa %xmm6, %xmm4
1619 ; SSE2-NEXT: pxor %xmm7, %xmm4
1620 ; SSE2-NEXT: movdqa %xmm0, %xmm5
1621 ; SSE2-NEXT: pcmpgtb %xmm2, %xmm5
1622 ; SSE2-NEXT: pxor %xmm5, %xmm7
1623 ; SSE2-NEXT: pandn %xmm0, %xmm5
1624 ; SSE2-NEXT: pandn %xmm2, %xmm7
1625 ; SSE2-NEXT: por %xmm7, %xmm5
1626 ; SSE2-NEXT: pandn %xmm1, %xmm6
1627 ; SSE2-NEXT: pandn %xmm3, %xmm4
1628 ; SSE2-NEXT: por %xmm6, %xmm4
1629 ; SSE2-NEXT: movdqa %xmm5, %xmm0
1630 ; SSE2-NEXT: movdqa %xmm4, %xmm1
1633 ; SSE41-LABEL: min_le_v32i8:
1635 ; SSE41-NEXT: pminsb %xmm2, %xmm0
1636 ; SSE41-NEXT: pminsb %xmm3, %xmm1
1639 ; SSE42-LABEL: min_le_v32i8:
1641 ; SSE42-NEXT: pminsb %xmm2, %xmm0
1642 ; SSE42-NEXT: pminsb %xmm3, %xmm1
1645 ; AVX1-LABEL: min_le_v32i8:
1647 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1648 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1649 ; AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
1650 ; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1651 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1654 ; AVX2-LABEL: min_le_v32i8:
1656 ; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
1659 ; AVX512-LABEL: min_le_v32i8:
1661 ; AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
1663 %1 = icmp sle <32 x i8> %a, %b
1664 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
; Constant-folding case for signed max (icmp sgt + select) on <2 x i64>.
; Both operands are constant vectors whose lane 0 is re-inserted with the
; value it already holds, so the checks expect only a constant load.
; The folded result is <-1, 7>, printed unsigned as [18446744073709551615,7].
1672 define <2 x i64> @max_gt_v2i64c() {
1673 ; SSE-LABEL: max_gt_v2i64c:
1675 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551615,7]
1678 ; AVX-LABEL: max_gt_v2i64c:
1680 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551615,7]
1682 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
1683 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
1684 %3 = icmp sgt <2 x i64> %1, %2
1685 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
; Constant-folding case for signed max (icmp sgt + select) on <4 x i64>.
; The folded result is <-1, -1, 7, 7>. The SSE checks load [7,7] into xmm1
; and produce the all-ones (-1) lanes in xmm0 with pcmpeqd; the AVX checks
; load the whole vector as one ymm constant.
1689 define <4 x i64> @max_gt_v4i64c() {
1690 ; SSE-LABEL: max_gt_v4i64c:
1692 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,7]
1693 ; SSE-NEXT: pcmpeqd %xmm0, %xmm0
1696 ; AVX-LABEL: max_gt_v4i64c:
1698 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,7,7]
1700 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
1701 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
1702 %3 = icmp sgt <4 x i64> %1, %2
1703 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
; Constant-folding case for signed max (icmp sgt + select) on <4 x i32>.
; The folded result is <-1, -1, 7, 7> (-1 is printed as 4294967295), so the
; checks expect only a constant load.
1707 define <4 x i32> @max_gt_v4i32c() {
1708 ; SSE-LABEL: max_gt_v4i32c:
1710 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
1713 ; AVX-LABEL: max_gt_v4i32c:
1715 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
1717 %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
1718 %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
1719 %3 = icmp sgt <4 x i32> %1, %2
1720 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
; Constant-folding case for signed max (icmp sgt + select) on <8 x i32>.
; The folded result is <-1, -3, -3, -1, 7, 5, 5, 7>; the SSE checks load it
; as two xmm constants and the AVX checks as one ymm constant.
1724 define <8 x i32> @max_gt_v8i32c() {
1725 ; SSE-LABEL: max_gt_v8i32c:
1727 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295]
1728 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7]
1731 ; AVX-LABEL: max_gt_v8i32c:
1733 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7]
1735 %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
1736 %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
1737 %3 = icmp sgt <8 x i32> %1, %2
1738 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
; Constant-folding case for signed max (icmp sgt + select) on <8 x i16>.
; The folded result is <-1, -3, -3, -1, 7, 5, 5, 7> (negative lanes printed
; as 16-bit unsigned values), so the checks expect only a constant load.
1742 define <8 x i16> @max_gt_v8i16c() {
1743 ; SSE-LABEL: max_gt_v8i16c:
1745 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
1748 ; AVX-LABEL: max_gt_v8i16c:
1750 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
1752 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
1753 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
1754 %3 = icmp sgt <8 x i16> %1, %2
1755 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
; Constant-folding case for signed max (icmp sgt + select) on <16 x i16>.
; The folded result is <-1,-2,-3,-4,-3,-2,-1,0,7,6,5,4,5,6,7,8>; the SSE
; checks load it as two xmm constants and the AVX checks as one ymm constant.
1759 define <16 x i16> @max_gt_v16i16c() {
1760 ; SSE-LABEL: max_gt_v16i16c:
1762 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0]
1763 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8]
1766 ; AVX-LABEL: max_gt_v16i16c:
1768 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8]
1770 %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
1771 %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
1772 %3 = icmp sgt <16 x i16> %1, %2
1773 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
; Constant-folding case for signed max (icmp sgt + select) on <16 x i8>.
; The folded result is <-1,-2,-3,-4,-3,-2,-1,0,7,6,5,4,5,6,7,8> (negative
; lanes printed as 8-bit unsigned values), loaded as a single xmm constant.
1777 define <16 x i8> @max_gt_v16i8c() {
1778 ; SSE-LABEL: max_gt_v16i8c:
1780 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
1783 ; AVX-LABEL: max_gt_v16i8c:
1785 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
1787 %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
1788 %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
1789 %3 = icmp sgt <16 x i8> %1, %2
1790 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
; Constant-folding case for signed max (icmp sge + select) on <2 x i64>.
; Both operands are constant vectors whose lane 0 is re-inserted with the
; value it already holds, so the checks expect only a constant load.
; The folded result is <-1, 7>, printed unsigned as [18446744073709551615,7].
1794 define <2 x i64> @max_ge_v2i64c() {
1795 ; SSE-LABEL: max_ge_v2i64c:
1797 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551615,7]
1800 ; AVX-LABEL: max_ge_v2i64c:
1802 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551615,7]
1804 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
1805 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
1806 %3 = icmp sge <2 x i64> %1, %2
1807 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
; Constant-folding case for signed max (icmp sge + select) on <4 x i64>.
; The folded result is <-1, -1, 7, 7>. The SSE checks load [7,7] into xmm1
; and produce the all-ones (-1) lanes in xmm0 with pcmpeqd; the AVX checks
; load the whole vector as one ymm constant.
1811 define <4 x i64> @max_ge_v4i64c() {
1812 ; SSE-LABEL: max_ge_v4i64c:
1814 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,7]
1815 ; SSE-NEXT: pcmpeqd %xmm0, %xmm0
1818 ; AVX-LABEL: max_ge_v4i64c:
1820 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,7,7]
1822 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
1823 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
1824 %3 = icmp sge <4 x i64> %1, %2
1825 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
; Constant-folding case for signed max (icmp sge + select) on <4 x i32>.
; The folded result is <-1, -1, 7, 7> (-1 is printed as 4294967295), so the
; checks expect only a constant load.
1829 define <4 x i32> @max_ge_v4i32c() {
1830 ; SSE-LABEL: max_ge_v4i32c:
1832 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
1835 ; AVX-LABEL: max_ge_v4i32c:
1837 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
1839 %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
1840 %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
1841 %3 = icmp sge <4 x i32> %1, %2
1842 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
; Constant-folding case for signed max (icmp sge + select) on <8 x i32>.
; The folded result is <-1, -3, -3, -1, 7, 5, 5, 7>; the SSE checks load it
; as two xmm constants and the AVX checks as one ymm constant.
1846 define <8 x i32> @max_ge_v8i32c() {
1847 ; SSE-LABEL: max_ge_v8i32c:
1849 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295]
1850 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7]
1853 ; AVX-LABEL: max_ge_v8i32c:
1855 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7]
1857 %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
1858 %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
1859 %3 = icmp sge <8 x i32> %1, %2
1860 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
; Constant-folding case for signed max (icmp sge + select) on <8 x i16>.
; The folded result is <-1, -3, -3, -1, 7, 5, 5, 7> (negative lanes printed
; as 16-bit unsigned values), so the checks expect only a constant load.
1864 define <8 x i16> @max_ge_v8i16c() {
1865 ; SSE-LABEL: max_ge_v8i16c:
1867 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
1870 ; AVX-LABEL: max_ge_v8i16c:
1872 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
1874 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
1875 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
1876 %3 = icmp sge <8 x i16> %1, %2
1877 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
; Constant-folding case for signed max (icmp sge + select) on <16 x i16>.
; The folded result is <-1,-2,-3,-4,-3,-2,-1,0,7,6,5,4,5,6,7,8>; the SSE
; checks load it as two xmm constants and the AVX checks as one ymm constant.
1881 define <16 x i16> @max_ge_v16i16c() {
1882 ; SSE-LABEL: max_ge_v16i16c:
1884 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0]
1885 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8]
1888 ; AVX-LABEL: max_ge_v16i16c:
1890 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8]
1892 %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
1893 %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
1894 %3 = icmp sge <16 x i16> %1, %2
1895 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
; Constant-folding case for signed max (icmp sge + select) on <16 x i8>.
; The folded result is <-1,-2,-3,-4,-3,-2,-1,0,7,6,5,4,5,6,7,8> (negative
; lanes printed as 8-bit unsigned values), loaded as a single xmm constant.
1899 define <16 x i8> @max_ge_v16i8c() {
1900 ; SSE-LABEL: max_ge_v16i8c:
1902 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
1905 ; AVX-LABEL: max_ge_v16i8c:
1907 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
1909 %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
1910 %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
1911 %3 = icmp sge <16 x i8> %1, %2
1912 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
; Constant-folding case for signed min (icmp slt + select) on <2 x i64>.
; Both operands are constant vectors whose lane 0 is re-inserted with the
; value it already holds, so the checks expect only a constant load.
; The folded result is <-7, 1>, printed unsigned as [18446744073709551609,1].
1916 define <2 x i64> @min_lt_v2i64c() {
1917 ; SSE-LABEL: min_lt_v2i64c:
1919 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,1]
1922 ; AVX-LABEL: min_lt_v2i64c:
1924 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551609,1]
1926 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
1927 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
1928 %3 = icmp slt <2 x i64> %1, %2
1929 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
; Constant-folding case for signed min (icmp slt + select) on <4 x i64>.
; The folded result is <-7, -7, 1, 1>; the SSE checks load it as two xmm
; constants and the AVX checks as one ymm constant.
1933 define <4 x i64> @min_lt_v4i64c() {
1934 ; SSE-LABEL: min_lt_v4i64c:
1936 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,18446744073709551609]
1937 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1]
1940 ; AVX-LABEL: min_lt_v4i64c:
1942 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551609,18446744073709551609,1,1]
1944 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
1945 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
1946 %3 = icmp slt <4 x i64> %1, %2
1947 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
; Constant-folding case for signed min (icmp slt + select) on <4 x i32>.
; The folded result is <-7, -7, 1, 1> (-7 is printed as 4294967289), so the
; checks expect only a constant load.
1951 define <4 x i32> @min_lt_v4i32c() {
1952 ; SSE-LABEL: min_lt_v4i32c:
1954 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
1957 ; AVX-LABEL: min_lt_v4i32c:
1959 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
1961 %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
1962 %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
1963 %3 = icmp slt <4 x i32> %1, %2
1964 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
; Constant-folding case for signed min (icmp slt + select) on <8 x i32>.
; The folded result is <-7, -5, -5, -7, 1, 3, 3, 1>; the SSE checks load it
; as two xmm constants and the AVX checks as one ymm constant.
1968 define <8 x i32> @min_lt_v8i32c() {
1969 ; SSE-LABEL: min_lt_v8i32c:
1971 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
1972 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1]
1975 ; AVX-LABEL: min_lt_v8i32c:
1977 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
1979 %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
1980 %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
1981 %3 = icmp slt <8 x i32> %1, %2
1982 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
; Constant-folding case for signed min (icmp slt + select) on <8 x i16>.
; The folded result is <-7, -5, -5, -7, 1, 3, 3, 1> (negative lanes printed
; as 16-bit unsigned values), so the checks expect only a constant load.
1986 define <8 x i16> @min_lt_v8i16c() {
1987 ; SSE-LABEL: min_lt_v8i16c:
1989 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
1992 ; AVX-LABEL: min_lt_v8i16c:
1994 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
1996 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
1997 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
1998 %3 = icmp slt <8 x i16> %1, %2
1999 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
; Constant-folding case for signed min (icmp slt + select) on <16 x i16>.
; The folded result is <-7,-6,-5,-4,-5,-6,-7,0,1,2,3,4,3,2,1,0>; the SSE
; checks load it as two xmm constants and the AVX checks as one ymm constant.
2003 define <16 x i16> @min_lt_v16i16c() {
2004 ; SSE-LABEL: min_lt_v16i16c:
2006 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65530,65531,65532,65531,65530,65529,0]
2007 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0]
2010 ; AVX-LABEL: min_lt_v16i16c:
2012 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65529,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0]
2014 %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
2015 %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
2016 %3 = icmp slt <16 x i16> %1, %2
2017 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
; Constant-folding case for signed min (icmp slt + select) on <16 x i8>.
; The folded result is <-7,-6,-5,-4,-5,-6,-7,0,1,2,3,4,3,2,1,0> (negative
; lanes printed as 8-bit unsigned values), loaded as a single xmm constant.
2021 define <16 x i8> @min_lt_v16i8c() {
2022 ; SSE-LABEL: min_lt_v16i8c:
2024 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
2027 ; AVX-LABEL: min_lt_v16i8c:
2029 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
2031 %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
2032 %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
2033 %3 = icmp slt <16 x i8> %1, %2
2034 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
; Constant-folding case for signed min (icmp sle + select) on <2 x i64>.
; Both operands are constant vectors whose lane 0 is re-inserted with the
; value it already holds, so the checks expect only a constant load.
; The folded result is <-7, 1>, printed unsigned as [18446744073709551609,1].
2038 define <2 x i64> @min_le_v2i64c() {
2039 ; SSE-LABEL: min_le_v2i64c:
2041 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,1]
2044 ; AVX-LABEL: min_le_v2i64c:
2046 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551609,1]
2048 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
2049 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
2050 %3 = icmp sle <2 x i64> %1, %2
2051 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
; Constant-folding case for signed min (icmp sle + select) on <4 x i64>.
; The folded result is <-7, -7, 1, 1>; the SSE checks load it as two xmm
; constants and the AVX checks as one ymm constant.
2055 define <4 x i64> @min_le_v4i64c() {
2056 ; SSE-LABEL: min_le_v4i64c:
2058 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,18446744073709551609]
2059 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1]
2062 ; AVX-LABEL: min_le_v4i64c:
2064 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551609,18446744073709551609,1,1]
2066 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
2067 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
2068 %3 = icmp sle <4 x i64> %1, %2
2069 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
; Constant-folding case for signed min (icmp sle + select) on <4 x i32>.
; The folded result is <-7, -7, 1, 1> (-7 is printed as 4294967289), so the
; checks expect only a constant load.
2073 define <4 x i32> @min_le_v4i32c() {
2074 ; SSE-LABEL: min_le_v4i32c:
2076 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
2079 ; AVX-LABEL: min_le_v4i32c:
2081 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
2083 %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
2084 %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
2085 %3 = icmp sle <4 x i32> %1, %2
2086 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
; Constant-folding case for signed min (icmp sle + select) on <8 x i32>.
; The folded result is <-7, -5, -5, -7, 1, 3, 3, 1>; the SSE checks load it
; as two xmm constants and the AVX checks as one ymm constant.
2090 define <8 x i32> @min_le_v8i32c() {
2091 ; SSE-LABEL: min_le_v8i32c:
2093 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
2094 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1]
2097 ; AVX-LABEL: min_le_v8i32c:
2099 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
2101 %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
2102 %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
2103 %3 = icmp sle <8 x i32> %1, %2
2104 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
; Constant-folding case for signed min (icmp sle + select) on <8 x i16>.
; The folded result is <-7, -5, -5, -7, 1, 3, 3, 1> (negative lanes printed
; as 16-bit unsigned values), so the checks expect only a constant load.
2108 define <8 x i16> @min_le_v8i16c() {
2109 ; SSE-LABEL: min_le_v8i16c:
2111 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
2114 ; AVX-LABEL: min_le_v8i16c:
2116 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
2118 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
2119 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
2120 %3 = icmp sle <8 x i16> %1, %2
2121 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
; Constant-folding case for signed min (icmp sle + select) on <16 x i16>.
; The folded result is <-7,-6,-5,-4,-5,-6,-7,0,1,2,3,4,3,2,1,0>; the SSE
; checks load it as two xmm constants and the AVX checks as one ymm constant.
2125 define <16 x i16> @min_le_v16i16c() {
2126 ; SSE-LABEL: min_le_v16i16c:
2128 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65530,65531,65532,65531,65530,65529,0]
2129 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0]
2132 ; AVX-LABEL: min_le_v16i16c:
2134 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65529,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0]
2136 %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
2137 %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
2138 %3 = icmp sle <16 x i16> %1, %2
2139 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
; Constant-folding case for signed min (icmp sle + select) on <16 x i8>.
; The folded result is <-7,-6,-5,-4,-5,-6,-7,0,1,2,3,4,3,2,1,0> (negative
; lanes printed as 8-bit unsigned values), loaded as a single xmm constant.
2143 define <16 x i8> @min_le_v16i8c() {
2144 ; SSE-LABEL: min_le_v16i8c:
2146 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
2149 ; AVX-LABEL: min_le_v16i8c:
2151 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
2153 %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
2154 %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
2155 %3 = icmp sle <16 x i8> %1, %2
2156 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2