1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512
11 ; Unsigned Maximum (GT)
; max_gt_v2i64 - unsigned maximum of two <2 x i64> vectors, written as
; icmp ugt + select so each lane keeps %a where %a > %b and %b otherwise.
; The expected SSE2/SSE4.1 sequences XOR both inputs with 9223372039002259456
; (0x8000000080000000) and combine 32-bit pcmpgtd/pcmpeqd results; the
; SSE4.2/AVX1/AVX2 sequences XOR with 9223372036854775808 (0x8000000000000000)
; and use pcmpgtq plus a blendvpd. The AVX512 runs expect one vpmaxuq on zmm
; registers.
14 define <2 x i64> @max_gt_v2i64(<2 x i64> %a, <2 x i64> %b) {
15 ; SSE2-LABEL: max_gt_v2i64:
17 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
18 ; SSE2-NEXT: movdqa %xmm1, %xmm3
19 ; SSE2-NEXT: pxor %xmm2, %xmm3
20 ; SSE2-NEXT: pxor %xmm0, %xmm2
21 ; SSE2-NEXT: movdqa %xmm2, %xmm4
22 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
23 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
24 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
25 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
26 ; SSE2-NEXT: pand %xmm5, %xmm2
27 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
28 ; SSE2-NEXT: por %xmm2, %xmm3
29 ; SSE2-NEXT: pand %xmm3, %xmm0
30 ; SSE2-NEXT: pandn %xmm1, %xmm3
31 ; SSE2-NEXT: por %xmm3, %xmm0
34 ; SSE41-LABEL: max_gt_v2i64:
36 ; SSE41-NEXT: movdqa %xmm0, %xmm2
37 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
38 ; SSE41-NEXT: movdqa %xmm1, %xmm0
39 ; SSE41-NEXT: pxor %xmm3, %xmm0
40 ; SSE41-NEXT: pxor %xmm2, %xmm3
41 ; SSE41-NEXT: movdqa %xmm3, %xmm4
42 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
43 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
44 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
45 ; SSE41-NEXT: pand %xmm4, %xmm0
46 ; SSE41-NEXT: por %xmm3, %xmm0
47 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
48 ; SSE41-NEXT: movapd %xmm1, %xmm0
51 ; SSE42-LABEL: max_gt_v2i64:
53 ; SSE42-NEXT: movdqa %xmm0, %xmm2
54 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
55 ; SSE42-NEXT: movdqa %xmm1, %xmm3
56 ; SSE42-NEXT: pxor %xmm0, %xmm3
57 ; SSE42-NEXT: pxor %xmm2, %xmm0
58 ; SSE42-NEXT: pcmpgtq %xmm3, %xmm0
59 ; SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
60 ; SSE42-NEXT: movapd %xmm1, %xmm0
63 ; AVX1-LABEL: max_gt_v2i64:
65 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
66 ; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3
67 ; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm2
68 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
69 ; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
72 ; AVX2-LABEL: max_gt_v2i64:
74 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
75 ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
76 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2
77 ; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
78 ; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
81 ; AVX512-LABEL: max_gt_v2i64:
83 ; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
84 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
85 ; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
86 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
87 ; AVX512-NEXT: vzeroupper
; IR under test - lane-wise unsigned compare, then select between the inputs.
89 %1 = icmp ugt <2 x i64> %a, %b
90 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
; max_gt_v4i64 - unsigned maximum of two <4 x i64> vectors (icmp ugt + select).
; The SSE-level runs expect the 256-bit operands to be handled as two 128-bit
; halves (xmm0 against xmm2, then xmm1 against xmm3). AVX1 expects the upper
; halves to be split off with vextractf128 for the compares and rejoined with
; vinsertf128 ahead of a single ymm blend. AVX2 expects full-width ymm
; operations, and the AVX512 runs expect one vpmaxuq on zmm registers.
94 define <4 x i64> @max_gt_v4i64(<4 x i64> %a, <4 x i64> %b) {
95 ; SSE2-LABEL: max_gt_v4i64:
97 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
98 ; SSE2-NEXT: movdqa %xmm2, %xmm5
99 ; SSE2-NEXT: pxor %xmm4, %xmm5
100 ; SSE2-NEXT: movdqa %xmm0, %xmm6
101 ; SSE2-NEXT: pxor %xmm4, %xmm6
102 ; SSE2-NEXT: movdqa %xmm6, %xmm7
103 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
104 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
105 ; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
106 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
107 ; SSE2-NEXT: pand %xmm8, %xmm5
108 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
109 ; SSE2-NEXT: por %xmm5, %xmm6
110 ; SSE2-NEXT: pand %xmm6, %xmm0
111 ; SSE2-NEXT: pandn %xmm2, %xmm6
112 ; SSE2-NEXT: por %xmm6, %xmm0
113 ; SSE2-NEXT: movdqa %xmm3, %xmm2
114 ; SSE2-NEXT: pxor %xmm4, %xmm2
115 ; SSE2-NEXT: pxor %xmm1, %xmm4
116 ; SSE2-NEXT: movdqa %xmm4, %xmm5
117 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
118 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
119 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm4
120 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
121 ; SSE2-NEXT: pand %xmm6, %xmm2
122 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
123 ; SSE2-NEXT: por %xmm2, %xmm4
124 ; SSE2-NEXT: pand %xmm4, %xmm1
125 ; SSE2-NEXT: pandn %xmm3, %xmm4
126 ; SSE2-NEXT: por %xmm4, %xmm1
129 ; SSE41-LABEL: max_gt_v4i64:
131 ; SSE41-NEXT: movdqa %xmm0, %xmm4
132 ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
133 ; SSE41-NEXT: movdqa %xmm2, %xmm0
134 ; SSE41-NEXT: pxor %xmm5, %xmm0
135 ; SSE41-NEXT: movdqa %xmm4, %xmm6
136 ; SSE41-NEXT: pxor %xmm5, %xmm6
137 ; SSE41-NEXT: movdqa %xmm6, %xmm7
138 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm7
139 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6
140 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
141 ; SSE41-NEXT: pand %xmm7, %xmm0
142 ; SSE41-NEXT: por %xmm6, %xmm0
143 ; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2
144 ; SSE41-NEXT: movdqa %xmm3, %xmm0
145 ; SSE41-NEXT: pxor %xmm5, %xmm0
146 ; SSE41-NEXT: pxor %xmm1, %xmm5
147 ; SSE41-NEXT: movdqa %xmm5, %xmm4
148 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
149 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm5
150 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
151 ; SSE41-NEXT: pand %xmm4, %xmm0
152 ; SSE41-NEXT: por %xmm5, %xmm0
153 ; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3
154 ; SSE41-NEXT: movapd %xmm2, %xmm0
155 ; SSE41-NEXT: movapd %xmm3, %xmm1
158 ; SSE42-LABEL: max_gt_v4i64:
160 ; SSE42-NEXT: movdqa %xmm0, %xmm4
161 ; SSE42-NEXT: movdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
162 ; SSE42-NEXT: movdqa %xmm2, %xmm6
163 ; SSE42-NEXT: pxor %xmm5, %xmm6
164 ; SSE42-NEXT: pxor %xmm5, %xmm0
165 ; SSE42-NEXT: pcmpgtq %xmm6, %xmm0
166 ; SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2
167 ; SSE42-NEXT: movdqa %xmm3, %xmm0
168 ; SSE42-NEXT: pxor %xmm5, %xmm0
169 ; SSE42-NEXT: pxor %xmm1, %xmm5
170 ; SSE42-NEXT: pcmpgtq %xmm0, %xmm5
171 ; SSE42-NEXT: movdqa %xmm5, %xmm0
172 ; SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
173 ; SSE42-NEXT: movapd %xmm2, %xmm0
174 ; SSE42-NEXT: movapd %xmm3, %xmm1
177 ; AVX1-LABEL: max_gt_v4i64:
179 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
180 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
181 ; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
182 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
183 ; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4
184 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
185 ; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm4
186 ; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm3
187 ; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
188 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
189 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
192 ; AVX2-LABEL: max_gt_v4i64:
194 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
195 ; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3
196 ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2
197 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
198 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
201 ; AVX512-LABEL: max_gt_v4i64:
203 ; AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
204 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
205 ; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
206 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; IR under test - lane-wise unsigned compare, then select between the inputs.
208 %1 = icmp ugt <4 x i64> %a, %b
209 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
; max_gt_v4i32 - unsigned maximum of two <4 x i32> vectors (icmp ugt + select).
; SSE2 is expected to emulate it by XORing both inputs with 2147483648
; (0x80000000), comparing with pcmpgtd and blending with pand/pandn/por.
; The SSE4.1, SSE4.2 and AVX-level runs expect a single pmaxud / vpmaxud.
213 define <4 x i32> @max_gt_v4i32(<4 x i32> %a, <4 x i32> %b) {
214 ; SSE2-LABEL: max_gt_v4i32:
216 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
217 ; SSE2-NEXT: movdqa %xmm1, %xmm3
218 ; SSE2-NEXT: pxor %xmm2, %xmm3
219 ; SSE2-NEXT: pxor %xmm0, %xmm2
220 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm2
221 ; SSE2-NEXT: pand %xmm2, %xmm0
222 ; SSE2-NEXT: pandn %xmm1, %xmm2
223 ; SSE2-NEXT: por %xmm2, %xmm0
226 ; SSE41-LABEL: max_gt_v4i32:
228 ; SSE41-NEXT: pmaxud %xmm1, %xmm0
231 ; SSE42-LABEL: max_gt_v4i32:
233 ; SSE42-NEXT: pmaxud %xmm1, %xmm0
236 ; AVX-LABEL: max_gt_v4i32:
238 ; AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; IR under test - lane-wise unsigned compare, then select between the inputs.
240 %1 = icmp ugt <4 x i32> %a, %b
241 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
; max_gt_v8i32 - unsigned maximum of two <8 x i32> vectors (icmp ugt + select).
; SSE2 is expected to run the XOR-with-2147483648 / pcmpgtd / pand-pandn-por
; emulation once per 128-bit half; SSE4.1 and SSE4.2 expect one pmaxud per
; half. AVX1 expects the upper halves to be extracted with vextractf128,
; processed with xmm vpmaxud and rejoined with vinsertf128, while AVX2 and the
; AVX512 runs expect a single ymm vpmaxud.
245 define <8 x i32> @max_gt_v8i32(<8 x i32> %a, <8 x i32> %b) {
246 ; SSE2-LABEL: max_gt_v8i32:
248 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
249 ; SSE2-NEXT: movdqa %xmm2, %xmm5
250 ; SSE2-NEXT: pxor %xmm4, %xmm5
251 ; SSE2-NEXT: movdqa %xmm0, %xmm6
252 ; SSE2-NEXT: pxor %xmm4, %xmm6
253 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
254 ; SSE2-NEXT: pand %xmm6, %xmm0
255 ; SSE2-NEXT: pandn %xmm2, %xmm6
256 ; SSE2-NEXT: por %xmm6, %xmm0
257 ; SSE2-NEXT: movdqa %xmm3, %xmm2
258 ; SSE2-NEXT: pxor %xmm4, %xmm2
259 ; SSE2-NEXT: pxor %xmm1, %xmm4
260 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
261 ; SSE2-NEXT: pand %xmm4, %xmm1
262 ; SSE2-NEXT: pandn %xmm3, %xmm4
263 ; SSE2-NEXT: por %xmm4, %xmm1
266 ; SSE41-LABEL: max_gt_v8i32:
268 ; SSE41-NEXT: pmaxud %xmm2, %xmm0
269 ; SSE41-NEXT: pmaxud %xmm3, %xmm1
272 ; SSE42-LABEL: max_gt_v8i32:
274 ; SSE42-NEXT: pmaxud %xmm2, %xmm0
275 ; SSE42-NEXT: pmaxud %xmm3, %xmm1
278 ; AVX1-LABEL: max_gt_v8i32:
280 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
281 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
282 ; AVX1-NEXT: vpmaxud %xmm2, %xmm3, %xmm2
283 ; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
284 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
287 ; AVX2-LABEL: max_gt_v8i32:
289 ; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
292 ; AVX512-LABEL: max_gt_v8i32:
294 ; AVX512-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
; IR under test - lane-wise unsigned compare, then select between the inputs.
296 %1 = icmp ugt <8 x i32> %a, %b
297 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
; max_gt_v8i16 - unsigned maximum of two <8 x i16> vectors (icmp ugt + select).
; SSE2 is expected to use the saturating-subtract form: psubusw leaves
; (b - a) clamped at zero in xmm1, and paddw adds that back onto a.
; The SSE4.1, SSE4.2 and AVX-level runs expect a single pmaxuw / vpmaxuw.
301 define <8 x i16> @max_gt_v8i16(<8 x i16> %a, <8 x i16> %b) {
302 ; SSE2-LABEL: max_gt_v8i16:
304 ; SSE2-NEXT: psubusw %xmm0, %xmm1
305 ; SSE2-NEXT: paddw %xmm1, %xmm0
308 ; SSE41-LABEL: max_gt_v8i16:
310 ; SSE41-NEXT: pmaxuw %xmm1, %xmm0
313 ; SSE42-LABEL: max_gt_v8i16:
315 ; SSE42-NEXT: pmaxuw %xmm1, %xmm0
318 ; AVX-LABEL: max_gt_v8i16:
320 ; AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; IR under test - lane-wise unsigned compare, then select between the inputs.
322 %1 = icmp ugt <8 x i16> %a, %b
323 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
; max_gt_v16i16 - unsigned maximum of two <16 x i16> vectors (icmp ugt +
; select). SSE2 is expected to apply the psubusw + paddw form to each 128-bit
; half; SSE4.1 and SSE4.2 expect one pmaxuw per half. AVX1 expects a
; vextractf128 / xmm vpmaxuw / vinsertf128 split, while AVX2 and the AVX512
; runs expect a single ymm vpmaxuw.
327 define <16 x i16> @max_gt_v16i16(<16 x i16> %a, <16 x i16> %b) {
328 ; SSE2-LABEL: max_gt_v16i16:
330 ; SSE2-NEXT: psubusw %xmm0, %xmm2
331 ; SSE2-NEXT: paddw %xmm2, %xmm0
332 ; SSE2-NEXT: psubusw %xmm1, %xmm3
333 ; SSE2-NEXT: paddw %xmm3, %xmm1
336 ; SSE41-LABEL: max_gt_v16i16:
338 ; SSE41-NEXT: pmaxuw %xmm2, %xmm0
339 ; SSE41-NEXT: pmaxuw %xmm3, %xmm1
342 ; SSE42-LABEL: max_gt_v16i16:
344 ; SSE42-NEXT: pmaxuw %xmm2, %xmm0
345 ; SSE42-NEXT: pmaxuw %xmm3, %xmm1
348 ; AVX1-LABEL: max_gt_v16i16:
350 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
351 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
352 ; AVX1-NEXT: vpmaxuw %xmm2, %xmm3, %xmm2
353 ; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
354 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
357 ; AVX2-LABEL: max_gt_v16i16:
359 ; AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
362 ; AVX512-LABEL: max_gt_v16i16:
364 ; AVX512-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
; IR under test - lane-wise unsigned compare, then select between the inputs.
366 %1 = icmp ugt <16 x i16> %a, %b
367 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
; max_gt_v16i8 - unsigned maximum of two <16 x i8> vectors (icmp ugt + select).
; Every SSE-level run expects a single pmaxub and every AVX-level run a single
; vpmaxub.
371 define <16 x i8> @max_gt_v16i8(<16 x i8> %a, <16 x i8> %b) {
372 ; SSE-LABEL: max_gt_v16i8:
374 ; SSE-NEXT: pmaxub %xmm1, %xmm0
377 ; AVX-LABEL: max_gt_v16i8:
379 ; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; IR under test - lane-wise unsigned compare, then select between the inputs.
381 %1 = icmp ugt <16 x i8> %a, %b
382 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
; max_gt_v32i8 - unsigned maximum of two <32 x i8> vectors (icmp ugt + select).
; The SSE-level runs expect one pmaxub per 128-bit half. AVX1 expects a
; vextractf128 / xmm vpmaxub / vinsertf128 split, while AVX2 and the AVX512
; runs expect a single ymm vpmaxub.
386 define <32 x i8> @max_gt_v32i8(<32 x i8> %a, <32 x i8> %b) {
387 ; SSE-LABEL: max_gt_v32i8:
389 ; SSE-NEXT: pmaxub %xmm2, %xmm0
390 ; SSE-NEXT: pmaxub %xmm3, %xmm1
393 ; AVX1-LABEL: max_gt_v32i8:
395 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
396 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
397 ; AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2
398 ; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
399 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
402 ; AVX2-LABEL: max_gt_v32i8:
404 ; AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
407 ; AVX512-LABEL: max_gt_v32i8:
409 ; AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
; IR under test - lane-wise unsigned compare, then select between the inputs.
411 %1 = icmp ugt <32 x i8> %a, %b
412 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
417 ; Unsigned Maximum (GE)
; max_ge_v2i64 - unsigned maximum of two <2 x i64> vectors, written with the
; non-strict predicate (icmp uge + select): each lane keeps %a where %a >= %b
; and %b otherwise. The expected SSE2/SSE4.1 sequences XOR both inputs with
; 9223372039002259456 (0x8000000080000000) and combine 32-bit pcmpgtd/pcmpeqd
; results; the SSE4.2/AVX1/AVX2 sequences XOR with 9223372036854775808
; (0x8000000000000000) and use pcmpgtq plus a blendvpd. The AVX512 runs expect
; one vpmaxuq on zmm registers.
420 define <2 x i64> @max_ge_v2i64(<2 x i64> %a, <2 x i64> %b) {
421 ; SSE2-LABEL: max_ge_v2i64:
423 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
424 ; SSE2-NEXT: movdqa %xmm1, %xmm3
425 ; SSE2-NEXT: pxor %xmm2, %xmm3
426 ; SSE2-NEXT: pxor %xmm0, %xmm2
427 ; SSE2-NEXT: movdqa %xmm2, %xmm4
428 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
429 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
430 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
431 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
432 ; SSE2-NEXT: pand %xmm5, %xmm2
433 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
434 ; SSE2-NEXT: por %xmm2, %xmm3
435 ; SSE2-NEXT: pand %xmm3, %xmm0
436 ; SSE2-NEXT: pandn %xmm1, %xmm3
437 ; SSE2-NEXT: por %xmm3, %xmm0
440 ; SSE41-LABEL: max_ge_v2i64:
442 ; SSE41-NEXT: movdqa %xmm0, %xmm2
443 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
444 ; SSE41-NEXT: movdqa %xmm1, %xmm0
445 ; SSE41-NEXT: pxor %xmm3, %xmm0
446 ; SSE41-NEXT: pxor %xmm2, %xmm3
447 ; SSE41-NEXT: movdqa %xmm3, %xmm4
448 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
449 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
450 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
451 ; SSE41-NEXT: pand %xmm4, %xmm0
452 ; SSE41-NEXT: por %xmm3, %xmm0
453 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
454 ; SSE41-NEXT: movapd %xmm1, %xmm0
457 ; SSE42-LABEL: max_ge_v2i64:
459 ; SSE42-NEXT: movdqa %xmm0, %xmm2
460 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
461 ; SSE42-NEXT: movdqa %xmm1, %xmm3
462 ; SSE42-NEXT: pxor %xmm0, %xmm3
463 ; SSE42-NEXT: pxor %xmm2, %xmm0
464 ; SSE42-NEXT: pcmpgtq %xmm3, %xmm0
465 ; SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
466 ; SSE42-NEXT: movapd %xmm1, %xmm0
469 ; AVX1-LABEL: max_ge_v2i64:
471 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
472 ; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3
473 ; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm2
474 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
475 ; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
478 ; AVX2-LABEL: max_ge_v2i64:
480 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
481 ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
482 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2
483 ; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
484 ; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
487 ; AVX512-LABEL: max_ge_v2i64:
489 ; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
490 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
491 ; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
492 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
493 ; AVX512-NEXT: vzeroupper
; IR under test - lane-wise unsigned compare, then select between the inputs.
495 %1 = icmp uge <2 x i64> %a, %b
496 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
; max_ge_v4i64 - unsigned maximum of two <4 x i64> vectors (icmp uge + select).
; The SSE-level runs expect the 256-bit operands to be handled as two 128-bit
; halves (xmm0 against xmm2, then xmm1 against xmm3). AVX1 expects the upper
; halves to be split off with vextractf128 for the compares and rejoined with
; vinsertf128 ahead of a single ymm blend. AVX2 expects full-width ymm
; operations, and the AVX512 runs expect one vpmaxuq on zmm registers.
500 define <4 x i64> @max_ge_v4i64(<4 x i64> %a, <4 x i64> %b) {
501 ; SSE2-LABEL: max_ge_v4i64:
503 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
504 ; SSE2-NEXT: movdqa %xmm2, %xmm5
505 ; SSE2-NEXT: pxor %xmm4, %xmm5
506 ; SSE2-NEXT: movdqa %xmm0, %xmm6
507 ; SSE2-NEXT: pxor %xmm4, %xmm6
508 ; SSE2-NEXT: movdqa %xmm6, %xmm7
509 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
510 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
511 ; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
512 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
513 ; SSE2-NEXT: pand %xmm8, %xmm5
514 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
515 ; SSE2-NEXT: por %xmm5, %xmm6
516 ; SSE2-NEXT: pand %xmm6, %xmm0
517 ; SSE2-NEXT: pandn %xmm2, %xmm6
518 ; SSE2-NEXT: por %xmm6, %xmm0
519 ; SSE2-NEXT: movdqa %xmm3, %xmm2
520 ; SSE2-NEXT: pxor %xmm4, %xmm2
521 ; SSE2-NEXT: pxor %xmm1, %xmm4
522 ; SSE2-NEXT: movdqa %xmm4, %xmm5
523 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
524 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
525 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm4
526 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
527 ; SSE2-NEXT: pand %xmm6, %xmm2
528 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
529 ; SSE2-NEXT: por %xmm2, %xmm4
530 ; SSE2-NEXT: pand %xmm4, %xmm1
531 ; SSE2-NEXT: pandn %xmm3, %xmm4
532 ; SSE2-NEXT: por %xmm4, %xmm1
535 ; SSE41-LABEL: max_ge_v4i64:
537 ; SSE41-NEXT: movdqa %xmm0, %xmm4
538 ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
539 ; SSE41-NEXT: movdqa %xmm2, %xmm0
540 ; SSE41-NEXT: pxor %xmm5, %xmm0
541 ; SSE41-NEXT: movdqa %xmm4, %xmm6
542 ; SSE41-NEXT: pxor %xmm5, %xmm6
543 ; SSE41-NEXT: movdqa %xmm6, %xmm7
544 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm7
545 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6
546 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
547 ; SSE41-NEXT: pand %xmm7, %xmm0
548 ; SSE41-NEXT: por %xmm6, %xmm0
549 ; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2
550 ; SSE41-NEXT: movdqa %xmm3, %xmm0
551 ; SSE41-NEXT: pxor %xmm5, %xmm0
552 ; SSE41-NEXT: pxor %xmm1, %xmm5
553 ; SSE41-NEXT: movdqa %xmm5, %xmm4
554 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
555 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm5
556 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
557 ; SSE41-NEXT: pand %xmm4, %xmm0
558 ; SSE41-NEXT: por %xmm5, %xmm0
559 ; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3
560 ; SSE41-NEXT: movapd %xmm2, %xmm0
561 ; SSE41-NEXT: movapd %xmm3, %xmm1
564 ; SSE42-LABEL: max_ge_v4i64:
566 ; SSE42-NEXT: movdqa %xmm0, %xmm4
567 ; SSE42-NEXT: movdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
568 ; SSE42-NEXT: movdqa %xmm2, %xmm6
569 ; SSE42-NEXT: pxor %xmm5, %xmm6
570 ; SSE42-NEXT: pxor %xmm5, %xmm0
571 ; SSE42-NEXT: pcmpgtq %xmm6, %xmm0
572 ; SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2
573 ; SSE42-NEXT: movdqa %xmm3, %xmm0
574 ; SSE42-NEXT: pxor %xmm5, %xmm0
575 ; SSE42-NEXT: pxor %xmm1, %xmm5
576 ; SSE42-NEXT: pcmpgtq %xmm0, %xmm5
577 ; SSE42-NEXT: movdqa %xmm5, %xmm0
578 ; SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
579 ; SSE42-NEXT: movapd %xmm2, %xmm0
580 ; SSE42-NEXT: movapd %xmm3, %xmm1
583 ; AVX1-LABEL: max_ge_v4i64:
585 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
586 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
587 ; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
588 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
589 ; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4
590 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
591 ; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm4
592 ; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm3
593 ; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
594 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
595 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
598 ; AVX2-LABEL: max_ge_v4i64:
600 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
601 ; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3
602 ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2
603 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
604 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
607 ; AVX512-LABEL: max_ge_v4i64:
609 ; AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
610 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
611 ; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
612 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; IR under test - lane-wise unsigned compare, then select between the inputs.
614 %1 = icmp uge <4 x i64> %a, %b
615 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
; max_ge_v4i32 - unsigned maximum of two <4 x i32> vectors (icmp uge + select).
; SSE2 is expected to emulate it by XORing both inputs with 2147483648
; (0x80000000), comparing with pcmpgtd and blending with pand/pandn/por.
; The SSE4.1, SSE4.2 and AVX-level runs expect a single pmaxud / vpmaxud.
619 define <4 x i32> @max_ge_v4i32(<4 x i32> %a, <4 x i32> %b) {
620 ; SSE2-LABEL: max_ge_v4i32:
622 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
623 ; SSE2-NEXT: movdqa %xmm1, %xmm3
624 ; SSE2-NEXT: pxor %xmm2, %xmm3
625 ; SSE2-NEXT: pxor %xmm0, %xmm2
626 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm2
627 ; SSE2-NEXT: pand %xmm2, %xmm0
628 ; SSE2-NEXT: pandn %xmm1, %xmm2
629 ; SSE2-NEXT: por %xmm2, %xmm0
632 ; SSE41-LABEL: max_ge_v4i32:
634 ; SSE41-NEXT: pmaxud %xmm1, %xmm0
637 ; SSE42-LABEL: max_ge_v4i32:
639 ; SSE42-NEXT: pmaxud %xmm1, %xmm0
642 ; AVX-LABEL: max_ge_v4i32:
644 ; AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; IR under test - lane-wise unsigned compare, then select between the inputs.
646 %1 = icmp uge <4 x i32> %a, %b
647 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
; max_ge_v8i32 - unsigned maximum of two <8 x i32> vectors (icmp uge + select).
; SSE2 is expected to run the XOR-with-2147483648 / pcmpgtd / pand-pandn-por
; emulation once per 128-bit half; SSE4.1 and SSE4.2 expect one pmaxud per
; half. AVX1 expects the upper halves to be extracted with vextractf128,
; processed with xmm vpmaxud and rejoined with vinsertf128, while AVX2 and the
; AVX512 runs expect a single ymm vpmaxud.
651 define <8 x i32> @max_ge_v8i32(<8 x i32> %a, <8 x i32> %b) {
652 ; SSE2-LABEL: max_ge_v8i32:
654 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
655 ; SSE2-NEXT: movdqa %xmm2, %xmm5
656 ; SSE2-NEXT: pxor %xmm4, %xmm5
657 ; SSE2-NEXT: movdqa %xmm0, %xmm6
658 ; SSE2-NEXT: pxor %xmm4, %xmm6
659 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
660 ; SSE2-NEXT: pand %xmm6, %xmm0
661 ; SSE2-NEXT: pandn %xmm2, %xmm6
662 ; SSE2-NEXT: por %xmm6, %xmm0
663 ; SSE2-NEXT: movdqa %xmm3, %xmm2
664 ; SSE2-NEXT: pxor %xmm4, %xmm2
665 ; SSE2-NEXT: pxor %xmm1, %xmm4
666 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
667 ; SSE2-NEXT: pand %xmm4, %xmm1
668 ; SSE2-NEXT: pandn %xmm3, %xmm4
669 ; SSE2-NEXT: por %xmm4, %xmm1
672 ; SSE41-LABEL: max_ge_v8i32:
674 ; SSE41-NEXT: pmaxud %xmm2, %xmm0
675 ; SSE41-NEXT: pmaxud %xmm3, %xmm1
678 ; SSE42-LABEL: max_ge_v8i32:
680 ; SSE42-NEXT: pmaxud %xmm2, %xmm0
681 ; SSE42-NEXT: pmaxud %xmm3, %xmm1
684 ; AVX1-LABEL: max_ge_v8i32:
686 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
687 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
688 ; AVX1-NEXT: vpmaxud %xmm2, %xmm3, %xmm2
689 ; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
690 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
693 ; AVX2-LABEL: max_ge_v8i32:
695 ; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
698 ; AVX512-LABEL: max_ge_v8i32:
700 ; AVX512-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
; IR under test - lane-wise unsigned compare, then select between the inputs.
702 %1 = icmp uge <8 x i32> %a, %b
703 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
; max_ge_v8i16 - unsigned maximum of two <8 x i16> vectors (icmp uge + select).
; SSE2 is expected to use the saturating-subtract form: psubusw leaves
; (b - a) clamped at zero in xmm1, and paddw adds that back onto a.
; The SSE4.1, SSE4.2 and AVX-level runs expect a single pmaxuw / vpmaxuw.
707 define <8 x i16> @max_ge_v8i16(<8 x i16> %a, <8 x i16> %b) {
708 ; SSE2-LABEL: max_ge_v8i16:
710 ; SSE2-NEXT: psubusw %xmm0, %xmm1
711 ; SSE2-NEXT: paddw %xmm1, %xmm0
714 ; SSE41-LABEL: max_ge_v8i16:
716 ; SSE41-NEXT: pmaxuw %xmm1, %xmm0
719 ; SSE42-LABEL: max_ge_v8i16:
721 ; SSE42-NEXT: pmaxuw %xmm1, %xmm0
724 ; AVX-LABEL: max_ge_v8i16:
726 ; AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; IR under test - lane-wise unsigned compare, then select between the inputs.
728 %1 = icmp uge <8 x i16> %a, %b
729 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
; max_ge_v16i16 - unsigned maximum of two <16 x i16> vectors (icmp uge +
; select). SSE2 is expected to apply the psubusw + paddw form to each 128-bit
; half; SSE4.1 and SSE4.2 expect one pmaxuw per half. AVX1 expects a
; vextractf128 / xmm vpmaxuw / vinsertf128 split, while AVX2 and the AVX512
; runs expect a single ymm vpmaxuw.
733 define <16 x i16> @max_ge_v16i16(<16 x i16> %a, <16 x i16> %b) {
734 ; SSE2-LABEL: max_ge_v16i16:
736 ; SSE2-NEXT: psubusw %xmm0, %xmm2
737 ; SSE2-NEXT: paddw %xmm2, %xmm0
738 ; SSE2-NEXT: psubusw %xmm1, %xmm3
739 ; SSE2-NEXT: paddw %xmm3, %xmm1
742 ; SSE41-LABEL: max_ge_v16i16:
744 ; SSE41-NEXT: pmaxuw %xmm2, %xmm0
745 ; SSE41-NEXT: pmaxuw %xmm3, %xmm1
748 ; SSE42-LABEL: max_ge_v16i16:
750 ; SSE42-NEXT: pmaxuw %xmm2, %xmm0
751 ; SSE42-NEXT: pmaxuw %xmm3, %xmm1
754 ; AVX1-LABEL: max_ge_v16i16:
756 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
757 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
758 ; AVX1-NEXT: vpmaxuw %xmm2, %xmm3, %xmm2
759 ; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
760 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
763 ; AVX2-LABEL: max_ge_v16i16:
765 ; AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
768 ; AVX512-LABEL: max_ge_v16i16:
770 ; AVX512-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
; IR under test - lane-wise unsigned compare, then select between the inputs.
772 %1 = icmp uge <16 x i16> %a, %b
773 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
; max_ge_v16i8 - unsigned maximum of two <16 x i8> vectors (icmp uge + select).
; Every SSE-level run expects a single pmaxub and every AVX-level run a single
; vpmaxub.
777 define <16 x i8> @max_ge_v16i8(<16 x i8> %a, <16 x i8> %b) {
778 ; SSE-LABEL: max_ge_v16i8:
780 ; SSE-NEXT: pmaxub %xmm1, %xmm0
783 ; AVX-LABEL: max_ge_v16i8:
785 ; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; IR under test - lane-wise unsigned compare, then select between the inputs.
787 %1 = icmp uge <16 x i8> %a, %b
788 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
; max_ge_v32i8 - unsigned maximum of two <32 x i8> vectors (icmp uge + select).
; The SSE-level runs expect one pmaxub per 128-bit half. AVX1 expects a
; vextractf128 / xmm vpmaxub / vinsertf128 split, while AVX2 and the AVX512
; runs expect a single ymm vpmaxub.
792 define <32 x i8> @max_ge_v32i8(<32 x i8> %a, <32 x i8> %b) {
793 ; SSE-LABEL: max_ge_v32i8:
795 ; SSE-NEXT: pmaxub %xmm2, %xmm0
796 ; SSE-NEXT: pmaxub %xmm3, %xmm1
799 ; AVX1-LABEL: max_ge_v32i8:
801 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
802 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
803 ; AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2
804 ; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
805 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
808 ; AVX2-LABEL: max_ge_v32i8:
810 ; AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
813 ; AVX512-LABEL: max_ge_v32i8:
815 ; AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
; IR under test - lane-wise unsigned compare, then select between the inputs.
817 %1 = icmp uge <32 x i8> %a, %b
818 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
823 ; Unsigned Minimum (LT)
; min_lt_v2i64 - unsigned minimum of two <2 x i64> vectors, written as
; icmp ult + select so each lane keeps %a where %a < %b and %b otherwise.
; The expected SSE2/SSE4.1 sequences XOR both inputs with 9223372039002259456
; (0x8000000080000000) and combine 32-bit pcmpgtd/pcmpeqd results; the
; SSE4.2/AVX1/AVX2 sequences XOR with 9223372036854775808 (0x8000000000000000)
; and use pcmpgtq plus a blendvpd. In both forms the greater-than compare
; tests the biased %b against the biased %a. The AVX512 runs expect one
; vpminuq on zmm registers.
826 define <2 x i64> @min_lt_v2i64(<2 x i64> %a, <2 x i64> %b) {
827 ; SSE2-LABEL: min_lt_v2i64:
829 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
830 ; SSE2-NEXT: movdqa %xmm0, %xmm3
831 ; SSE2-NEXT: pxor %xmm2, %xmm3
832 ; SSE2-NEXT: pxor %xmm1, %xmm2
833 ; SSE2-NEXT: movdqa %xmm2, %xmm4
834 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
835 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
836 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
837 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
838 ; SSE2-NEXT: pand %xmm5, %xmm2
839 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
840 ; SSE2-NEXT: por %xmm2, %xmm3
841 ; SSE2-NEXT: pand %xmm3, %xmm0
842 ; SSE2-NEXT: pandn %xmm1, %xmm3
843 ; SSE2-NEXT: por %xmm3, %xmm0
846 ; SSE41-LABEL: min_lt_v2i64:
848 ; SSE41-NEXT: movdqa %xmm0, %xmm2
849 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
850 ; SSE41-NEXT: pxor %xmm3, %xmm0
851 ; SSE41-NEXT: pxor %xmm1, %xmm3
852 ; SSE41-NEXT: movdqa %xmm3, %xmm4
853 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
854 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
855 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
856 ; SSE41-NEXT: pand %xmm4, %xmm0
857 ; SSE41-NEXT: por %xmm3, %xmm0
858 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
859 ; SSE41-NEXT: movapd %xmm1, %xmm0
862 ; SSE42-LABEL: min_lt_v2i64:
864 ; SSE42-NEXT: movdqa %xmm0, %xmm2
865 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
866 ; SSE42-NEXT: movdqa %xmm2, %xmm3
867 ; SSE42-NEXT: pxor %xmm0, %xmm3
868 ; SSE42-NEXT: pxor %xmm1, %xmm0
869 ; SSE42-NEXT: pcmpgtq %xmm3, %xmm0
870 ; SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
871 ; SSE42-NEXT: movapd %xmm1, %xmm0
874 ; AVX1-LABEL: min_lt_v2i64:
876 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
877 ; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3
878 ; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm2
879 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
880 ; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
883 ; AVX2-LABEL: min_lt_v2i64:
885 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
886 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
887 ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
888 ; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
889 ; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
892 ; AVX512-LABEL: min_lt_v2i64:
894 ; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
895 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
896 ; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
897 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
898 ; AVX512-NEXT: vzeroupper
; IR under test - lane-wise unsigned compare, then select between the inputs.
900 %1 = icmp ult <2 x i64> %a, %b
901 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
; min_lt_v4i64 - unsigned minimum of two <4 x i64> vectors (icmp ult + select).
; The SSE-level runs expect the 256-bit operands to be handled as two 128-bit
; halves (xmm0 against xmm2, then xmm1 against xmm3). AVX1 expects the upper
; halves to be split off with vextractf128 for the compares and rejoined with
; vinsertf128 ahead of a single ymm blend. AVX2 expects full-width ymm
; operations, and the AVX512 runs expect one vpminuq on zmm registers.
905 define <4 x i64> @min_lt_v4i64(<4 x i64> %a, <4 x i64> %b) {
906 ; SSE2-LABEL: min_lt_v4i64:
908 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
909 ; SSE2-NEXT: movdqa %xmm0, %xmm5
910 ; SSE2-NEXT: pxor %xmm4, %xmm5
911 ; SSE2-NEXT: movdqa %xmm2, %xmm6
912 ; SSE2-NEXT: pxor %xmm4, %xmm6
913 ; SSE2-NEXT: movdqa %xmm6, %xmm7
914 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
915 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
916 ; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
917 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
918 ; SSE2-NEXT: pand %xmm8, %xmm5
919 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
920 ; SSE2-NEXT: por %xmm5, %xmm6
921 ; SSE2-NEXT: pand %xmm6, %xmm0
922 ; SSE2-NEXT: pandn %xmm2, %xmm6
923 ; SSE2-NEXT: por %xmm6, %xmm0
924 ; SSE2-NEXT: movdqa %xmm1, %xmm2
925 ; SSE2-NEXT: pxor %xmm4, %xmm2
926 ; SSE2-NEXT: pxor %xmm3, %xmm4
927 ; SSE2-NEXT: movdqa %xmm4, %xmm5
928 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
929 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
930 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm4
931 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
932 ; SSE2-NEXT: pand %xmm6, %xmm2
933 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
934 ; SSE2-NEXT: por %xmm2, %xmm4
935 ; SSE2-NEXT: pand %xmm4, %xmm1
936 ; SSE2-NEXT: pandn %xmm3, %xmm4
937 ; SSE2-NEXT: por %xmm4, %xmm1
940 ; SSE41-LABEL: min_lt_v4i64:
942 ; SSE41-NEXT: movdqa %xmm0, %xmm4
943 ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
944 ; SSE41-NEXT: pxor %xmm5, %xmm0
945 ; SSE41-NEXT: movdqa %xmm2, %xmm6
946 ; SSE41-NEXT: pxor %xmm5, %xmm6
947 ; SSE41-NEXT: movdqa %xmm6, %xmm7
948 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm7
949 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6
950 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
951 ; SSE41-NEXT: pand %xmm7, %xmm0
952 ; SSE41-NEXT: por %xmm6, %xmm0
953 ; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2
954 ; SSE41-NEXT: movdqa %xmm1, %xmm0
955 ; SSE41-NEXT: pxor %xmm5, %xmm0
956 ; SSE41-NEXT: pxor %xmm3, %xmm5
957 ; SSE41-NEXT: movdqa %xmm5, %xmm4
958 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
959 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm5
960 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
961 ; SSE41-NEXT: pand %xmm4, %xmm0
962 ; SSE41-NEXT: por %xmm5, %xmm0
963 ; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3
964 ; SSE41-NEXT: movapd %xmm2, %xmm0
965 ; SSE41-NEXT: movapd %xmm3, %xmm1
968 ; SSE42-LABEL: min_lt_v4i64:
970 ; SSE42-NEXT: movdqa %xmm0, %xmm4
971 ; SSE42-NEXT: movdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
972 ; SSE42-NEXT: movdqa %xmm0, %xmm6
973 ; SSE42-NEXT: pxor %xmm5, %xmm6
974 ; SSE42-NEXT: movdqa %xmm2, %xmm0
975 ; SSE42-NEXT: pxor %xmm5, %xmm0
976 ; SSE42-NEXT: pcmpgtq %xmm6, %xmm0
977 ; SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2
978 ; SSE42-NEXT: movdqa %xmm1, %xmm0
979 ; SSE42-NEXT: pxor %xmm5, %xmm0
980 ; SSE42-NEXT: pxor %xmm3, %xmm5
981 ; SSE42-NEXT: pcmpgtq %xmm0, %xmm5
982 ; SSE42-NEXT: movdqa %xmm5, %xmm0
983 ; SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
984 ; SSE42-NEXT: movapd %xmm2, %xmm0
985 ; SSE42-NEXT: movapd %xmm3, %xmm1
988 ; AVX1-LABEL: min_lt_v4i64:
990 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
991 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
992 ; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
993 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
994 ; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4
995 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
996 ; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm4
997 ; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm3
998 ; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
999 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1000 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1003 ; AVX2-LABEL: min_lt_v4i64:
1005 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
1006 ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
1007 ; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm2
1008 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
1009 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1012 ; AVX512-LABEL: min_lt_v4i64:
1014 ; AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
1015 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1016 ; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
1017 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; IR under test - lane-wise unsigned compare, then select between the inputs.
1019 %1 = icmp ult <4 x i64> %a, %b
1020 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
; min_lt_v4i32 - unsigned minimum of two <4 x i32> vectors (icmp ult + select).
; SSE2 is expected to emulate it by XORing both inputs with 2147483648
; (0x80000000), comparing with pcmpgtd and blending with pand/pandn/por.
; The SSE4.1, SSE4.2 and AVX-level runs expect a single pminud / vpminud.
1024 define <4 x i32> @min_lt_v4i32(<4 x i32> %a, <4 x i32> %b) {
1025 ; SSE2-LABEL: min_lt_v4i32:
1027 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
1028 ; SSE2-NEXT: movdqa %xmm0, %xmm3
1029 ; SSE2-NEXT: pxor %xmm2, %xmm3
1030 ; SSE2-NEXT: pxor %xmm1, %xmm2
1031 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm2
1032 ; SSE2-NEXT: pand %xmm2, %xmm0
1033 ; SSE2-NEXT: pandn %xmm1, %xmm2
1034 ; SSE2-NEXT: por %xmm2, %xmm0
1037 ; SSE41-LABEL: min_lt_v4i32:
1039 ; SSE41-NEXT: pminud %xmm1, %xmm0
1042 ; SSE42-LABEL: min_lt_v4i32:
1044 ; SSE42-NEXT: pminud %xmm1, %xmm0
1047 ; AVX-LABEL: min_lt_v4i32:
1049 ; AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
; IR under test - lane-wise unsigned compare, then select between the inputs.
1051 %1 = icmp ult <4 x i32> %a, %b
1052 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
; min_lt_v8i32 - unsigned minimum of two <8 x i32> vectors (icmp ult + select).
; SSE2 is expected to run the XOR-with-2147483648 / pcmpgtd / pand-pandn-por
; emulation once per 128-bit half; SSE4.1 and SSE4.2 expect one pminud per
; half. AVX1 expects the upper halves to be extracted with vextractf128,
; processed with xmm vpminud and rejoined with vinsertf128, while AVX2 and the
; AVX512 runs expect a single ymm vpminud.
1056 define <8 x i32> @min_lt_v8i32(<8 x i32> %a, <8 x i32> %b) {
1057 ; SSE2-LABEL: min_lt_v8i32:
1059 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
1060 ; SSE2-NEXT: movdqa %xmm0, %xmm5
1061 ; SSE2-NEXT: pxor %xmm4, %xmm5
1062 ; SSE2-NEXT: movdqa %xmm2, %xmm6
1063 ; SSE2-NEXT: pxor %xmm4, %xmm6
1064 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
1065 ; SSE2-NEXT: pand %xmm6, %xmm0
1066 ; SSE2-NEXT: pandn %xmm2, %xmm6
1067 ; SSE2-NEXT: por %xmm6, %xmm0
1068 ; SSE2-NEXT: movdqa %xmm1, %xmm2
1069 ; SSE2-NEXT: pxor %xmm4, %xmm2
1070 ; SSE2-NEXT: pxor %xmm3, %xmm4
1071 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
1072 ; SSE2-NEXT: pand %xmm4, %xmm1
1073 ; SSE2-NEXT: pandn %xmm3, %xmm4
1074 ; SSE2-NEXT: por %xmm4, %xmm1
1077 ; SSE41-LABEL: min_lt_v8i32:
1079 ; SSE41-NEXT: pminud %xmm2, %xmm0
1080 ; SSE41-NEXT: pminud %xmm3, %xmm1
1083 ; SSE42-LABEL: min_lt_v8i32:
1085 ; SSE42-NEXT: pminud %xmm2, %xmm0
1086 ; SSE42-NEXT: pminud %xmm3, %xmm1
1089 ; AVX1-LABEL: min_lt_v8i32:
1091 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1092 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1093 ; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2
1094 ; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
1095 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1098 ; AVX2-LABEL: min_lt_v8i32:
1100 ; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
1103 ; AVX512-LABEL: min_lt_v8i32:
1105 ; AVX512-NEXT: vpminud %ymm1, %ymm0, %ymm0
; IR under test - lane-wise unsigned compare, then select between the inputs.
1107 %1 = icmp ult <8 x i32> %a, %b
1108 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
; Lane-wise unsigned minimum of two <8 x i16> vectors (icmp ult + select).
; The SSE2 expectations compute a - (a -sat b) with psubusw followed by psubw;
; SSE4.1, SSE4.2 and AVX expect a single pminuw/vpminuw.
1112 define <8 x i16> @min_lt_v8i16(<8 x i16> %a, <8 x i16> %b) {
1113 ; SSE2-LABEL: min_lt_v8i16:
1115 ; SSE2-NEXT: movdqa %xmm0, %xmm2
1116 ; SSE2-NEXT: psubusw %xmm1, %xmm2
1117 ; SSE2-NEXT: psubw %xmm2, %xmm0
1120 ; SSE41-LABEL: min_lt_v8i16:
1122 ; SSE41-NEXT: pminuw %xmm1, %xmm0
1125 ; SSE42-LABEL: min_lt_v8i16:
1127 ; SSE42-NEXT: pminuw %xmm1, %xmm0
1130 ; AVX-LABEL: min_lt_v8i16:
1132 ; AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
1134 %1 = icmp ult <8 x i16> %a, %b
1135 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
; Lane-wise unsigned minimum of two <16 x i16> vectors (icmp ult + select).
; SSE expectations process two xmm halves: SSE2 uses psubusw + psubw per half,
; SSE4.1 and SSE4.2 use pminuw per half. AVX1 splits into 128-bit halves with
; vextractf128 and rejoins with vinsertf128; AVX2 and AVX512 expect one ymm
; vpminuw.
1139 define <16 x i16> @min_lt_v16i16(<16 x i16> %a, <16 x i16> %b) {
1140 ; SSE2-LABEL: min_lt_v16i16:
1142 ; SSE2-NEXT: movdqa %xmm0, %xmm4
1143 ; SSE2-NEXT: psubusw %xmm2, %xmm4
1144 ; SSE2-NEXT: psubw %xmm4, %xmm0
1145 ; SSE2-NEXT: movdqa %xmm1, %xmm2
1146 ; SSE2-NEXT: psubusw %xmm3, %xmm2
1147 ; SSE2-NEXT: psubw %xmm2, %xmm1
1150 ; SSE41-LABEL: min_lt_v16i16:
1152 ; SSE41-NEXT: pminuw %xmm2, %xmm0
1153 ; SSE41-NEXT: pminuw %xmm3, %xmm1
1156 ; SSE42-LABEL: min_lt_v16i16:
1158 ; SSE42-NEXT: pminuw %xmm2, %xmm0
1159 ; SSE42-NEXT: pminuw %xmm3, %xmm1
1162 ; AVX1-LABEL: min_lt_v16i16:
1164 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1165 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1166 ; AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm2
1167 ; AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
1168 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1171 ; AVX2-LABEL: min_lt_v16i16:
1173 ; AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
1176 ; AVX512-LABEL: min_lt_v16i16:
1178 ; AVX512-NEXT: vpminuw %ymm1, %ymm0, %ymm0
1180 %1 = icmp ult <16 x i16> %a, %b
1181 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
; Lane-wise unsigned minimum of two <16 x i8> vectors (icmp ult + select).
; All SSE configurations share one expectation (a single pminub) and all AVX
; configurations share the vpminub form.
1185 define <16 x i8> @min_lt_v16i8(<16 x i8> %a, <16 x i8> %b) {
1186 ; SSE-LABEL: min_lt_v16i8:
1188 ; SSE-NEXT: pminub %xmm1, %xmm0
1191 ; AVX-LABEL: min_lt_v16i8:
1193 ; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
1195 %1 = icmp ult <16 x i8> %a, %b
1196 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
; Lane-wise unsigned minimum of two <32 x i8> vectors (icmp ult + select).
; SSE expects one pminub per xmm half; AVX1 splits with vextractf128, runs two
; vpminub and rejoins with vinsertf128; AVX2 and AVX512 expect one ymm vpminub.
1200 define <32 x i8> @min_lt_v32i8(<32 x i8> %a, <32 x i8> %b) {
1201 ; SSE-LABEL: min_lt_v32i8:
1203 ; SSE-NEXT: pminub %xmm2, %xmm0
1204 ; SSE-NEXT: pminub %xmm3, %xmm1
1207 ; AVX1-LABEL: min_lt_v32i8:
1209 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1210 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1211 ; AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2
1212 ; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
1213 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1216 ; AVX2-LABEL: min_lt_v32i8:
1218 ; AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
1221 ; AVX512-LABEL: min_lt_v32i8:
1223 ; AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm0
1225 %1 = icmp ult <32 x i8> %a, %b
1226 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
1231 ; Unsigned Minimum (LE)
; Lane-wise unsigned minimum of two <2 x i64> vectors, written as icmp ule +
; select. The SSE2 and SSE4.1 expectations bias both inputs with pxor and
; combine 32-bit pcmpgtd/pcmpeqd results (pshufd/pand/por) into a 64-bit mask;
; SSE2 then blends with pand/pandn/por, SSE4.1 with blendvpd. SSE4.2, AVX1 and
; AVX2 flip the sign bit and use pcmpgtq/vpcmpgtq plus blendvpd/vblendvpd.
; AVX512 widens the operands to zmm, uses vpminuq and ends with vzeroupper.
1234 define <2 x i64> @min_le_v2i64(<2 x i64> %a, <2 x i64> %b) {
1235 ; SSE2-LABEL: min_le_v2i64:
1237 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
1238 ; SSE2-NEXT: movdqa %xmm0, %xmm3
1239 ; SSE2-NEXT: pxor %xmm2, %xmm3
1240 ; SSE2-NEXT: pxor %xmm1, %xmm2
1241 ; SSE2-NEXT: movdqa %xmm2, %xmm4
1242 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
1243 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1244 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
1245 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1246 ; SSE2-NEXT: pand %xmm5, %xmm2
1247 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
1248 ; SSE2-NEXT: por %xmm2, %xmm3
1249 ; SSE2-NEXT: pand %xmm3, %xmm0
1250 ; SSE2-NEXT: pandn %xmm1, %xmm3
1251 ; SSE2-NEXT: por %xmm3, %xmm0
1254 ; SSE41-LABEL: min_le_v2i64:
1256 ; SSE41-NEXT: movdqa %xmm0, %xmm2
1257 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
1258 ; SSE41-NEXT: pxor %xmm3, %xmm0
1259 ; SSE41-NEXT: pxor %xmm1, %xmm3
1260 ; SSE41-NEXT: movdqa %xmm3, %xmm4
1261 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
1262 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
1263 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
1264 ; SSE41-NEXT: pand %xmm4, %xmm0
1265 ; SSE41-NEXT: por %xmm3, %xmm0
1266 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
1267 ; SSE41-NEXT: movapd %xmm1, %xmm0
1270 ; SSE42-LABEL: min_le_v2i64:
1272 ; SSE42-NEXT: movdqa %xmm0, %xmm2
1273 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
1274 ; SSE42-NEXT: movdqa %xmm2, %xmm3
1275 ; SSE42-NEXT: pxor %xmm0, %xmm3
1276 ; SSE42-NEXT: pxor %xmm1, %xmm0
1277 ; SSE42-NEXT: pcmpgtq %xmm3, %xmm0
1278 ; SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
1279 ; SSE42-NEXT: movapd %xmm1, %xmm0
1282 ; AVX1-LABEL: min_le_v2i64:
1284 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
1285 ; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3
1286 ; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm2
1287 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
1288 ; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1291 ; AVX2-LABEL: min_le_v2i64:
1293 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
1294 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
1295 ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
1296 ; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
1297 ; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1300 ; AVX512-LABEL: min_le_v2i64:
1302 ; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1303 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1304 ; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
1305 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1306 ; AVX512-NEXT: vzeroupper
1308 %1 = icmp ule <2 x i64> %a, %b
1309 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
; Lane-wise unsigned minimum of two <4 x i64> vectors (icmp ule + select).
; The SSE expectations repeat the 128-bit i64 sequence for each xmm half
; (xmm0/xmm2 and xmm1/xmm3). AVX1 compares the two 128-bit halves separately
; with vpcmpgtq, joins the masks with vinsertf128 and blends once with a ymm
; vblendvpd; AVX2 does the whole compare and blend in ymm registers; AVX512
; widens the operands to zmm and uses vpminuq.
1313 define <4 x i64> @min_le_v4i64(<4 x i64> %a, <4 x i64> %b) {
1314 ; SSE2-LABEL: min_le_v4i64:
1316 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
1317 ; SSE2-NEXT: movdqa %xmm0, %xmm5
1318 ; SSE2-NEXT: pxor %xmm4, %xmm5
1319 ; SSE2-NEXT: movdqa %xmm2, %xmm6
1320 ; SSE2-NEXT: pxor %xmm4, %xmm6
1321 ; SSE2-NEXT: movdqa %xmm6, %xmm7
1322 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
1323 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
1324 ; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
1325 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
1326 ; SSE2-NEXT: pand %xmm8, %xmm5
1327 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
1328 ; SSE2-NEXT: por %xmm5, %xmm6
1329 ; SSE2-NEXT: pand %xmm6, %xmm0
1330 ; SSE2-NEXT: pandn %xmm2, %xmm6
1331 ; SSE2-NEXT: por %xmm6, %xmm0
1332 ; SSE2-NEXT: movdqa %xmm1, %xmm2
1333 ; SSE2-NEXT: pxor %xmm4, %xmm2
1334 ; SSE2-NEXT: pxor %xmm3, %xmm4
1335 ; SSE2-NEXT: movdqa %xmm4, %xmm5
1336 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
1337 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
1338 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm4
1339 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
1340 ; SSE2-NEXT: pand %xmm6, %xmm2
1341 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
1342 ; SSE2-NEXT: por %xmm2, %xmm4
1343 ; SSE2-NEXT: pand %xmm4, %xmm1
1344 ; SSE2-NEXT: pandn %xmm3, %xmm4
1345 ; SSE2-NEXT: por %xmm4, %xmm1
1348 ; SSE41-LABEL: min_le_v4i64:
1350 ; SSE41-NEXT: movdqa %xmm0, %xmm4
1351 ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
1352 ; SSE41-NEXT: pxor %xmm5, %xmm0
1353 ; SSE41-NEXT: movdqa %xmm2, %xmm6
1354 ; SSE41-NEXT: pxor %xmm5, %xmm6
1355 ; SSE41-NEXT: movdqa %xmm6, %xmm7
1356 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm7
1357 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6
1358 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
1359 ; SSE41-NEXT: pand %xmm7, %xmm0
1360 ; SSE41-NEXT: por %xmm6, %xmm0
1361 ; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2
1362 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1363 ; SSE41-NEXT: pxor %xmm5, %xmm0
1364 ; SSE41-NEXT: pxor %xmm3, %xmm5
1365 ; SSE41-NEXT: movdqa %xmm5, %xmm4
1366 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
1367 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm5
1368 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
1369 ; SSE41-NEXT: pand %xmm4, %xmm0
1370 ; SSE41-NEXT: por %xmm5, %xmm0
1371 ; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3
1372 ; SSE41-NEXT: movapd %xmm2, %xmm0
1373 ; SSE41-NEXT: movapd %xmm3, %xmm1
1376 ; SSE42-LABEL: min_le_v4i64:
1378 ; SSE42-NEXT: movdqa %xmm0, %xmm4
1379 ; SSE42-NEXT: movdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
1380 ; SSE42-NEXT: movdqa %xmm0, %xmm6
1381 ; SSE42-NEXT: pxor %xmm5, %xmm6
1382 ; SSE42-NEXT: movdqa %xmm2, %xmm0
1383 ; SSE42-NEXT: pxor %xmm5, %xmm0
1384 ; SSE42-NEXT: pcmpgtq %xmm6, %xmm0
1385 ; SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2
1386 ; SSE42-NEXT: movdqa %xmm1, %xmm0
1387 ; SSE42-NEXT: pxor %xmm5, %xmm0
1388 ; SSE42-NEXT: pxor %xmm3, %xmm5
1389 ; SSE42-NEXT: pcmpgtq %xmm0, %xmm5
1390 ; SSE42-NEXT: movdqa %xmm5, %xmm0
1391 ; SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
1392 ; SSE42-NEXT: movapd %xmm2, %xmm0
1393 ; SSE42-NEXT: movapd %xmm3, %xmm1
1396 ; AVX1-LABEL: min_le_v4i64:
1398 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1399 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
1400 ; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
1401 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
1402 ; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4
1403 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
1404 ; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm4
1405 ; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm3
1406 ; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
1407 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1408 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1411 ; AVX2-LABEL: min_le_v4i64:
1413 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
1414 ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
1415 ; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm2
1416 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
1417 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1420 ; AVX512-LABEL: min_le_v4i64:
1422 ; AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
1423 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1424 ; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
1425 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1427 %1 = icmp ule <4 x i64> %a, %b
1428 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
; Lane-wise unsigned minimum of two <4 x i32> vectors, written as icmp ule +
; select. The expectations are identical to the ult form of this test: SSE2
; uses sign-bit flip + pcmpgtd + pand/pandn/por, while SSE4.1, SSE4.2 and AVX
; expect a single pminud/vpminud.
1432 define <4 x i32> @min_le_v4i32(<4 x i32> %a, <4 x i32> %b) {
1433 ; SSE2-LABEL: min_le_v4i32:
1435 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
1436 ; SSE2-NEXT: movdqa %xmm0, %xmm3
1437 ; SSE2-NEXT: pxor %xmm2, %xmm3
1438 ; SSE2-NEXT: pxor %xmm1, %xmm2
1439 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm2
1440 ; SSE2-NEXT: pand %xmm2, %xmm0
1441 ; SSE2-NEXT: pandn %xmm1, %xmm2
1442 ; SSE2-NEXT: por %xmm2, %xmm0
1445 ; SSE41-LABEL: min_le_v4i32:
1447 ; SSE41-NEXT: pminud %xmm1, %xmm0
1450 ; SSE42-LABEL: min_le_v4i32:
1452 ; SSE42-NEXT: pminud %xmm1, %xmm0
1455 ; AVX-LABEL: min_le_v4i32:
1457 ; AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
1459 %1 = icmp ule <4 x i32> %a, %b
1460 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
; Lane-wise unsigned minimum of two <8 x i32> vectors (icmp ule + select).
; SSE expectations handle two xmm halves: SSE2 with the sign-flip + pcmpgtd +
; pand/pandn/por sequence, SSE4.1 and SSE4.2 with one pminud each. AVX1 splits
; with vextractf128 and rejoins with vinsertf128; AVX2 and AVX512 expect one
; ymm vpminud.
1464 define <8 x i32> @min_le_v8i32(<8 x i32> %a, <8 x i32> %b) {
1465 ; SSE2-LABEL: min_le_v8i32:
1467 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
1468 ; SSE2-NEXT: movdqa %xmm0, %xmm5
1469 ; SSE2-NEXT: pxor %xmm4, %xmm5
1470 ; SSE2-NEXT: movdqa %xmm2, %xmm6
1471 ; SSE2-NEXT: pxor %xmm4, %xmm6
1472 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
1473 ; SSE2-NEXT: pand %xmm6, %xmm0
1474 ; SSE2-NEXT: pandn %xmm2, %xmm6
1475 ; SSE2-NEXT: por %xmm6, %xmm0
1476 ; SSE2-NEXT: movdqa %xmm1, %xmm2
1477 ; SSE2-NEXT: pxor %xmm4, %xmm2
1478 ; SSE2-NEXT: pxor %xmm3, %xmm4
1479 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
1480 ; SSE2-NEXT: pand %xmm4, %xmm1
1481 ; SSE2-NEXT: pandn %xmm3, %xmm4
1482 ; SSE2-NEXT: por %xmm4, %xmm1
1485 ; SSE41-LABEL: min_le_v8i32:
1487 ; SSE41-NEXT: pminud %xmm2, %xmm0
1488 ; SSE41-NEXT: pminud %xmm3, %xmm1
1491 ; SSE42-LABEL: min_le_v8i32:
1493 ; SSE42-NEXT: pminud %xmm2, %xmm0
1494 ; SSE42-NEXT: pminud %xmm3, %xmm1
1497 ; AVX1-LABEL: min_le_v8i32:
1499 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1500 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1501 ; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2
1502 ; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
1503 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1506 ; AVX2-LABEL: min_le_v8i32:
1508 ; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
1511 ; AVX512-LABEL: min_le_v8i32:
1513 ; AVX512-NEXT: vpminud %ymm1, %ymm0, %ymm0
1515 %1 = icmp ule <8 x i32> %a, %b
1516 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
; Lane-wise unsigned minimum of two <8 x i16> vectors (icmp ule + select).
; SSE2 expects a - (a -sat b) via psubusw + psubw; SSE4.1, SSE4.2 and AVX
; expect a single pminuw/vpminuw.
1520 define <8 x i16> @min_le_v8i16(<8 x i16> %a, <8 x i16> %b) {
1521 ; SSE2-LABEL: min_le_v8i16:
1523 ; SSE2-NEXT: movdqa %xmm0, %xmm2
1524 ; SSE2-NEXT: psubusw %xmm1, %xmm2
1525 ; SSE2-NEXT: psubw %xmm2, %xmm0
1528 ; SSE41-LABEL: min_le_v8i16:
1530 ; SSE41-NEXT: pminuw %xmm1, %xmm0
1533 ; SSE42-LABEL: min_le_v8i16:
1535 ; SSE42-NEXT: pminuw %xmm1, %xmm0
1538 ; AVX-LABEL: min_le_v8i16:
1540 ; AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
1542 %1 = icmp ule <8 x i16> %a, %b
1543 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
; Lane-wise unsigned minimum of two <16 x i16> vectors (icmp ule + select).
; SSE2 expects psubusw + psubw per xmm half, SSE4.1 and SSE4.2 one pminuw per
; half. AVX1 splits with vextractf128 and rejoins with vinsertf128; AVX2 and
; AVX512 expect one ymm vpminuw.
1547 define <16 x i16> @min_le_v16i16(<16 x i16> %a, <16 x i16> %b) {
1548 ; SSE2-LABEL: min_le_v16i16:
1550 ; SSE2-NEXT: movdqa %xmm0, %xmm4
1551 ; SSE2-NEXT: psubusw %xmm2, %xmm4
1552 ; SSE2-NEXT: psubw %xmm4, %xmm0
1553 ; SSE2-NEXT: movdqa %xmm1, %xmm2
1554 ; SSE2-NEXT: psubusw %xmm3, %xmm2
1555 ; SSE2-NEXT: psubw %xmm2, %xmm1
1558 ; SSE41-LABEL: min_le_v16i16:
1560 ; SSE41-NEXT: pminuw %xmm2, %xmm0
1561 ; SSE41-NEXT: pminuw %xmm3, %xmm1
1564 ; SSE42-LABEL: min_le_v16i16:
1566 ; SSE42-NEXT: pminuw %xmm2, %xmm0
1567 ; SSE42-NEXT: pminuw %xmm3, %xmm1
1570 ; AVX1-LABEL: min_le_v16i16:
1572 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1573 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1574 ; AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm2
1575 ; AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
1576 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1579 ; AVX2-LABEL: min_le_v16i16:
1581 ; AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
1584 ; AVX512-LABEL: min_le_v16i16:
1586 ; AVX512-NEXT: vpminuw %ymm1, %ymm0, %ymm0
1588 %1 = icmp ule <16 x i16> %a, %b
1589 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
; Lane-wise unsigned minimum of two <16 x i8> vectors (icmp ule + select).
; Every SSE configuration expects a single pminub and every AVX configuration
; a single vpminub.
1593 define <16 x i8> @min_le_v16i8(<16 x i8> %a, <16 x i8> %b) {
1594 ; SSE-LABEL: min_le_v16i8:
1596 ; SSE-NEXT: pminub %xmm1, %xmm0
1599 ; AVX-LABEL: min_le_v16i8:
1601 ; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
1603 %1 = icmp ule <16 x i8> %a, %b
1604 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
; Lane-wise unsigned minimum of two <32 x i8> vectors (icmp ule + select).
; SSE expects one pminub per xmm half; AVX1 splits with vextractf128, runs two
; vpminub and rejoins with vinsertf128; AVX2 and AVX512 expect one ymm vpminub.
1608 define <32 x i8> @min_le_v32i8(<32 x i8> %a, <32 x i8> %b) {
1609 ; SSE-LABEL: min_le_v32i8:
1611 ; SSE-NEXT: pminub %xmm2, %xmm0
1612 ; SSE-NEXT: pminub %xmm3, %xmm1
1615 ; AVX1-LABEL: min_le_v32i8:
1617 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1618 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1619 ; AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2
1620 ; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
1621 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1624 ; AVX2-LABEL: min_le_v32i8:
1626 ; AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
1629 ; AVX512-LABEL: min_le_v32i8:
1631 ; AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm0
1633 %1 = icmp ule <32 x i8> %a, %b
1634 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
; Unsigned maximum (icmp ugt + select) of two constant <2 x i64> vectors.
; Each insertelement re-inserts the value lane 0 already holds, so both
; operands are constants and the expectations are a single load of the
; precomputed result (18446744073709551615 is i64 -1).
1642 define <2 x i64> @max_gt_v2i64c() {
1643 ; SSE-LABEL: max_gt_v2i64c:
1645 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551615,7]
1648 ; AVX-LABEL: max_gt_v2i64c:
1650 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551615,7]
1652 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
1653 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
1654 %3 = icmp ugt <2 x i64> %1, %2
1655 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
; Unsigned maximum (icmp ugt + select) of two constant <4 x i64> vectors; the
; insertelements re-insert the existing lane 0 values. The SSE expectations
; return the result in xmm0/xmm1: the upper half [7,7] is loaded and the
; all-ones lower half is produced with pcmpeqd of xmm0 against itself. AVX
; expects one ymm constant load.
1659 define <4 x i64> @max_gt_v4i64c() {
1660 ; SSE-LABEL: max_gt_v4i64c:
1662 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,7]
1663 ; SSE-NEXT: pcmpeqd %xmm0, %xmm0
1666 ; AVX-LABEL: max_gt_v4i64c:
1668 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,7,7]
1670 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
1671 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
1672 %3 = icmp ugt <4 x i64> %1, %2
1673 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
; Unsigned maximum (icmp ugt + select) of two constant <4 x i32> vectors; the
; insertelements re-insert the existing lane 0 values, so the expectations are
; a single load of the precomputed result (4294967295 is i32 -1).
1677 define <4 x i32> @max_gt_v4i32c() {
1678 ; SSE-LABEL: max_gt_v4i32c:
1680 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
1683 ; AVX-LABEL: max_gt_v4i32c:
1685 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
1687 %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
1688 %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
1689 %3 = icmp ugt <4 x i32> %1, %2
1690 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
; Unsigned maximum (icmp ugt + select) of two constant <8 x i32> vectors; the
; insertelements re-insert the existing lane 0 values. SSE expects two xmm
; constant loads (result in xmm0/xmm1), AVX one ymm constant load.
1694 define <8 x i32> @max_gt_v8i32c() {
1695 ; SSE-LABEL: max_gt_v8i32c:
1697 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295]
1698 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7]
1701 ; AVX-LABEL: max_gt_v8i32c:
1703 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7]
1705 %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
1706 %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
1707 %3 = icmp ugt <8 x i32> %1, %2
1708 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
; Unsigned maximum (icmp ugt + select) of two constant <8 x i16> vectors; the
; insertelements re-insert the existing lane 0 values, so the expectations are
; a single load of the precomputed result (65535 is i16 -1).
1712 define <8 x i16> @max_gt_v8i16c() {
1713 ; SSE-LABEL: max_gt_v8i16c:
1715 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
1718 ; AVX-LABEL: max_gt_v8i16c:
1720 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
1722 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
1723 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
1724 %3 = icmp ugt <8 x i16> %1, %2
1725 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
; Unsigned maximum (icmp ugt + select) of two constant <16 x i16> vectors; the
; insertelements re-insert the existing lane 0 values. SSE expects two xmm
; constant loads (result in xmm0/xmm1), AVX one ymm constant load.
1729 define <16 x i16> @max_gt_v16i16c() {
1730 ; SSE-LABEL: max_gt_v16i16c:
1732 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0]
1733 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8]
1736 ; AVX-LABEL: max_gt_v16i16c:
1738 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8]
1740 %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
1741 %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
1742 %3 = icmp ugt <16 x i16> %1, %2
1743 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
; Unsigned maximum (icmp ugt + select) of two constant <16 x i8> vectors; the
; insertelements re-insert the existing lane 0 values, so the expectations are
; a single load of the precomputed result (255 is i8 -1).
1747 define <16 x i8> @max_gt_v16i8c() {
1748 ; SSE-LABEL: max_gt_v16i8c:
1750 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
1753 ; AVX-LABEL: max_gt_v16i8c:
1755 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
1757 %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
1758 %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
1759 %3 = icmp ugt <16 x i8> %1, %2
1760 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
; Unsigned maximum (icmp uge + select) of two constant <2 x i64> vectors; the
; insertelements re-insert the existing lane 0 values, so the expectations are
; a single load of the precomputed result.
1764 define <2 x i64> @max_ge_v2i64c() {
1765 ; SSE-LABEL: max_ge_v2i64c:
1767 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551615,7]
1770 ; AVX-LABEL: max_ge_v2i64c:
1772 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551615,7]
1774 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
1775 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
1776 %3 = icmp uge <2 x i64> %1, %2
1777 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
; Unsigned maximum (icmp uge + select) of two constant <4 x i64> vectors; the
; insertelements re-insert the existing lane 0 values. SSE expects the upper
; half [7,7] loaded into xmm1 and the all-ones lower half produced with
; pcmpeqd of xmm0 against itself; AVX expects one ymm constant load.
1781 define <4 x i64> @max_ge_v4i64c() {
1782 ; SSE-LABEL: max_ge_v4i64c:
1784 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,7]
1785 ; SSE-NEXT: pcmpeqd %xmm0, %xmm0
1788 ; AVX-LABEL: max_ge_v4i64c:
1790 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,7,7]
1792 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
1793 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
1794 %3 = icmp uge <4 x i64> %1, %2
1795 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
; Unsigned maximum (icmp uge + select) of two constant <4 x i32> vectors; the
; insertelements re-insert the existing lane 0 values, so the expectations are
; a single load of the precomputed result.
1799 define <4 x i32> @max_ge_v4i32c() {
1800 ; SSE-LABEL: max_ge_v4i32c:
1802 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
1805 ; AVX-LABEL: max_ge_v4i32c:
1807 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
1809 %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
1810 %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
1811 %3 = icmp uge <4 x i32> %1, %2
1812 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
; Unsigned maximum (icmp uge + select) of two constant <8 x i32> vectors; the
; insertelements re-insert the existing lane 0 values. SSE expects two xmm
; constant loads (result in xmm0/xmm1), AVX one ymm constant load.
1816 define <8 x i32> @max_ge_v8i32c() {
1817 ; SSE-LABEL: max_ge_v8i32c:
1819 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295]
1820 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7]
1823 ; AVX-LABEL: max_ge_v8i32c:
1825 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7]
1827 %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
1828 %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
1829 %3 = icmp uge <8 x i32> %1, %2
1830 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
; Unsigned maximum (icmp uge + select) of two constant <8 x i16> vectors; the
; insertelements re-insert the existing lane 0 values, so the expectations are
; a single load of the precomputed result.
1834 define <8 x i16> @max_ge_v8i16c() {
1835 ; SSE-LABEL: max_ge_v8i16c:
1837 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
1840 ; AVX-LABEL: max_ge_v8i16c:
1842 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
1844 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
1845 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
1846 %3 = icmp uge <8 x i16> %1, %2
1847 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
; Unsigned maximum (icmp uge + select) of two constant <16 x i16> vectors; the
; insertelements re-insert the existing lane 0 values. SSE expects two xmm
; constant loads (result in xmm0/xmm1), AVX one ymm constant load.
1851 define <16 x i16> @max_ge_v16i16c() {
1852 ; SSE-LABEL: max_ge_v16i16c:
1854 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0]
1855 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8]
1858 ; AVX-LABEL: max_ge_v16i16c:
1860 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8]
1862 %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
1863 %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
1864 %3 = icmp uge <16 x i16> %1, %2
1865 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
; Unsigned maximum (icmp uge + select) of two constant <16 x i8> vectors; the
; insertelements re-insert the existing lane 0 values, so the expectations are
; a single load of the precomputed result.
1869 define <16 x i8> @max_ge_v16i8c() {
1870 ; SSE-LABEL: max_ge_v16i8c:
1872 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
1875 ; AVX-LABEL: max_ge_v16i8c:
1877 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
1879 %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
1880 %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
1881 %3 = icmp uge <16 x i8> %1, %2
1882 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
; Unsigned minimum (icmp ult + select) of two constant <2 x i64> vectors; the
; insertelements re-insert the existing lane 0 values, so the expectations are
; a single load of the precomputed result (18446744073709551609 is i64 -7).
1886 define <2 x i64> @min_lt_v2i64c() {
1887 ; SSE-LABEL: min_lt_v2i64c:
1889 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,1]
1892 ; AVX-LABEL: min_lt_v2i64c:
1894 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551609,1]
1896 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
1897 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
1898 %3 = icmp ult <2 x i64> %1, %2
1899 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
; Unsigned minimum (icmp ult + select) of two constant <4 x i64> vectors; the
; insertelements re-insert the existing lane 0 values. SSE expects two xmm
; constant loads (result in xmm0/xmm1), AVX one ymm constant load.
1903 define <4 x i64> @min_lt_v4i64c() {
1904 ; SSE-LABEL: min_lt_v4i64c:
1906 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,18446744073709551609]
1907 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1]
1910 ; AVX-LABEL: min_lt_v4i64c:
1912 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551609,18446744073709551609,1,1]
1914 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
1915 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
1916 %3 = icmp ult <4 x i64> %1, %2
1917 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
; Unsigned minimum (icmp ult + select) of two constant <4 x i32> vectors; the
; insertelements re-insert the existing lane 0 values, so the expectations are
; a single load of the precomputed result (4294967289 is i32 -7).
1921 define <4 x i32> @min_lt_v4i32c() {
1922 ; SSE-LABEL: min_lt_v4i32c:
1924 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
1927 ; AVX-LABEL: min_lt_v4i32c:
1929 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
1931 %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
1932 %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
1933 %3 = icmp ult <4 x i32> %1, %2
1934 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
; Unsigned minimum (icmp ult + select) of two constant <8 x i32> vectors; the
; insertelements re-insert the existing lane 0 values. SSE expects two xmm
; constant loads (result in xmm0/xmm1), AVX one ymm constant load.
1938 define <8 x i32> @min_lt_v8i32c() {
1939 ; SSE-LABEL: min_lt_v8i32c:
1941 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
1942 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1]
1945 ; AVX-LABEL: min_lt_v8i32c:
1947 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
1949 %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
1950 %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
1951 %3 = icmp ult <8 x i32> %1, %2
1952 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
; Unsigned minimum (icmp ult + select) of two constant <8 x i16> vectors; the
; expectations are a single load of the precomputed result.
; NOTE(review): unlike the other constant tests in this file, the second
; insertelement overwrites lane 0 of %2 with 1 instead of re-inserting -1.
; The expected lane 0 value of 1 follows from that; confirm it is intentional.
1956 define <8 x i16> @min_lt_v8i16c() {
1957 ; SSE-LABEL: min_lt_v8i16c:
1959 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,65531,65531,65529,1,3,3,1]
1962 ; AVX-LABEL: min_lt_v8i16c:
1964 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,65531,65531,65529,1,3,3,1]
1966 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
1967 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 1, i32 0
1968 %3 = icmp ult <8 x i16> %1, %2
1969 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
; Unsigned minimum (icmp ult + select) of two constant <16 x i16> vectors.
; SSE expects two xmm constant loads (result in xmm0/xmm1), AVX one ymm
; constant load.
; NOTE(review): unlike the other constant tests in this file, the second
; insertelement overwrites lane 0 of %2 with 1 instead of re-inserting -1.
; The expected lane 0 value of 1 follows from that; confirm it is intentional.
1973 define <16 x i16> @min_lt_v16i16c() {
1974 ; SSE-LABEL: min_lt_v16i16c:
1976 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,65530,65531,65532,65531,65530,65529,0]
1977 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0]
1980 ; AVX-LABEL: min_lt_v16i16c:
1982 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0]
1984 %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
1985 %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 1, i32 0
1986 %3 = icmp ult <16 x i16> %1, %2
1987 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
; Unsigned minimum (icmp ult + select) of two constant <16 x i8> vectors; the
; expectations are a single load of the precomputed result.
; NOTE(review): unlike the other constant tests in this file, the second
; insertelement overwrites lane 0 of %2 with 1 instead of re-inserting -1.
; The expected lane 0 value of 1 follows from that; confirm it is intentional.
1991 define <16 x i8> @min_lt_v16i8c() {
1992 ; SSE-LABEL: min_lt_v16i8c:
1994 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
1997 ; AVX-LABEL: min_lt_v16i8c:
1999 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
2001 %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
2002 %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 1, i32 0
2003 %3 = icmp ult <16 x i8> %1, %2
2004 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
; Unsigned minimum (icmp ule + select) of two constant <2 x i64> vectors; the
; insertelements re-insert the existing lane 0 values, so the expectations are
; a single load of the precomputed result.
2008 define <2 x i64> @min_le_v2i64c() {
2009 ; SSE-LABEL: min_le_v2i64c:
2011 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,1]
2014 ; AVX-LABEL: min_le_v2i64c:
2016 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551609,1]
2018 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
2019 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
2020 %3 = icmp ule <2 x i64> %1, %2
2021 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
; Unsigned minimum (icmp ule + select) of two constant <4 x i64> vectors; the
; insertelements re-insert the existing lane 0 values. SSE expects two xmm
; constant loads (result in xmm0/xmm1), AVX one ymm constant load.
2025 define <4 x i64> @min_le_v4i64c() {
2026 ; SSE-LABEL: min_le_v4i64c:
2028 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,18446744073709551609]
2029 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1]
2032 ; AVX-LABEL: min_le_v4i64c:
2034 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551609,18446744073709551609,1,1]
2036 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
2037 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
2038 %3 = icmp ule <4 x i64> %1, %2
2039 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
; Unsigned minimum (icmp ule + select) of two constant <4 x i32> vectors; the
; insertelements re-insert the existing lane 0 values, so the expectations are
; a single load of the precomputed result.
2043 define <4 x i32> @min_le_v4i32c() {
2044 ; SSE-LABEL: min_le_v4i32c:
2046 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
2049 ; AVX-LABEL: min_le_v4i32c:
2051 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
2053 %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
2054 %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
2055 %3 = icmp ule <4 x i32> %1, %2
2056 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
; Unsigned minimum (icmp ule + select) of two constant <8 x i32> vectors; the
; insertelements re-insert the existing lane 0 values. SSE expects two xmm
; constant loads (result in xmm0/xmm1), AVX one ymm constant load.
2060 define <8 x i32> @min_le_v8i32c() {
2061 ; SSE-LABEL: min_le_v8i32c:
2063 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
2064 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1]
2067 ; AVX-LABEL: min_le_v8i32c:
2069 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
2071 %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
2072 %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
2073 %3 = icmp ule <8 x i32> %1, %2
2074 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
; Unsigned minimum (icmp ule + select) of two constant <8 x i16> vectors; the
; insertelements re-insert the existing lane 0 values, so the expectations are
; a single load of the precomputed result (65529 is i16 -7).
2078 define <8 x i16> @min_le_v8i16c() {
2079 ; SSE-LABEL: min_le_v8i16c:
2081 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
2084 ; AVX-LABEL: min_le_v8i16c:
2086 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
2088 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
2089 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
2090 %3 = icmp ule <8 x i16> %1, %2
2091 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
; Unsigned minimum (icmp ule + select) of two constant <16 x i16> vectors; the
; insertelements re-insert the existing lane 0 values. SSE expects two xmm
; constant loads (result in xmm0/xmm1), AVX one ymm constant load.
2095 define <16 x i16> @min_le_v16i16c() {
2096 ; SSE-LABEL: min_le_v16i16c:
2098 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65530,65531,65532,65531,65530,65529,0]
2099 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0]
2102 ; AVX-LABEL: min_le_v16i16c:
2104 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65529,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0]
2106 %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
2107 %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
2108 %3 = icmp ule <16 x i16> %1, %2
2109 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
2113 define <16 x i8> @min_le_v16i8c() {
2114 ; SSE-LABEL: min_le_v16i8c:
2116 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
2119 ; AVX-LABEL: min_le_v16i8c:
2121 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
2123 %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
2124 %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
2125 %3 = icmp ule <16 x i8> %1, %2
2126 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2