1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512
11 ; Unsigned Maximum (GT)
; Per-lane unsigned maximum of two <2 x i64> vectors, written as
; icmp ugt + select (%a where %a >u %b, otherwise %b).
; The AVX512 checks expect one vpmaxuq on the widened zmm registers. The
; SSE4.2, AVX1 and AVX2 checks expect both inputs xor'ed with 2^63
; (9223372036854775808) before a pcmpgtq feeds a blendvpd. The SSE2 and
; SSE4.1 checks expect the compare to be built from pcmpgtd and pcmpeqd.
14 define <2 x i64> @max_gt_v2i64(<2 x i64> %a, <2 x i64> %b) {
15 ; SSE2-LABEL: max_gt_v2i64:
17 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
18 ; SSE2-NEXT: movdqa %xmm1, %xmm3
19 ; SSE2-NEXT: pxor %xmm2, %xmm3
20 ; SSE2-NEXT: pxor %xmm0, %xmm2
21 ; SSE2-NEXT: movdqa %xmm2, %xmm4
22 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
23 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
24 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
25 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
26 ; SSE2-NEXT: pand %xmm5, %xmm2
27 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
28 ; SSE2-NEXT: por %xmm2, %xmm3
29 ; SSE2-NEXT: pand %xmm3, %xmm0
30 ; SSE2-NEXT: pandn %xmm1, %xmm3
31 ; SSE2-NEXT: por %xmm3, %xmm0
34 ; SSE41-LABEL: max_gt_v2i64:
36 ; SSE41-NEXT: movdqa %xmm0, %xmm2
37 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
38 ; SSE41-NEXT: movdqa %xmm1, %xmm0
39 ; SSE41-NEXT: pxor %xmm3, %xmm0
40 ; SSE41-NEXT: pxor %xmm2, %xmm3
41 ; SSE41-NEXT: movdqa %xmm3, %xmm4
42 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
43 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
44 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
45 ; SSE41-NEXT: pand %xmm4, %xmm0
46 ; SSE41-NEXT: por %xmm3, %xmm0
47 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
48 ; SSE41-NEXT: movapd %xmm1, %xmm0
51 ; SSE42-LABEL: max_gt_v2i64:
53 ; SSE42-NEXT: movdqa %xmm0, %xmm2
54 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
55 ; SSE42-NEXT: movdqa %xmm1, %xmm3
56 ; SSE42-NEXT: pxor %xmm0, %xmm3
57 ; SSE42-NEXT: pxor %xmm2, %xmm0
58 ; SSE42-NEXT: pcmpgtq %xmm3, %xmm0
59 ; SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
60 ; SSE42-NEXT: movapd %xmm1, %xmm0
63 ; AVX1-LABEL: max_gt_v2i64:
65 ; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
66 ; AVX1-NEXT: # xmm2 = mem[0,0]
67 ; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3
68 ; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm2
69 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
70 ; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
73 ; AVX2-LABEL: max_gt_v2i64:
75 ; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
76 ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
77 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2
78 ; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
79 ; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
82 ; AVX512-LABEL: max_gt_v2i64:
84 ; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
85 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
86 ; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
87 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
88 ; AVX512-NEXT: vzeroupper
90 %1 = icmp ugt <2 x i64> %a, %b
91 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
; Per-lane unsigned maximum of two <4 x i64> vectors, written as
; icmp ugt + select (%a where %a >u %b, otherwise %b).
; In the SSE-family checks each 256-bit operand appears as a pair of xmm
; registers and the compare/blend sequence is expected once per pair. The AVX1
; checks expect the upper halves to be split off with vextractf128, compared
; at 128 bits, and rejoined with vinsertf128 before a ymm vblendvpd. The AVX2
; checks expect the whole sequence on ymm registers, and the AVX512 checks
; expect one vpmaxuq on the widened zmm registers.
95 define <4 x i64> @max_gt_v4i64(<4 x i64> %a, <4 x i64> %b) {
96 ; SSE2-LABEL: max_gt_v4i64:
98 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
99 ; SSE2-NEXT: movdqa %xmm2, %xmm5
100 ; SSE2-NEXT: pxor %xmm4, %xmm5
101 ; SSE2-NEXT: movdqa %xmm0, %xmm6
102 ; SSE2-NEXT: pxor %xmm4, %xmm6
103 ; SSE2-NEXT: movdqa %xmm6, %xmm7
104 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
105 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
106 ; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
107 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
108 ; SSE2-NEXT: pand %xmm8, %xmm5
109 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
110 ; SSE2-NEXT: por %xmm5, %xmm6
111 ; SSE2-NEXT: pand %xmm6, %xmm0
112 ; SSE2-NEXT: pandn %xmm2, %xmm6
113 ; SSE2-NEXT: por %xmm6, %xmm0
114 ; SSE2-NEXT: movdqa %xmm3, %xmm2
115 ; SSE2-NEXT: pxor %xmm4, %xmm2
116 ; SSE2-NEXT: pxor %xmm1, %xmm4
117 ; SSE2-NEXT: movdqa %xmm4, %xmm5
118 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
119 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
120 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm4
121 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
122 ; SSE2-NEXT: pand %xmm6, %xmm2
123 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
124 ; SSE2-NEXT: por %xmm2, %xmm4
125 ; SSE2-NEXT: pand %xmm4, %xmm1
126 ; SSE2-NEXT: pandn %xmm3, %xmm4
127 ; SSE2-NEXT: por %xmm4, %xmm1
130 ; SSE41-LABEL: max_gt_v4i64:
132 ; SSE41-NEXT: movdqa %xmm0, %xmm4
133 ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
134 ; SSE41-NEXT: movdqa %xmm2, %xmm0
135 ; SSE41-NEXT: pxor %xmm5, %xmm0
136 ; SSE41-NEXT: movdqa %xmm4, %xmm6
137 ; SSE41-NEXT: pxor %xmm5, %xmm6
138 ; SSE41-NEXT: movdqa %xmm6, %xmm7
139 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm7
140 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6
141 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
142 ; SSE41-NEXT: pand %xmm7, %xmm0
143 ; SSE41-NEXT: por %xmm6, %xmm0
144 ; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2
145 ; SSE41-NEXT: movdqa %xmm3, %xmm0
146 ; SSE41-NEXT: pxor %xmm5, %xmm0
147 ; SSE41-NEXT: pxor %xmm1, %xmm5
148 ; SSE41-NEXT: movdqa %xmm5, %xmm4
149 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
150 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm5
151 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
152 ; SSE41-NEXT: pand %xmm4, %xmm0
153 ; SSE41-NEXT: por %xmm5, %xmm0
154 ; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3
155 ; SSE41-NEXT: movapd %xmm2, %xmm0
156 ; SSE41-NEXT: movapd %xmm3, %xmm1
159 ; SSE42-LABEL: max_gt_v4i64:
161 ; SSE42-NEXT: movdqa %xmm0, %xmm4
162 ; SSE42-NEXT: movdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
163 ; SSE42-NEXT: movdqa %xmm2, %xmm6
164 ; SSE42-NEXT: pxor %xmm5, %xmm6
165 ; SSE42-NEXT: pxor %xmm5, %xmm0
166 ; SSE42-NEXT: pcmpgtq %xmm6, %xmm0
167 ; SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2
168 ; SSE42-NEXT: movdqa %xmm3, %xmm0
169 ; SSE42-NEXT: pxor %xmm5, %xmm0
170 ; SSE42-NEXT: pxor %xmm1, %xmm5
171 ; SSE42-NEXT: pcmpgtq %xmm0, %xmm5
172 ; SSE42-NEXT: movdqa %xmm5, %xmm0
173 ; SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
174 ; SSE42-NEXT: movapd %xmm2, %xmm0
175 ; SSE42-NEXT: movapd %xmm3, %xmm1
178 ; AVX1-LABEL: max_gt_v4i64:
180 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
181 ; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
182 ; AVX1-NEXT: # xmm3 = mem[0,0]
183 ; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
184 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
185 ; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4
186 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
187 ; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm4
188 ; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm3
189 ; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
190 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
191 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
194 ; AVX2-LABEL: max_gt_v4i64:
196 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
197 ; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3
198 ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2
199 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
200 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
203 ; AVX512-LABEL: max_gt_v4i64:
205 ; AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
206 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
207 ; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
208 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
210 %1 = icmp ugt <4 x i64> %a, %b
211 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
; Per-lane unsigned maximum of two <4 x i32> vectors, written as
; icmp ugt + select (%a where %a >u %b, otherwise %b).
; The SSE2 checks expect both inputs xor'ed with 2^31 (2147483648), then a
; pcmpgtd mask combined through pand/pandn/por. The SSE4.1, SSE4.2 and AVX
; checks expect a single pmaxud / vpmaxud.
215 define <4 x i32> @max_gt_v4i32(<4 x i32> %a, <4 x i32> %b) {
216 ; SSE2-LABEL: max_gt_v4i32:
218 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
219 ; SSE2-NEXT: movdqa %xmm1, %xmm3
220 ; SSE2-NEXT: pxor %xmm2, %xmm3
221 ; SSE2-NEXT: pxor %xmm0, %xmm2
222 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm2
223 ; SSE2-NEXT: pand %xmm2, %xmm0
224 ; SSE2-NEXT: pandn %xmm1, %xmm2
225 ; SSE2-NEXT: por %xmm2, %xmm0
228 ; SSE41-LABEL: max_gt_v4i32:
230 ; SSE41-NEXT: pmaxud %xmm1, %xmm0
233 ; SSE42-LABEL: max_gt_v4i32:
235 ; SSE42-NEXT: pmaxud %xmm1, %xmm0
238 ; AVX-LABEL: max_gt_v4i32:
240 ; AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
242 %1 = icmp ugt <4 x i32> %a, %b
243 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
; Per-lane unsigned maximum of two <8 x i32> vectors, written as
; icmp ugt + select (%a where %a >u %b, otherwise %b).
; The SSE2 checks expect the xor-with-2^31 + pcmpgtd + pand/pandn/por sequence
; once per xmm register pair. The SSE4.1 and SSE4.2 checks expect two pmaxud.
; The AVX1 checks expect the upper halves split off with vextractf128, two
; 128-bit vpmaxud, and a vinsertf128. The AVX2 and AVX512 checks expect one
; ymm vpmaxud.
247 define <8 x i32> @max_gt_v8i32(<8 x i32> %a, <8 x i32> %b) {
248 ; SSE2-LABEL: max_gt_v8i32:
250 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
251 ; SSE2-NEXT: movdqa %xmm2, %xmm5
252 ; SSE2-NEXT: pxor %xmm4, %xmm5
253 ; SSE2-NEXT: movdqa %xmm0, %xmm6
254 ; SSE2-NEXT: pxor %xmm4, %xmm6
255 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
256 ; SSE2-NEXT: pand %xmm6, %xmm0
257 ; SSE2-NEXT: pandn %xmm2, %xmm6
258 ; SSE2-NEXT: por %xmm6, %xmm0
259 ; SSE2-NEXT: movdqa %xmm3, %xmm2
260 ; SSE2-NEXT: pxor %xmm4, %xmm2
261 ; SSE2-NEXT: pxor %xmm1, %xmm4
262 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
263 ; SSE2-NEXT: pand %xmm4, %xmm1
264 ; SSE2-NEXT: pandn %xmm3, %xmm4
265 ; SSE2-NEXT: por %xmm4, %xmm1
268 ; SSE41-LABEL: max_gt_v8i32:
270 ; SSE41-NEXT: pmaxud %xmm2, %xmm0
271 ; SSE41-NEXT: pmaxud %xmm3, %xmm1
274 ; SSE42-LABEL: max_gt_v8i32:
276 ; SSE42-NEXT: pmaxud %xmm2, %xmm0
277 ; SSE42-NEXT: pmaxud %xmm3, %xmm1
280 ; AVX1-LABEL: max_gt_v8i32:
282 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
283 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
284 ; AVX1-NEXT: vpmaxud %xmm2, %xmm3, %xmm2
285 ; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
286 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
289 ; AVX2-LABEL: max_gt_v8i32:
291 ; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
294 ; AVX512-LABEL: max_gt_v8i32:
296 ; AVX512-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
298 %1 = icmp ugt <8 x i32> %a, %b
299 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
; Per-lane unsigned maximum of two <8 x i16> vectors, written as
; icmp ugt + select (%a where %a >u %b, otherwise %b).
; The SSE2 checks expect a psubusw followed by a paddw (saturating b - a, then
; added back to a). The SSE4.1, SSE4.2 and AVX checks expect a single
; pmaxuw / vpmaxuw.
303 define <8 x i16> @max_gt_v8i16(<8 x i16> %a, <8 x i16> %b) {
304 ; SSE2-LABEL: max_gt_v8i16:
306 ; SSE2-NEXT: psubusw %xmm0, %xmm1
307 ; SSE2-NEXT: paddw %xmm1, %xmm0
310 ; SSE41-LABEL: max_gt_v8i16:
312 ; SSE41-NEXT: pmaxuw %xmm1, %xmm0
315 ; SSE42-LABEL: max_gt_v8i16:
317 ; SSE42-NEXT: pmaxuw %xmm1, %xmm0
320 ; AVX-LABEL: max_gt_v8i16:
322 ; AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
324 %1 = icmp ugt <8 x i16> %a, %b
325 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
; Per-lane unsigned maximum of two <16 x i16> vectors, written as
; icmp ugt + select (%a where %a >u %b, otherwise %b).
; The SSE2 checks expect a psubusw + paddw pair per xmm register pair. The
; SSE4.1 and SSE4.2 checks expect two pmaxuw. The AVX1 checks expect the upper
; halves split off with vextractf128, two 128-bit vpmaxuw, and a vinsertf128.
; The AVX2 and AVX512 checks expect one ymm vpmaxuw.
329 define <16 x i16> @max_gt_v16i16(<16 x i16> %a, <16 x i16> %b) {
330 ; SSE2-LABEL: max_gt_v16i16:
332 ; SSE2-NEXT: psubusw %xmm0, %xmm2
333 ; SSE2-NEXT: paddw %xmm2, %xmm0
334 ; SSE2-NEXT: psubusw %xmm1, %xmm3
335 ; SSE2-NEXT: paddw %xmm3, %xmm1
338 ; SSE41-LABEL: max_gt_v16i16:
340 ; SSE41-NEXT: pmaxuw %xmm2, %xmm0
341 ; SSE41-NEXT: pmaxuw %xmm3, %xmm1
344 ; SSE42-LABEL: max_gt_v16i16:
346 ; SSE42-NEXT: pmaxuw %xmm2, %xmm0
347 ; SSE42-NEXT: pmaxuw %xmm3, %xmm1
350 ; AVX1-LABEL: max_gt_v16i16:
352 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
353 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
354 ; AVX1-NEXT: vpmaxuw %xmm2, %xmm3, %xmm2
355 ; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
356 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
359 ; AVX2-LABEL: max_gt_v16i16:
361 ; AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
364 ; AVX512-LABEL: max_gt_v16i16:
366 ; AVX512-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
368 %1 = icmp ugt <16 x i16> %a, %b
369 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
; Per-lane unsigned maximum of two <16 x i8> vectors, written as
; icmp ugt + select (%a where %a >u %b, otherwise %b).
; Every SSE and AVX run is expected to emit a single pmaxub / vpmaxub.
373 define <16 x i8> @max_gt_v16i8(<16 x i8> %a, <16 x i8> %b) {
374 ; SSE-LABEL: max_gt_v16i8:
376 ; SSE-NEXT: pmaxub %xmm1, %xmm0
379 ; AVX-LABEL: max_gt_v16i8:
381 ; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
383 %1 = icmp ugt <16 x i8> %a, %b
384 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
; Per-lane unsigned maximum of two <32 x i8> vectors, written as
; icmp ugt + select (%a where %a >u %b, otherwise %b).
; The SSE checks expect two pmaxub (one per xmm register pair). The AVX1
; checks expect the upper halves split off with vextractf128, two 128-bit
; vpmaxub, and a vinsertf128. The AVX2 and AVX512 checks expect one ymm vpmaxub.
388 define <32 x i8> @max_gt_v32i8(<32 x i8> %a, <32 x i8> %b) {
389 ; SSE-LABEL: max_gt_v32i8:
391 ; SSE-NEXT: pmaxub %xmm2, %xmm0
392 ; SSE-NEXT: pmaxub %xmm3, %xmm1
395 ; AVX1-LABEL: max_gt_v32i8:
397 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
398 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
399 ; AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2
400 ; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
401 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
404 ; AVX2-LABEL: max_gt_v32i8:
406 ; AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
409 ; AVX512-LABEL: max_gt_v32i8:
411 ; AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
413 %1 = icmp ugt <32 x i8> %a, %b
414 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
419 ; Unsigned Maximum (GE)
; Per-lane unsigned maximum of two <2 x i64> vectors, written as
; icmp uge + select (%a where %a >=u %b, otherwise %b). On lanes where the
; inputs are equal either operand yields the same value.
; The AVX512 checks expect one vpmaxuq on the widened zmm registers. The
; SSE4.2, AVX1 and AVX2 checks expect both inputs xor'ed with 2^63
; (9223372036854775808) before a pcmpgtq feeds a blendvpd. The SSE2 and
; SSE4.1 checks expect the compare to be built from pcmpgtd and pcmpeqd.
422 define <2 x i64> @max_ge_v2i64(<2 x i64> %a, <2 x i64> %b) {
423 ; SSE2-LABEL: max_ge_v2i64:
425 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
426 ; SSE2-NEXT: movdqa %xmm1, %xmm3
427 ; SSE2-NEXT: pxor %xmm2, %xmm3
428 ; SSE2-NEXT: pxor %xmm0, %xmm2
429 ; SSE2-NEXT: movdqa %xmm2, %xmm4
430 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
431 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
432 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
433 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
434 ; SSE2-NEXT: pand %xmm5, %xmm2
435 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
436 ; SSE2-NEXT: por %xmm2, %xmm3
437 ; SSE2-NEXT: pand %xmm3, %xmm0
438 ; SSE2-NEXT: pandn %xmm1, %xmm3
439 ; SSE2-NEXT: por %xmm3, %xmm0
442 ; SSE41-LABEL: max_ge_v2i64:
444 ; SSE41-NEXT: movdqa %xmm0, %xmm2
445 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
446 ; SSE41-NEXT: movdqa %xmm1, %xmm0
447 ; SSE41-NEXT: pxor %xmm3, %xmm0
448 ; SSE41-NEXT: pxor %xmm2, %xmm3
449 ; SSE41-NEXT: movdqa %xmm3, %xmm4
450 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
451 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
452 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
453 ; SSE41-NEXT: pand %xmm4, %xmm0
454 ; SSE41-NEXT: por %xmm3, %xmm0
455 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
456 ; SSE41-NEXT: movapd %xmm1, %xmm0
459 ; SSE42-LABEL: max_ge_v2i64:
461 ; SSE42-NEXT: movdqa %xmm0, %xmm2
462 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
463 ; SSE42-NEXT: movdqa %xmm1, %xmm3
464 ; SSE42-NEXT: pxor %xmm0, %xmm3
465 ; SSE42-NEXT: pxor %xmm2, %xmm0
466 ; SSE42-NEXT: pcmpgtq %xmm3, %xmm0
467 ; SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
468 ; SSE42-NEXT: movapd %xmm1, %xmm0
471 ; AVX1-LABEL: max_ge_v2i64:
473 ; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
474 ; AVX1-NEXT: # xmm2 = mem[0,0]
475 ; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3
476 ; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm2
477 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
478 ; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
481 ; AVX2-LABEL: max_ge_v2i64:
483 ; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
484 ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
485 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2
486 ; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
487 ; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
490 ; AVX512-LABEL: max_ge_v2i64:
492 ; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
493 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
494 ; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
495 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
496 ; AVX512-NEXT: vzeroupper
498 %1 = icmp uge <2 x i64> %a, %b
499 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
; Per-lane unsigned maximum of two <4 x i64> vectors, written as
; icmp uge + select (%a where %a >=u %b, otherwise %b). On lanes where the
; inputs are equal either operand yields the same value.
; In the SSE-family checks each 256-bit operand appears as a pair of xmm
; registers and the compare/blend sequence is expected once per pair. The AVX1
; checks expect the upper halves to be split off with vextractf128, compared
; at 128 bits, and rejoined with vinsertf128 before a ymm vblendvpd. The AVX2
; checks expect the whole sequence on ymm registers, and the AVX512 checks
; expect one vpmaxuq on the widened zmm registers.
503 define <4 x i64> @max_ge_v4i64(<4 x i64> %a, <4 x i64> %b) {
504 ; SSE2-LABEL: max_ge_v4i64:
506 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
507 ; SSE2-NEXT: movdqa %xmm2, %xmm5
508 ; SSE2-NEXT: pxor %xmm4, %xmm5
509 ; SSE2-NEXT: movdqa %xmm0, %xmm6
510 ; SSE2-NEXT: pxor %xmm4, %xmm6
511 ; SSE2-NEXT: movdqa %xmm6, %xmm7
512 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
513 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
514 ; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
515 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
516 ; SSE2-NEXT: pand %xmm8, %xmm5
517 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
518 ; SSE2-NEXT: por %xmm5, %xmm6
519 ; SSE2-NEXT: pand %xmm6, %xmm0
520 ; SSE2-NEXT: pandn %xmm2, %xmm6
521 ; SSE2-NEXT: por %xmm6, %xmm0
522 ; SSE2-NEXT: movdqa %xmm3, %xmm2
523 ; SSE2-NEXT: pxor %xmm4, %xmm2
524 ; SSE2-NEXT: pxor %xmm1, %xmm4
525 ; SSE2-NEXT: movdqa %xmm4, %xmm5
526 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
527 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
528 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm4
529 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
530 ; SSE2-NEXT: pand %xmm6, %xmm2
531 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
532 ; SSE2-NEXT: por %xmm2, %xmm4
533 ; SSE2-NEXT: pand %xmm4, %xmm1
534 ; SSE2-NEXT: pandn %xmm3, %xmm4
535 ; SSE2-NEXT: por %xmm4, %xmm1
538 ; SSE41-LABEL: max_ge_v4i64:
540 ; SSE41-NEXT: movdqa %xmm0, %xmm4
541 ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
542 ; SSE41-NEXT: movdqa %xmm2, %xmm0
543 ; SSE41-NEXT: pxor %xmm5, %xmm0
544 ; SSE41-NEXT: movdqa %xmm4, %xmm6
545 ; SSE41-NEXT: pxor %xmm5, %xmm6
546 ; SSE41-NEXT: movdqa %xmm6, %xmm7
547 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm7
548 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6
549 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
550 ; SSE41-NEXT: pand %xmm7, %xmm0
551 ; SSE41-NEXT: por %xmm6, %xmm0
552 ; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2
553 ; SSE41-NEXT: movdqa %xmm3, %xmm0
554 ; SSE41-NEXT: pxor %xmm5, %xmm0
555 ; SSE41-NEXT: pxor %xmm1, %xmm5
556 ; SSE41-NEXT: movdqa %xmm5, %xmm4
557 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
558 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm5
559 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
560 ; SSE41-NEXT: pand %xmm4, %xmm0
561 ; SSE41-NEXT: por %xmm5, %xmm0
562 ; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3
563 ; SSE41-NEXT: movapd %xmm2, %xmm0
564 ; SSE41-NEXT: movapd %xmm3, %xmm1
567 ; SSE42-LABEL: max_ge_v4i64:
569 ; SSE42-NEXT: movdqa %xmm0, %xmm4
570 ; SSE42-NEXT: movdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
571 ; SSE42-NEXT: movdqa %xmm2, %xmm6
572 ; SSE42-NEXT: pxor %xmm5, %xmm6
573 ; SSE42-NEXT: pxor %xmm5, %xmm0
574 ; SSE42-NEXT: pcmpgtq %xmm6, %xmm0
575 ; SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2
576 ; SSE42-NEXT: movdqa %xmm3, %xmm0
577 ; SSE42-NEXT: pxor %xmm5, %xmm0
578 ; SSE42-NEXT: pxor %xmm1, %xmm5
579 ; SSE42-NEXT: pcmpgtq %xmm0, %xmm5
580 ; SSE42-NEXT: movdqa %xmm5, %xmm0
581 ; SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
582 ; SSE42-NEXT: movapd %xmm2, %xmm0
583 ; SSE42-NEXT: movapd %xmm3, %xmm1
586 ; AVX1-LABEL: max_ge_v4i64:
588 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
589 ; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
590 ; AVX1-NEXT: # xmm3 = mem[0,0]
591 ; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
592 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
593 ; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4
594 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
595 ; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm4
596 ; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm3
597 ; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
598 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
599 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
602 ; AVX2-LABEL: max_ge_v4i64:
604 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
605 ; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3
606 ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2
607 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
608 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
611 ; AVX512-LABEL: max_ge_v4i64:
613 ; AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
614 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
615 ; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
616 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
618 %1 = icmp uge <4 x i64> %a, %b
619 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
; Per-lane unsigned maximum of two <4 x i32> vectors, written as
; icmp uge + select (%a where %a >=u %b, otherwise %b). On lanes where the
; inputs are equal either operand yields the same value.
; The SSE2 checks expect both inputs xor'ed with 2^31 (2147483648), then a
; pcmpgtd mask combined through pand/pandn/por. The SSE4.1, SSE4.2 and AVX
; checks expect a single pmaxud / vpmaxud.
623 define <4 x i32> @max_ge_v4i32(<4 x i32> %a, <4 x i32> %b) {
624 ; SSE2-LABEL: max_ge_v4i32:
626 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
627 ; SSE2-NEXT: movdqa %xmm1, %xmm3
628 ; SSE2-NEXT: pxor %xmm2, %xmm3
629 ; SSE2-NEXT: pxor %xmm0, %xmm2
630 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm2
631 ; SSE2-NEXT: pand %xmm2, %xmm0
632 ; SSE2-NEXT: pandn %xmm1, %xmm2
633 ; SSE2-NEXT: por %xmm2, %xmm0
636 ; SSE41-LABEL: max_ge_v4i32:
638 ; SSE41-NEXT: pmaxud %xmm1, %xmm0
641 ; SSE42-LABEL: max_ge_v4i32:
643 ; SSE42-NEXT: pmaxud %xmm1, %xmm0
646 ; AVX-LABEL: max_ge_v4i32:
648 ; AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
650 %1 = icmp uge <4 x i32> %a, %b
651 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
; Per-lane unsigned maximum of two <8 x i32> vectors, written as
; icmp uge + select (%a where %a >=u %b, otherwise %b). On lanes where the
; inputs are equal either operand yields the same value.
; The SSE2 checks expect the xor-with-2^31 + pcmpgtd + pand/pandn/por sequence
; once per xmm register pair. The SSE4.1 and SSE4.2 checks expect two pmaxud.
; The AVX1 checks expect the upper halves split off with vextractf128, two
; 128-bit vpmaxud, and a vinsertf128. The AVX2 and AVX512 checks expect one
; ymm vpmaxud.
655 define <8 x i32> @max_ge_v8i32(<8 x i32> %a, <8 x i32> %b) {
656 ; SSE2-LABEL: max_ge_v8i32:
658 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
659 ; SSE2-NEXT: movdqa %xmm2, %xmm5
660 ; SSE2-NEXT: pxor %xmm4, %xmm5
661 ; SSE2-NEXT: movdqa %xmm0, %xmm6
662 ; SSE2-NEXT: pxor %xmm4, %xmm6
663 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
664 ; SSE2-NEXT: pand %xmm6, %xmm0
665 ; SSE2-NEXT: pandn %xmm2, %xmm6
666 ; SSE2-NEXT: por %xmm6, %xmm0
667 ; SSE2-NEXT: movdqa %xmm3, %xmm2
668 ; SSE2-NEXT: pxor %xmm4, %xmm2
669 ; SSE2-NEXT: pxor %xmm1, %xmm4
670 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
671 ; SSE2-NEXT: pand %xmm4, %xmm1
672 ; SSE2-NEXT: pandn %xmm3, %xmm4
673 ; SSE2-NEXT: por %xmm4, %xmm1
676 ; SSE41-LABEL: max_ge_v8i32:
678 ; SSE41-NEXT: pmaxud %xmm2, %xmm0
679 ; SSE41-NEXT: pmaxud %xmm3, %xmm1
682 ; SSE42-LABEL: max_ge_v8i32:
684 ; SSE42-NEXT: pmaxud %xmm2, %xmm0
685 ; SSE42-NEXT: pmaxud %xmm3, %xmm1
688 ; AVX1-LABEL: max_ge_v8i32:
690 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
691 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
692 ; AVX1-NEXT: vpmaxud %xmm2, %xmm3, %xmm2
693 ; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
694 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
697 ; AVX2-LABEL: max_ge_v8i32:
699 ; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
702 ; AVX512-LABEL: max_ge_v8i32:
704 ; AVX512-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
706 %1 = icmp uge <8 x i32> %a, %b
707 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
; Per-lane unsigned maximum of two <8 x i16> vectors, written as
; icmp uge + select (%a where %a >=u %b, otherwise %b). On lanes where the
; inputs are equal either operand yields the same value.
; The SSE2 checks expect a psubusw followed by a paddw (saturating b - a, then
; added back to a). The SSE4.1, SSE4.2 and AVX checks expect a single
; pmaxuw / vpmaxuw.
711 define <8 x i16> @max_ge_v8i16(<8 x i16> %a, <8 x i16> %b) {
712 ; SSE2-LABEL: max_ge_v8i16:
714 ; SSE2-NEXT: psubusw %xmm0, %xmm1
715 ; SSE2-NEXT: paddw %xmm1, %xmm0
718 ; SSE41-LABEL: max_ge_v8i16:
720 ; SSE41-NEXT: pmaxuw %xmm1, %xmm0
723 ; SSE42-LABEL: max_ge_v8i16:
725 ; SSE42-NEXT: pmaxuw %xmm1, %xmm0
728 ; AVX-LABEL: max_ge_v8i16:
730 ; AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
732 %1 = icmp uge <8 x i16> %a, %b
733 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
; Per-lane unsigned maximum of two <16 x i16> vectors, written as
; icmp uge + select (%a where %a >=u %b, otherwise %b). On lanes where the
; inputs are equal either operand yields the same value.
; The SSE2 checks expect a psubusw + paddw pair per xmm register pair. The
; SSE4.1 and SSE4.2 checks expect two pmaxuw. The AVX1 checks expect the upper
; halves split off with vextractf128, two 128-bit vpmaxuw, and a vinsertf128.
; The AVX2 and AVX512 checks expect one ymm vpmaxuw.
737 define <16 x i16> @max_ge_v16i16(<16 x i16> %a, <16 x i16> %b) {
738 ; SSE2-LABEL: max_ge_v16i16:
740 ; SSE2-NEXT: psubusw %xmm0, %xmm2
741 ; SSE2-NEXT: paddw %xmm2, %xmm0
742 ; SSE2-NEXT: psubusw %xmm1, %xmm3
743 ; SSE2-NEXT: paddw %xmm3, %xmm1
746 ; SSE41-LABEL: max_ge_v16i16:
748 ; SSE41-NEXT: pmaxuw %xmm2, %xmm0
749 ; SSE41-NEXT: pmaxuw %xmm3, %xmm1
752 ; SSE42-LABEL: max_ge_v16i16:
754 ; SSE42-NEXT: pmaxuw %xmm2, %xmm0
755 ; SSE42-NEXT: pmaxuw %xmm3, %xmm1
758 ; AVX1-LABEL: max_ge_v16i16:
760 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
761 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
762 ; AVX1-NEXT: vpmaxuw %xmm2, %xmm3, %xmm2
763 ; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
764 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
767 ; AVX2-LABEL: max_ge_v16i16:
769 ; AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
772 ; AVX512-LABEL: max_ge_v16i16:
774 ; AVX512-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
776 %1 = icmp uge <16 x i16> %a, %b
777 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
; Per-lane unsigned maximum of two <16 x i8> vectors, written as
; icmp uge + select (%a where %a >=u %b, otherwise %b). On lanes where the
; inputs are equal either operand yields the same value.
; Every SSE and AVX run is expected to emit a single pmaxub / vpmaxub.
781 define <16 x i8> @max_ge_v16i8(<16 x i8> %a, <16 x i8> %b) {
782 ; SSE-LABEL: max_ge_v16i8:
784 ; SSE-NEXT: pmaxub %xmm1, %xmm0
787 ; AVX-LABEL: max_ge_v16i8:
789 ; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
791 %1 = icmp uge <16 x i8> %a, %b
792 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
; Per-lane unsigned maximum of two <32 x i8> vectors, written as
; icmp uge + select (%a where %a >=u %b, otherwise %b). On lanes where the
; inputs are equal either operand yields the same value.
; The SSE checks expect two pmaxub (one per xmm register pair). The AVX1
; checks expect the upper halves split off with vextractf128, two 128-bit
; vpmaxub, and a vinsertf128. The AVX2 and AVX512 checks expect one ymm vpmaxub.
796 define <32 x i8> @max_ge_v32i8(<32 x i8> %a, <32 x i8> %b) {
797 ; SSE-LABEL: max_ge_v32i8:
799 ; SSE-NEXT: pmaxub %xmm2, %xmm0
800 ; SSE-NEXT: pmaxub %xmm3, %xmm1
803 ; AVX1-LABEL: max_ge_v32i8:
805 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
806 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
807 ; AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2
808 ; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
809 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
812 ; AVX2-LABEL: max_ge_v32i8:
814 ; AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
817 ; AVX512-LABEL: max_ge_v32i8:
819 ; AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
821 %1 = icmp uge <32 x i8> %a, %b
822 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
827 ; Unsigned Minimum (LT)
; Per-lane unsigned minimum of two <2 x i64> vectors, written as
; icmp ult + select (%a where %a <u %b, otherwise %b).
; The AVX512 checks expect one vpminuq on the widened zmm registers. The
; SSE4.2, AVX1 and AVX2 checks expect both inputs xor'ed with 2^63
; (9223372036854775808) before a pcmpgtq feeds a blendvpd. The SSE2 and
; SSE4.1 checks expect the compare to be built from pcmpgtd and pcmpeqd.
830 define <2 x i64> @min_lt_v2i64(<2 x i64> %a, <2 x i64> %b) {
831 ; SSE2-LABEL: min_lt_v2i64:
833 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
834 ; SSE2-NEXT: movdqa %xmm0, %xmm3
835 ; SSE2-NEXT: pxor %xmm2, %xmm3
836 ; SSE2-NEXT: pxor %xmm1, %xmm2
837 ; SSE2-NEXT: movdqa %xmm2, %xmm4
838 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
839 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
840 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
841 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
842 ; SSE2-NEXT: pand %xmm5, %xmm2
843 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
844 ; SSE2-NEXT: por %xmm2, %xmm3
845 ; SSE2-NEXT: pand %xmm3, %xmm0
846 ; SSE2-NEXT: pandn %xmm1, %xmm3
847 ; SSE2-NEXT: por %xmm3, %xmm0
850 ; SSE41-LABEL: min_lt_v2i64:
852 ; SSE41-NEXT: movdqa %xmm0, %xmm2
853 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
854 ; SSE41-NEXT: pxor %xmm3, %xmm0
855 ; SSE41-NEXT: pxor %xmm1, %xmm3
856 ; SSE41-NEXT: movdqa %xmm3, %xmm4
857 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
858 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
859 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
860 ; SSE41-NEXT: pand %xmm4, %xmm0
861 ; SSE41-NEXT: por %xmm3, %xmm0
862 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
863 ; SSE41-NEXT: movapd %xmm1, %xmm0
866 ; SSE42-LABEL: min_lt_v2i64:
868 ; SSE42-NEXT: movdqa %xmm0, %xmm2
869 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
870 ; SSE42-NEXT: movdqa %xmm2, %xmm3
871 ; SSE42-NEXT: pxor %xmm0, %xmm3
872 ; SSE42-NEXT: pxor %xmm1, %xmm0
873 ; SSE42-NEXT: pcmpgtq %xmm3, %xmm0
874 ; SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
875 ; SSE42-NEXT: movapd %xmm1, %xmm0
878 ; AVX1-LABEL: min_lt_v2i64:
880 ; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
881 ; AVX1-NEXT: # xmm2 = mem[0,0]
882 ; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3
883 ; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm2
884 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
885 ; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
888 ; AVX2-LABEL: min_lt_v2i64:
890 ; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
891 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
892 ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
893 ; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
894 ; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
897 ; AVX512-LABEL: min_lt_v2i64:
899 ; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
900 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
901 ; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
902 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
903 ; AVX512-NEXT: vzeroupper
905 %1 = icmp ult <2 x i64> %a, %b
906 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
; Per-lane unsigned minimum of two <4 x i64> vectors, written as
; icmp ult + select (%a where %a <u %b, otherwise %b).
; In the SSE-family checks each 256-bit operand appears as a pair of xmm
; registers and the compare/blend sequence is expected once per pair. The AVX1
; checks expect the upper halves to be split off with vextractf128, compared
; at 128 bits, and rejoined with vinsertf128 before a ymm vblendvpd. The AVX2
; checks expect the whole sequence on ymm registers, and the AVX512 checks
; expect one vpminuq on the widened zmm registers.
910 define <4 x i64> @min_lt_v4i64(<4 x i64> %a, <4 x i64> %b) {
911 ; SSE2-LABEL: min_lt_v4i64:
913 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
914 ; SSE2-NEXT: movdqa %xmm0, %xmm5
915 ; SSE2-NEXT: pxor %xmm4, %xmm5
916 ; SSE2-NEXT: movdqa %xmm2, %xmm6
917 ; SSE2-NEXT: pxor %xmm4, %xmm6
918 ; SSE2-NEXT: movdqa %xmm6, %xmm7
919 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
920 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
921 ; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
922 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
923 ; SSE2-NEXT: pand %xmm8, %xmm5
924 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
925 ; SSE2-NEXT: por %xmm5, %xmm6
926 ; SSE2-NEXT: pand %xmm6, %xmm0
927 ; SSE2-NEXT: pandn %xmm2, %xmm6
928 ; SSE2-NEXT: por %xmm6, %xmm0
929 ; SSE2-NEXT: movdqa %xmm1, %xmm2
930 ; SSE2-NEXT: pxor %xmm4, %xmm2
931 ; SSE2-NEXT: pxor %xmm3, %xmm4
932 ; SSE2-NEXT: movdqa %xmm4, %xmm5
933 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
934 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
935 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm4
936 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
937 ; SSE2-NEXT: pand %xmm6, %xmm2
938 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
939 ; SSE2-NEXT: por %xmm2, %xmm4
940 ; SSE2-NEXT: pand %xmm4, %xmm1
941 ; SSE2-NEXT: pandn %xmm3, %xmm4
942 ; SSE2-NEXT: por %xmm4, %xmm1
945 ; SSE41-LABEL: min_lt_v4i64:
947 ; SSE41-NEXT: movdqa %xmm0, %xmm4
948 ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
949 ; SSE41-NEXT: pxor %xmm5, %xmm0
950 ; SSE41-NEXT: movdqa %xmm2, %xmm6
951 ; SSE41-NEXT: pxor %xmm5, %xmm6
952 ; SSE41-NEXT: movdqa %xmm6, %xmm7
953 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm7
954 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6
955 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
956 ; SSE41-NEXT: pand %xmm7, %xmm0
957 ; SSE41-NEXT: por %xmm6, %xmm0
958 ; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2
959 ; SSE41-NEXT: movdqa %xmm1, %xmm0
960 ; SSE41-NEXT: pxor %xmm5, %xmm0
961 ; SSE41-NEXT: pxor %xmm3, %xmm5
962 ; SSE41-NEXT: movdqa %xmm5, %xmm4
963 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
964 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm5
965 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
966 ; SSE41-NEXT: pand %xmm4, %xmm0
967 ; SSE41-NEXT: por %xmm5, %xmm0
968 ; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3
969 ; SSE41-NEXT: movapd %xmm2, %xmm0
970 ; SSE41-NEXT: movapd %xmm3, %xmm1
973 ; SSE42-LABEL: min_lt_v4i64:
975 ; SSE42-NEXT: movdqa %xmm0, %xmm4
976 ; SSE42-NEXT: movdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
977 ; SSE42-NEXT: movdqa %xmm0, %xmm6
978 ; SSE42-NEXT: pxor %xmm5, %xmm6
979 ; SSE42-NEXT: movdqa %xmm2, %xmm0
980 ; SSE42-NEXT: pxor %xmm5, %xmm0
981 ; SSE42-NEXT: pcmpgtq %xmm6, %xmm0
982 ; SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2
983 ; SSE42-NEXT: movdqa %xmm1, %xmm0
984 ; SSE42-NEXT: pxor %xmm5, %xmm0
985 ; SSE42-NEXT: pxor %xmm3, %xmm5
986 ; SSE42-NEXT: pcmpgtq %xmm0, %xmm5
987 ; SSE42-NEXT: movdqa %xmm5, %xmm0
988 ; SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
989 ; SSE42-NEXT: movapd %xmm2, %xmm0
990 ; SSE42-NEXT: movapd %xmm3, %xmm1
993 ; AVX1-LABEL: min_lt_v4i64:
995 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
996 ; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
997 ; AVX1-NEXT: # xmm3 = mem[0,0]
998 ; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
999 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
1000 ; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4
1001 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
1002 ; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm4
1003 ; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm3
1004 ; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
1005 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1006 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1009 ; AVX2-LABEL: min_lt_v4i64:
1011 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
1012 ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
1013 ; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm2
1014 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
1015 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1018 ; AVX512-LABEL: min_lt_v4i64:
1020 ; AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
1021 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1022 ; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
1023 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1025 %1 = icmp ult <4 x i64> %a, %b
1026 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
; Per-lane unsigned minimum of two <4 x i32> vectors, written as
; icmp ult + select (%a where %a <u %b, otherwise %b).
; The SSE2 checks expect both inputs xor'ed with 2^31 (2147483648), then a
; pcmpgtd mask combined through pand/pandn/por. The SSE4.1, SSE4.2 and AVX
; checks expect a single pminud / vpminud.
1030 define <4 x i32> @min_lt_v4i32(<4 x i32> %a, <4 x i32> %b) {
1031 ; SSE2-LABEL: min_lt_v4i32:
1033 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
1034 ; SSE2-NEXT: movdqa %xmm0, %xmm3
1035 ; SSE2-NEXT: pxor %xmm2, %xmm3
1036 ; SSE2-NEXT: pxor %xmm1, %xmm2
1037 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm2
1038 ; SSE2-NEXT: pand %xmm2, %xmm0
1039 ; SSE2-NEXT: pandn %xmm1, %xmm2
1040 ; SSE2-NEXT: por %xmm2, %xmm0
1043 ; SSE41-LABEL: min_lt_v4i32:
1045 ; SSE41-NEXT: pminud %xmm1, %xmm0
1048 ; SSE42-LABEL: min_lt_v4i32:
1050 ; SSE42-NEXT: pminud %xmm1, %xmm0
1053 ; AVX-LABEL: min_lt_v4i32:
1055 ; AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
1057 %1 = icmp ult <4 x i32> %a, %b
1058 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
; Unsigned min of two <8 x i32> vectors (`icmp ult` + `select`).
; Per the checks, SSE targets process the value as two 128-bit halves
; (xmm0 vs xmm2 and xmm1 vs xmm3): the SSE2 run repeats the sign-bit flip,
; pcmpgtd and pand/pandn/por blend for each half, while SSE4.1/SSE4.2 use two
; pminud. AVX1 extracts the upper halves, applies vpminud twice and re-inserts
; the upper result; AVX2 and AVX512 use one ymm vpminud.
1062 define <8 x i32> @min_lt_v8i32(<8 x i32> %a, <8 x i32> %b) {
1063 ; SSE2-LABEL: min_lt_v8i32:
1065 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
1066 ; SSE2-NEXT: movdqa %xmm0, %xmm5
1067 ; SSE2-NEXT: pxor %xmm4, %xmm5
1068 ; SSE2-NEXT: movdqa %xmm2, %xmm6
1069 ; SSE2-NEXT: pxor %xmm4, %xmm6
1070 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
1071 ; SSE2-NEXT: pand %xmm6, %xmm0
1072 ; SSE2-NEXT: pandn %xmm2, %xmm6
1073 ; SSE2-NEXT: por %xmm6, %xmm0
1074 ; SSE2-NEXT: movdqa %xmm1, %xmm2
1075 ; SSE2-NEXT: pxor %xmm4, %xmm2
1076 ; SSE2-NEXT: pxor %xmm3, %xmm4
1077 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
1078 ; SSE2-NEXT: pand %xmm4, %xmm1
1079 ; SSE2-NEXT: pandn %xmm3, %xmm4
1080 ; SSE2-NEXT: por %xmm4, %xmm1
1083 ; SSE41-LABEL: min_lt_v8i32:
1085 ; SSE41-NEXT: pminud %xmm2, %xmm0
1086 ; SSE41-NEXT: pminud %xmm3, %xmm1
1089 ; SSE42-LABEL: min_lt_v8i32:
1091 ; SSE42-NEXT: pminud %xmm2, %xmm0
1092 ; SSE42-NEXT: pminud %xmm3, %xmm1
1095 ; AVX1-LABEL: min_lt_v8i32:
1097 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1098 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1099 ; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2
1100 ; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
1101 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1104 ; AVX2-LABEL: min_lt_v8i32:
1106 ; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
1109 ; AVX512-LABEL: min_lt_v8i32:
1111 ; AVX512-NEXT: vpminud %ymm1, %ymm0, %ymm0
1113 %1 = icmp ult <8 x i32> %a, %b
1114 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
; Unsigned min of two <8 x i16> vectors (`icmp ult` + `select`).
; With only SSE2 the checks expect psubusw followed by psubw, i.e. the result
; is computed as a - saturating_sub(a, b); SSE4.1 and later use
; pminuw/vpminuw directly.
1118 define <8 x i16> @min_lt_v8i16(<8 x i16> %a, <8 x i16> %b) {
1119 ; SSE2-LABEL: min_lt_v8i16:
1121 ; SSE2-NEXT: movdqa %xmm0, %xmm2
1122 ; SSE2-NEXT: psubusw %xmm1, %xmm2
1123 ; SSE2-NEXT: psubw %xmm2, %xmm0
1126 ; SSE41-LABEL: min_lt_v8i16:
1128 ; SSE41-NEXT: pminuw %xmm1, %xmm0
1131 ; SSE42-LABEL: min_lt_v8i16:
1133 ; SSE42-NEXT: pminuw %xmm1, %xmm0
1136 ; AVX-LABEL: min_lt_v8i16:
1138 ; AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
1140 %1 = icmp ult <8 x i16> %a, %b
1141 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
; Unsigned min of two <16 x i16> vectors (`icmp ult` + `select`).
; Per the checks, SSE targets handle the two 128-bit halves separately
; (psubusw + psubw per half with only SSE2, pminuw per half from SSE4.1 on),
; AVX1 splits the ymm operands into xmm halves around two vpminuw, and the
; AVX2 and AVX512 runs emit a single ymm vpminuw.
1145 define <16 x i16> @min_lt_v16i16(<16 x i16> %a, <16 x i16> %b) {
1146 ; SSE2-LABEL: min_lt_v16i16:
1148 ; SSE2-NEXT: movdqa %xmm0, %xmm4
1149 ; SSE2-NEXT: psubusw %xmm2, %xmm4
1150 ; SSE2-NEXT: psubw %xmm4, %xmm0
1151 ; SSE2-NEXT: movdqa %xmm1, %xmm2
1152 ; SSE2-NEXT: psubusw %xmm3, %xmm2
1153 ; SSE2-NEXT: psubw %xmm2, %xmm1
1156 ; SSE41-LABEL: min_lt_v16i16:
1158 ; SSE41-NEXT: pminuw %xmm2, %xmm0
1159 ; SSE41-NEXT: pminuw %xmm3, %xmm1
1162 ; SSE42-LABEL: min_lt_v16i16:
1164 ; SSE42-NEXT: pminuw %xmm2, %xmm0
1165 ; SSE42-NEXT: pminuw %xmm3, %xmm1
1168 ; AVX1-LABEL: min_lt_v16i16:
1170 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1171 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1172 ; AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm2
1173 ; AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
1174 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1177 ; AVX2-LABEL: min_lt_v16i16:
1179 ; AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
1182 ; AVX512-LABEL: min_lt_v16i16:
1184 ; AVX512-NEXT: vpminuw %ymm1, %ymm0, %ymm0
1186 %1 = icmp ult <16 x i16> %a, %b
1187 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
; Unsigned min of two <16 x i8> vectors (`icmp ult` + `select`).
; Every run line is expected to select a single pminub/vpminub, so the SSE
; runs share one check prefix and the AVX runs share another.
1191 define <16 x i8> @min_lt_v16i8(<16 x i8> %a, <16 x i8> %b) {
1192 ; SSE-LABEL: min_lt_v16i8:
1194 ; SSE-NEXT: pminub %xmm1, %xmm0
1197 ; AVX-LABEL: min_lt_v16i8:
1199 ; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
1201 %1 = icmp ult <16 x i8> %a, %b
1202 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
; Unsigned min of two <32 x i8> vectors (`icmp ult` + `select`).
; Per the checks, all SSE runs use one pminub per 128-bit half, AVX1 splits
; the ymm operands into xmm halves around two vpminub and re-inserts the upper
; result, and the AVX2 and AVX512 runs emit a single ymm vpminub.
1206 define <32 x i8> @min_lt_v32i8(<32 x i8> %a, <32 x i8> %b) {
1207 ; SSE-LABEL: min_lt_v32i8:
1209 ; SSE-NEXT: pminub %xmm2, %xmm0
1210 ; SSE-NEXT: pminub %xmm3, %xmm1
1213 ; AVX1-LABEL: min_lt_v32i8:
1215 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1216 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1217 ; AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2
1218 ; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
1219 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1222 ; AVX2-LABEL: min_lt_v32i8:
1224 ; AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
1227 ; AVX512-LABEL: min_lt_v32i8:
1229 ; AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm0
1231 %1 = icmp ult <32 x i8> %a, %b
1232 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
1237 ; Unsigned Minimum (LE)
; Unsigned min of two <2 x i64> vectors written with `icmp ule` + `select`.
; Expected lowering, as pinned by the checks:
;  - SSE2 and SSE4.1 runs use no 64-bit compare. Both operands are XORed with
;    9223372039002259456 (0x8000000080000000) and the 64-bit ordering is
;    assembled from pcmpgtd/pcmpeqd on the 32-bit halves, then used as a blend
;    mask (pand/pandn/por with SSE2, blendvpd with SSE4.1).
;  - SSE4.2, AVX1 and AVX2 runs XOR the operands with 9223372036854775808
;    (the i64 sign bit) and compare with the signed pcmpgtq/vpcmpgtq before
;    blending with blendvpd/vblendvpd.
;  - AVX512 runs widen the xmm operands to zmm for vpminuq and finish with
;    vzeroupper.
; The mask is a strict "b > a" compare; lanes where a == b take %b, which is
; the same value, so the result still matches the `ule` select.
1240 define <2 x i64> @min_le_v2i64(<2 x i64> %a, <2 x i64> %b) {
1241 ; SSE2-LABEL: min_le_v2i64:
1243 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
1244 ; SSE2-NEXT: movdqa %xmm0, %xmm3
1245 ; SSE2-NEXT: pxor %xmm2, %xmm3
1246 ; SSE2-NEXT: pxor %xmm1, %xmm2
1247 ; SSE2-NEXT: movdqa %xmm2, %xmm4
1248 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
1249 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
1250 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
1251 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1252 ; SSE2-NEXT: pand %xmm5, %xmm2
1253 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
1254 ; SSE2-NEXT: por %xmm2, %xmm3
1255 ; SSE2-NEXT: pand %xmm3, %xmm0
1256 ; SSE2-NEXT: pandn %xmm1, %xmm3
1257 ; SSE2-NEXT: por %xmm3, %xmm0
1260 ; SSE41-LABEL: min_le_v2i64:
1262 ; SSE41-NEXT: movdqa %xmm0, %xmm2
1263 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
1264 ; SSE41-NEXT: pxor %xmm3, %xmm0
1265 ; SSE41-NEXT: pxor %xmm1, %xmm3
1266 ; SSE41-NEXT: movdqa %xmm3, %xmm4
1267 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
1268 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
1269 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
1270 ; SSE41-NEXT: pand %xmm4, %xmm0
1271 ; SSE41-NEXT: por %xmm3, %xmm0
1272 ; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
1273 ; SSE41-NEXT: movapd %xmm1, %xmm0
1276 ; SSE42-LABEL: min_le_v2i64:
1278 ; SSE42-NEXT: movdqa %xmm0, %xmm2
1279 ; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
1280 ; SSE42-NEXT: movdqa %xmm2, %xmm3
1281 ; SSE42-NEXT: pxor %xmm0, %xmm3
1282 ; SSE42-NEXT: pxor %xmm1, %xmm0
1283 ; SSE42-NEXT: pcmpgtq %xmm3, %xmm0
1284 ; SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
1285 ; SSE42-NEXT: movapd %xmm1, %xmm0
1288 ; AVX1-LABEL: min_le_v2i64:
1290 ; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
1291 ; AVX1-NEXT: # xmm2 = mem[0,0]
1292 ; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3
1293 ; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm2
1294 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
1295 ; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1298 ; AVX2-LABEL: min_le_v2i64:
1300 ; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
1301 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
1302 ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
1303 ; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
1304 ; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1307 ; AVX512-LABEL: min_le_v2i64:
1309 ; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1310 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1311 ; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
1312 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1313 ; AVX512-NEXT: vzeroupper
1315 %1 = icmp ule <2 x i64> %a, %b
1316 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
; Unsigned min of two <4 x i64> vectors written with `icmp ule` + `select`.
; Expected lowering, as pinned by the checks:
;  - SSE runs work on two 128-bit halves (xmm0 vs xmm2, xmm1 vs xmm3).
;    With SSE2 and SSE4.1 each half is biased by 9223372039002259456
;    (0x8000000080000000) and ordered with pcmpgtd/pcmpeqd on 32-bit pieces;
;    with SSE4.2 each half is biased by 9223372036854775808 (the i64 sign bit)
;    and compared with pcmpgtq. SSE4.1/SSE4.2 blend with blendvpd, SSE2 with
;    pand/pandn/por.
;  - The AVX1 run compares the lower and upper xmm halves separately with
;    vpcmpgtq, rebuilds a ymm mask with vinsertf128 and blends once with
;    vblendvpd.
;  - The AVX2 run performs the bias, vpcmpgtq and vblendvpd on full ymm
;    registers.
;  - AVX512 runs widen the ymm operands to zmm for a single vpminuq.
1320 define <4 x i64> @min_le_v4i64(<4 x i64> %a, <4 x i64> %b) {
1321 ; SSE2-LABEL: min_le_v4i64:
1323 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
1324 ; SSE2-NEXT: movdqa %xmm0, %xmm5
1325 ; SSE2-NEXT: pxor %xmm4, %xmm5
1326 ; SSE2-NEXT: movdqa %xmm2, %xmm6
1327 ; SSE2-NEXT: pxor %xmm4, %xmm6
1328 ; SSE2-NEXT: movdqa %xmm6, %xmm7
1329 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
1330 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
1331 ; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
1332 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
1333 ; SSE2-NEXT: pand %xmm8, %xmm5
1334 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
1335 ; SSE2-NEXT: por %xmm5, %xmm6
1336 ; SSE2-NEXT: pand %xmm6, %xmm0
1337 ; SSE2-NEXT: pandn %xmm2, %xmm6
1338 ; SSE2-NEXT: por %xmm6, %xmm0
1339 ; SSE2-NEXT: movdqa %xmm1, %xmm2
1340 ; SSE2-NEXT: pxor %xmm4, %xmm2
1341 ; SSE2-NEXT: pxor %xmm3, %xmm4
1342 ; SSE2-NEXT: movdqa %xmm4, %xmm5
1343 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
1344 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
1345 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm4
1346 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
1347 ; SSE2-NEXT: pand %xmm6, %xmm2
1348 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
1349 ; SSE2-NEXT: por %xmm2, %xmm4
1350 ; SSE2-NEXT: pand %xmm4, %xmm1
1351 ; SSE2-NEXT: pandn %xmm3, %xmm4
1352 ; SSE2-NEXT: por %xmm4, %xmm1
1355 ; SSE41-LABEL: min_le_v4i64:
1357 ; SSE41-NEXT: movdqa %xmm0, %xmm4
1358 ; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
1359 ; SSE41-NEXT: pxor %xmm5, %xmm0
1360 ; SSE41-NEXT: movdqa %xmm2, %xmm6
1361 ; SSE41-NEXT: pxor %xmm5, %xmm6
1362 ; SSE41-NEXT: movdqa %xmm6, %xmm7
1363 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm7
1364 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm6
1365 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
1366 ; SSE41-NEXT: pand %xmm7, %xmm0
1367 ; SSE41-NEXT: por %xmm6, %xmm0
1368 ; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2
1369 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1370 ; SSE41-NEXT: pxor %xmm5, %xmm0
1371 ; SSE41-NEXT: pxor %xmm3, %xmm5
1372 ; SSE41-NEXT: movdqa %xmm5, %xmm4
1373 ; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
1374 ; SSE41-NEXT: pcmpgtd %xmm0, %xmm5
1375 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
1376 ; SSE41-NEXT: pand %xmm4, %xmm0
1377 ; SSE41-NEXT: por %xmm5, %xmm0
1378 ; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3
1379 ; SSE41-NEXT: movapd %xmm2, %xmm0
1380 ; SSE41-NEXT: movapd %xmm3, %xmm1
1383 ; SSE42-LABEL: min_le_v4i64:
1385 ; SSE42-NEXT: movdqa %xmm0, %xmm4
1386 ; SSE42-NEXT: movdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
1387 ; SSE42-NEXT: movdqa %xmm0, %xmm6
1388 ; SSE42-NEXT: pxor %xmm5, %xmm6
1389 ; SSE42-NEXT: movdqa %xmm2, %xmm0
1390 ; SSE42-NEXT: pxor %xmm5, %xmm0
1391 ; SSE42-NEXT: pcmpgtq %xmm6, %xmm0
1392 ; SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2
1393 ; SSE42-NEXT: movdqa %xmm1, %xmm0
1394 ; SSE42-NEXT: pxor %xmm5, %xmm0
1395 ; SSE42-NEXT: pxor %xmm3, %xmm5
1396 ; SSE42-NEXT: pcmpgtq %xmm0, %xmm5
1397 ; SSE42-NEXT: movdqa %xmm5, %xmm0
1398 ; SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
1399 ; SSE42-NEXT: movapd %xmm2, %xmm0
1400 ; SSE42-NEXT: movapd %xmm3, %xmm1
1403 ; AVX1-LABEL: min_le_v4i64:
1405 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1406 ; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
1407 ; AVX1-NEXT: # xmm3 = mem[0,0]
1408 ; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
1409 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
1410 ; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4
1411 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
1412 ; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm4
1413 ; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm3
1414 ; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
1415 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1416 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1419 ; AVX2-LABEL: min_le_v4i64:
1421 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
1422 ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
1423 ; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm2
1424 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
1425 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1428 ; AVX512-LABEL: min_le_v4i64:
1430 ; AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
1431 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1432 ; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
1433 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1435 %1 = icmp ule <4 x i64> %a, %b
1436 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
; Unsigned min of two <4 x i32> vectors written with `icmp ule` + `select`.
; Expected codegen, as pinned by the checks: with only SSE2 both operands are
; XORed with 2147483648 (the i32 sign bit), compared with the signed pcmpgtd
; and blended with pand/pandn/por; SSE4.1 and later emit a single
; pminud/vpminud.
1440 define <4 x i32> @min_le_v4i32(<4 x i32> %a, <4 x i32> %b) {
1441 ; SSE2-LABEL: min_le_v4i32:
1443 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
1444 ; SSE2-NEXT: movdqa %xmm0, %xmm3
1445 ; SSE2-NEXT: pxor %xmm2, %xmm3
1446 ; SSE2-NEXT: pxor %xmm1, %xmm2
1447 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm2
1448 ; SSE2-NEXT: pand %xmm2, %xmm0
1449 ; SSE2-NEXT: pandn %xmm1, %xmm2
1450 ; SSE2-NEXT: por %xmm2, %xmm0
1453 ; SSE41-LABEL: min_le_v4i32:
1455 ; SSE41-NEXT: pminud %xmm1, %xmm0
1458 ; SSE42-LABEL: min_le_v4i32:
1460 ; SSE42-NEXT: pminud %xmm1, %xmm0
1463 ; AVX-LABEL: min_le_v4i32:
1465 ; AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
1467 %1 = icmp ule <4 x i32> %a, %b
1468 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
; Unsigned min of two <8 x i32> vectors written with `icmp ule` + `select`.
; Per the checks, SSE targets process two 128-bit halves (xmm0 vs xmm2 and
; xmm1 vs xmm3): the SSE2 run repeats the sign-bit flip, pcmpgtd and
; pand/pandn/por blend per half, SSE4.1/SSE4.2 use two pminud. AVX1 extracts
; the upper halves, applies vpminud twice and re-inserts the upper result;
; AVX2 and AVX512 use one ymm vpminud.
1472 define <8 x i32> @min_le_v8i32(<8 x i32> %a, <8 x i32> %b) {
1473 ; SSE2-LABEL: min_le_v8i32:
1475 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
1476 ; SSE2-NEXT: movdqa %xmm0, %xmm5
1477 ; SSE2-NEXT: pxor %xmm4, %xmm5
1478 ; SSE2-NEXT: movdqa %xmm2, %xmm6
1479 ; SSE2-NEXT: pxor %xmm4, %xmm6
1480 ; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
1481 ; SSE2-NEXT: pand %xmm6, %xmm0
1482 ; SSE2-NEXT: pandn %xmm2, %xmm6
1483 ; SSE2-NEXT: por %xmm6, %xmm0
1484 ; SSE2-NEXT: movdqa %xmm1, %xmm2
1485 ; SSE2-NEXT: pxor %xmm4, %xmm2
1486 ; SSE2-NEXT: pxor %xmm3, %xmm4
1487 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
1488 ; SSE2-NEXT: pand %xmm4, %xmm1
1489 ; SSE2-NEXT: pandn %xmm3, %xmm4
1490 ; SSE2-NEXT: por %xmm4, %xmm1
1493 ; SSE41-LABEL: min_le_v8i32:
1495 ; SSE41-NEXT: pminud %xmm2, %xmm0
1496 ; SSE41-NEXT: pminud %xmm3, %xmm1
1499 ; SSE42-LABEL: min_le_v8i32:
1501 ; SSE42-NEXT: pminud %xmm2, %xmm0
1502 ; SSE42-NEXT: pminud %xmm3, %xmm1
1505 ; AVX1-LABEL: min_le_v8i32:
1507 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1508 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1509 ; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2
1510 ; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
1511 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1514 ; AVX2-LABEL: min_le_v8i32:
1516 ; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
1519 ; AVX512-LABEL: min_le_v8i32:
1521 ; AVX512-NEXT: vpminud %ymm1, %ymm0, %ymm0
1523 %1 = icmp ule <8 x i32> %a, %b
1524 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
; Unsigned min of two <8 x i16> vectors written with `icmp ule` + `select`.
; With only SSE2 the checks expect psubusw followed by psubw, i.e. the result
; is computed as a - saturating_sub(a, b); SSE4.1 and later use
; pminuw/vpminuw directly.
1528 define <8 x i16> @min_le_v8i16(<8 x i16> %a, <8 x i16> %b) {
1529 ; SSE2-LABEL: min_le_v8i16:
1531 ; SSE2-NEXT: movdqa %xmm0, %xmm2
1532 ; SSE2-NEXT: psubusw %xmm1, %xmm2
1533 ; SSE2-NEXT: psubw %xmm2, %xmm0
1536 ; SSE41-LABEL: min_le_v8i16:
1538 ; SSE41-NEXT: pminuw %xmm1, %xmm0
1541 ; SSE42-LABEL: min_le_v8i16:
1543 ; SSE42-NEXT: pminuw %xmm1, %xmm0
1546 ; AVX-LABEL: min_le_v8i16:
1548 ; AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
1550 %1 = icmp ule <8 x i16> %a, %b
1551 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
; Unsigned min of two <16 x i16> vectors written with `icmp ule` + `select`.
; Per the checks, SSE targets handle the two 128-bit halves separately
; (psubusw + psubw per half with only SSE2, pminuw per half from SSE4.1 on),
; AVX1 splits the ymm operands into xmm halves around two vpminuw, and the
; AVX2 and AVX512 runs emit a single ymm vpminuw.
1555 define <16 x i16> @min_le_v16i16(<16 x i16> %a, <16 x i16> %b) {
1556 ; SSE2-LABEL: min_le_v16i16:
1558 ; SSE2-NEXT: movdqa %xmm0, %xmm4
1559 ; SSE2-NEXT: psubusw %xmm2, %xmm4
1560 ; SSE2-NEXT: psubw %xmm4, %xmm0
1561 ; SSE2-NEXT: movdqa %xmm1, %xmm2
1562 ; SSE2-NEXT: psubusw %xmm3, %xmm2
1563 ; SSE2-NEXT: psubw %xmm2, %xmm1
1566 ; SSE41-LABEL: min_le_v16i16:
1568 ; SSE41-NEXT: pminuw %xmm2, %xmm0
1569 ; SSE41-NEXT: pminuw %xmm3, %xmm1
1572 ; SSE42-LABEL: min_le_v16i16:
1574 ; SSE42-NEXT: pminuw %xmm2, %xmm0
1575 ; SSE42-NEXT: pminuw %xmm3, %xmm1
1578 ; AVX1-LABEL: min_le_v16i16:
1580 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1581 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1582 ; AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm2
1583 ; AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
1584 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1587 ; AVX2-LABEL: min_le_v16i16:
1589 ; AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
1592 ; AVX512-LABEL: min_le_v16i16:
1594 ; AVX512-NEXT: vpminuw %ymm1, %ymm0, %ymm0
1596 %1 = icmp ule <16 x i16> %a, %b
1597 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
; Unsigned min of two <16 x i8> vectors written with `icmp ule` + `select`.
; Every run line is expected to select a single pminub/vpminub, so the SSE
; runs share one check prefix and the AVX runs share another.
1601 define <16 x i8> @min_le_v16i8(<16 x i8> %a, <16 x i8> %b) {
1602 ; SSE-LABEL: min_le_v16i8:
1604 ; SSE-NEXT: pminub %xmm1, %xmm0
1607 ; AVX-LABEL: min_le_v16i8:
1609 ; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
1611 %1 = icmp ule <16 x i8> %a, %b
1612 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
; Unsigned min of two <32 x i8> vectors written with `icmp ule` + `select`.
; Per the checks, all SSE runs use one pminub per 128-bit half, AVX1 splits
; the ymm operands into xmm halves around two vpminub and re-inserts the upper
; result, and the AVX2 and AVX512 runs emit a single ymm vpminub.
1616 define <32 x i8> @min_le_v32i8(<32 x i8> %a, <32 x i8> %b) {
1617 ; SSE-LABEL: min_le_v32i8:
1619 ; SSE-NEXT: pminub %xmm2, %xmm0
1620 ; SSE-NEXT: pminub %xmm3, %xmm1
1623 ; AVX1-LABEL: min_le_v32i8:
1625 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1626 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1627 ; AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2
1628 ; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
1629 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1632 ; AVX2-LABEL: min_le_v32i8:
1634 ; AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
1637 ; AVX512-LABEL: min_le_v32i8:
1639 ; AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm0
1641 %1 = icmp ule <32 x i8> %a, %b
1642 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
; Constant-folding test: unsigned max (`icmp ugt` + `select`) of two constant
; <2 x i64> vectors. Each insertelement rewrites lane 0 with the value it
; already holds, so the operands are <-7, 7> and <-1, 1>. The checks expect
; the whole computation to fold to a load of <-1, 7> (printed as unsigned).
1650 define <2 x i64> @max_gt_v2i64c() {
1651 ; SSE-LABEL: max_gt_v2i64c:
1653 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551615,7]
1656 ; AVX-LABEL: max_gt_v2i64c:
1658 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551615,7]
1660 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
1661 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
1662 %3 = icmp ugt <2 x i64> %1, %2
1663 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
; Constant-folding test: unsigned max (`icmp ugt` + `select`) of two constant
; <4 x i64> vectors; each insertelement rewrites lane 0 with the value it
; already holds. The folded result is <-1, -1, 7, 7>. The SSE runs return it
; in two registers: xmm0 is set to all-ones with pcmpeqd and xmm1 is loaded
; with [7,7]. The AVX runs load one ymm constant.
1667 define <4 x i64> @max_gt_v4i64c() {
1668 ; SSE-LABEL: max_gt_v4i64c:
1670 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,7]
1671 ; SSE-NEXT: pcmpeqd %xmm0, %xmm0
1674 ; AVX-LABEL: max_gt_v4i64c:
1676 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,7,7]
1678 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
1679 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
1680 %3 = icmp ugt <4 x i64> %1, %2
1681 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
; Constant-folding test: unsigned max (`icmp ugt` + `select`) of two constant
; <4 x i32> vectors; each insertelement rewrites lane 0 with the value it
; already holds. The checks expect only a constant load of <-1, -1, 7, 7>
; (printed as unsigned).
1685 define <4 x i32> @max_gt_v4i32c() {
1686 ; SSE-LABEL: max_gt_v4i32c:
1688 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
1691 ; AVX-LABEL: max_gt_v4i32c:
1693 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
1695 %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
1696 %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
1697 %3 = icmp ugt <4 x i32> %1, %2
1698 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
; Constant-folding test: unsigned max (`icmp ugt` + `select`) of two constant
; <8 x i32> vectors; each insertelement rewrites lane 0 with the value it
; already holds. The folded result is <-1, -3, -3, -1, 7, 5, 5, 7>, loaded as
; two xmm constants on SSE runs and as one ymm constant on AVX runs.
1702 define <8 x i32> @max_gt_v8i32c() {
1703 ; SSE-LABEL: max_gt_v8i32c:
1705 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295]
1706 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7]
1709 ; AVX-LABEL: max_gt_v8i32c:
1711 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7]
1713 %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
1714 %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
1715 %3 = icmp ugt <8 x i32> %1, %2
1716 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
; Constant-folding test: unsigned max (`icmp ugt` + `select`) of two constant
; <8 x i16> vectors; each insertelement rewrites lane 0 with the value it
; already holds. The checks expect only a constant load of
; <-1, -3, -3, -1, 7, 5, 5, 7> (printed as unsigned 16-bit values).
1720 define <8 x i16> @max_gt_v8i16c() {
1721 ; SSE-LABEL: max_gt_v8i16c:
1723 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
1726 ; AVX-LABEL: max_gt_v8i16c:
1728 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
1730 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
1731 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
1732 %3 = icmp ugt <8 x i16> %1, %2
1733 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
; Constant-folding test: unsigned max (`icmp ugt` + `select`) of two constant
; <16 x i16> vectors; each insertelement rewrites lane 0 with the value it
; already holds. The checks expect the lane-wise maximum to be materialized
; purely as constant loads: two xmm constants on SSE runs, one ymm constant
; on AVX runs.
1737 define <16 x i16> @max_gt_v16i16c() {
1738 ; SSE-LABEL: max_gt_v16i16c:
1740 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0]
1741 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8]
1744 ; AVX-LABEL: max_gt_v16i16c:
1746 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8]
1748 %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
1749 %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
1750 %3 = icmp ugt <16 x i16> %1, %2
1751 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
; Constant-folding test: unsigned max (`icmp ugt` + `select`) of two constant
; <16 x i8> vectors; each insertelement rewrites lane 0 with the value it
; already holds. The checks expect only a single constant load holding the
; lane-wise maximum (printed as unsigned bytes).
1755 define <16 x i8> @max_gt_v16i8c() {
1756 ; SSE-LABEL: max_gt_v16i8c:
1758 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
1761 ; AVX-LABEL: max_gt_v16i8c:
1763 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
1765 %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
1766 %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
1767 %3 = icmp ugt <16 x i8> %1, %2
1768 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
; Constant-folding test: unsigned max written with `icmp uge` + `select` on
; two constant <2 x i64> vectors. Each insertelement rewrites lane 0 with the
; value it already holds, so the operands are <-7, 7> and <-1, 1>. The checks
; expect a single constant load of <-1, 7> (printed as unsigned).
1772 define <2 x i64> @max_ge_v2i64c() {
1773 ; SSE-LABEL: max_ge_v2i64c:
1775 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551615,7]
1778 ; AVX-LABEL: max_ge_v2i64c:
1780 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551615,7]
1782 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
1783 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
1784 %3 = icmp uge <2 x i64> %1, %2
1785 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
; Constant-folding test: unsigned max written with `icmp uge` + `select` on
; two constant <4 x i64> vectors; each insertelement rewrites lane 0 with the
; value it already holds. The folded result is <-1, -1, 7, 7>. The SSE runs
; set xmm0 to all-ones with pcmpeqd and load [7,7] into xmm1; the AVX runs
; load one ymm constant.
1789 define <4 x i64> @max_ge_v4i64c() {
1790 ; SSE-LABEL: max_ge_v4i64c:
1792 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,7]
1793 ; SSE-NEXT: pcmpeqd %xmm0, %xmm0
1796 ; AVX-LABEL: max_ge_v4i64c:
1798 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,7,7]
1800 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
1801 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
1802 %3 = icmp uge <4 x i64> %1, %2
1803 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
; Constant-folding test: unsigned max written with `icmp uge` + `select` on
; two constant <4 x i32> vectors; each insertelement rewrites lane 0 with the
; value it already holds. The checks expect only a constant load of
; <-1, -1, 7, 7> (printed as unsigned).
1807 define <4 x i32> @max_ge_v4i32c() {
1808 ; SSE-LABEL: max_ge_v4i32c:
1810 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
1813 ; AVX-LABEL: max_ge_v4i32c:
1815 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
1817 %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
1818 %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
1819 %3 = icmp uge <4 x i32> %1, %2
1820 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
; Constant-folding test: unsigned max written with `icmp uge` + `select` on
; two constant <8 x i32> vectors; each insertelement rewrites lane 0 with the
; value it already holds. The folded result is <-1, -3, -3, -1, 7, 5, 5, 7>,
; loaded as two xmm constants on SSE runs and one ymm constant on AVX runs.
1824 define <8 x i32> @max_ge_v8i32c() {
1825 ; SSE-LABEL: max_ge_v8i32c:
1827 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295]
1828 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7]
1831 ; AVX-LABEL: max_ge_v8i32c:
1833 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7]
1835 %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
1836 %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
1837 %3 = icmp uge <8 x i32> %1, %2
1838 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
; Constant-folding test: unsigned max written with `icmp uge` + `select` on
; two constant <8 x i16> vectors; each insertelement rewrites lane 0 with the
; value it already holds. The checks expect only a constant load of
; <-1, -3, -3, -1, 7, 5, 5, 7> (printed as unsigned 16-bit values).
1842 define <8 x i16> @max_ge_v8i16c() {
1843 ; SSE-LABEL: max_ge_v8i16c:
1845 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
1848 ; AVX-LABEL: max_ge_v8i16c:
1850 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
1852 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
1853 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
1854 %3 = icmp uge <8 x i16> %1, %2
1855 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
; Constant-folding test: unsigned max written with `icmp uge` + `select` on
; two constant <16 x i16> vectors; each insertelement rewrites lane 0 with the
; value it already holds. The checks expect the lane-wise maximum to be
; materialized purely as constant loads: two xmm constants on SSE runs, one
; ymm constant on AVX runs.
1859 define <16 x i16> @max_ge_v16i16c() {
1860 ; SSE-LABEL: max_ge_v16i16c:
1862 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0]
1863 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8]
1866 ; AVX-LABEL: max_ge_v16i16c:
1868 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8]
1870 %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
1871 %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
1872 %3 = icmp uge <16 x i16> %1, %2
1873 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
; Constant-folding test: unsigned max written with `icmp uge` + `select` on
; two constant <16 x i8> vectors; each insertelement rewrites lane 0 with the
; value it already holds. The checks expect only a single constant load
; holding the lane-wise maximum (printed as unsigned bytes).
1877 define <16 x i8> @max_ge_v16i8c() {
1878 ; SSE-LABEL: max_ge_v16i8c:
1880 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
1883 ; AVX-LABEL: max_ge_v16i8c:
1885 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
1887 %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
1888 %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
1889 %3 = icmp uge <16 x i8> %1, %2
1890 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
; Constant-folding test: unsigned min (`icmp ult` + `select`) of two constant
; <2 x i64> vectors. Each insertelement rewrites lane 0 with the value it
; already holds, so the operands are <-7, 7> and <-1, 1>. The checks expect
; a single constant load of <-7, 1> (printed as unsigned).
1894 define <2 x i64> @min_lt_v2i64c() {
1895 ; SSE-LABEL: min_lt_v2i64c:
1897 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,1]
1900 ; AVX-LABEL: min_lt_v2i64c:
1902 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551609,1]
1904 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
1905 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
1906 %3 = icmp ult <2 x i64> %1, %2
1907 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
; Constant-folding test: unsigned min (`icmp ult` + `select`) of two constant
; <4 x i64> vectors; each insertelement rewrites lane 0 with the value it
; already holds. The folded result is <-7, -7, 1, 1>, loaded as two xmm
; constants on SSE runs and as one ymm constant on AVX runs.
1911 define <4 x i64> @min_lt_v4i64c() {
1912 ; SSE-LABEL: min_lt_v4i64c:
1914 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,18446744073709551609]
1915 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1]
1918 ; AVX-LABEL: min_lt_v4i64c:
1920 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551609,18446744073709551609,1,1]
1922 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
1923 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
1924 %3 = icmp ult <4 x i64> %1, %2
1925 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
; Constant-folding test: unsigned min (`icmp ult` + `select`) of two constant
; <4 x i32> vectors; each insertelement rewrites lane 0 with the value it
; already holds. The checks expect only a constant load of <-7, -7, 1, 1>
; (printed as unsigned).
1929 define <4 x i32> @min_lt_v4i32c() {
1930 ; SSE-LABEL: min_lt_v4i32c:
1932 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
1935 ; AVX-LABEL: min_lt_v4i32c:
1937 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
1939 %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
1940 %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
1941 %3 = icmp ult <4 x i32> %1, %2
1942 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
; Constant-folding test: unsigned min (`icmp ult` + `select`) of two constant
; <8 x i32> vectors; each insertelement rewrites lane 0 with the value it
; already holds. The folded result is <-7, -5, -5, -7, 1, 3, 3, 1>, loaded as
; two xmm constants on SSE runs and as one ymm constant on AVX runs.
1946 define <8 x i32> @min_lt_v8i32c() {
1947 ; SSE-LABEL: min_lt_v8i32c:
1949 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
1950 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1]
1953 ; AVX-LABEL: min_lt_v8i32c:
1955 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
1957 %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
1958 %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
1959 %3 = icmp ult <8 x i32> %1, %2
1960 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
; Constant-folding test: unsigned min (`icmp ult` + `select`) of two constant
; <8 x i16> vectors. The first insertelement rewrites lane 0 with the value it
; already holds (-7); the second one overwrites lane 0 of its vector with 1
; (replacing -1), which is why lane 0 of the expected constant is 1. The
; checks expect only a constant load of the folded result.
; NOTE(review): the i64/i32 min_lt constant tests in this file re-insert -1
; here; the inserted 1 is consistent with the checks below, but confirm the
; difference is intentional.
1964 define <8 x i16> @min_lt_v8i16c() {
1965 ; SSE-LABEL: min_lt_v8i16c:
1967 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,65531,65531,65529,1,3,3,1]
1970 ; AVX-LABEL: min_lt_v8i16c:
1972 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,65531,65531,65529,1,3,3,1]
1974 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
1975 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 1, i32 0
1976 %3 = icmp ult <8 x i16> %1, %2
1977 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
; Constant-folding test: unsigned min (`icmp ult` + `select`) of two constant
; <16 x i16> vectors. The first insertelement rewrites lane 0 with the value
; it already holds (-7); the second one overwrites lane 0 of its vector with 1
; (replacing -1), which is why lane 0 of the expected constant is 1. The
; folded result is loaded as two xmm constants on SSE runs and one ymm
; constant on AVX runs.
; NOTE(review): the i64/i32 min_lt constant tests in this file re-insert -1
; here; the inserted 1 is consistent with the checks below, but confirm the
; difference is intentional.
1981 define <16 x i16> @min_lt_v16i16c() {
1982 ; SSE-LABEL: min_lt_v16i16c:
1984 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,65530,65531,65532,65531,65530,65529,0]
1985 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0]
1988 ; AVX-LABEL: min_lt_v16i16c:
1990 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0]
1992 %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
1993 %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 1, i32 0
1994 %3 = icmp ult <16 x i16> %1, %2
1995 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
; Constant-folding test: unsigned min (`icmp ult` + `select`) of two constant
; <16 x i8> vectors. The first insertelement rewrites lane 0 with the value it
; already holds (-7); the second one overwrites lane 0 of its vector with 1
; (replacing -1), which is why lane 0 of the expected constant is 1. The
; checks expect only a single constant load of the folded result.
; NOTE(review): the i64/i32 min_lt constant tests in this file re-insert -1
; here; the inserted 1 is consistent with the checks below, but confirm the
; difference is intentional.
1999 define <16 x i8> @min_lt_v16i8c() {
2000 ; SSE-LABEL: min_lt_v16i8c:
2002 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
2005 ; AVX-LABEL: min_lt_v16i8c:
2007 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
2009 %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
2010 %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 1, i32 0
2011 %3 = icmp ult <16 x i8> %1, %2
2012 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
; Constant-folding test: unsigned min written with `icmp ule` + `select` on
; two constant <2 x i64> vectors. Each insertelement rewrites lane 0 with the
; value it already holds, so the operands are <-7, 7> and <-1, 1>. The checks
; expect a single constant load of <-7, 1> (printed as unsigned).
2016 define <2 x i64> @min_le_v2i64c() {
2017 ; SSE-LABEL: min_le_v2i64c:
2019 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,1]
2022 ; AVX-LABEL: min_le_v2i64c:
2024 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551609,1]
2026 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
2027 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
2028 %3 = icmp ule <2 x i64> %1, %2
2029 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
; Constant-folding test: unsigned min written with `icmp ule` + `select` on
; two constant <4 x i64> vectors; each insertelement rewrites lane 0 with the
; value it already holds. The folded result is <-7, -7, 1, 1>, loaded as two
; xmm constants on SSE runs and as one ymm constant on AVX runs.
2033 define <4 x i64> @min_le_v4i64c() {
2034 ; SSE-LABEL: min_le_v4i64c:
2036 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,18446744073709551609]
2037 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1]
2040 ; AVX-LABEL: min_le_v4i64c:
2042 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551609,18446744073709551609,1,1]
2044 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
2045 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
2046 %3 = icmp ule <4 x i64> %1, %2
2047 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
; Constant-folding test: unsigned min written with `icmp ule` + `select` on
; two constant <4 x i32> vectors; each insertelement rewrites lane 0 with the
; value it already holds. The checks expect only a constant load of
; <-7, -7, 1, 1> (printed as unsigned).
2051 define <4 x i32> @min_le_v4i32c() {
2052 ; SSE-LABEL: min_le_v4i32c:
2054 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
2057 ; AVX-LABEL: min_le_v4i32c:
2059 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
2061 %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
2062 %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
2063 %3 = icmp ule <4 x i32> %1, %2
2064 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
; Constant-folding test: unsigned min written with `icmp ule` + `select` on
; two constant <8 x i32> vectors; each insertelement rewrites lane 0 with the
; value it already holds. The folded result is <-7, -5, -5, -7, 1, 3, 3, 1>,
; loaded as two xmm constants on SSE runs and one ymm constant on AVX runs.
2068 define <8 x i32> @min_le_v8i32c() {
2069 ; SSE-LABEL: min_le_v8i32c:
2071 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
2072 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1]
2075 ; AVX-LABEL: min_le_v8i32c:
2077 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
2079 %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
2080 %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
2081 %3 = icmp ule <8 x i32> %1, %2
2082 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
; Constant-folding test: unsigned min written with `icmp ule` + `select` on
; two constant <8 x i16> vectors; each insertelement rewrites lane 0 with the
; value it already holds. The checks expect only a constant load of
; <-7, -5, -5, -7, 1, 3, 3, 1> (printed as unsigned 16-bit values).
2086 define <8 x i16> @min_le_v8i16c() {
2087 ; SSE-LABEL: min_le_v8i16c:
2089 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
2092 ; AVX-LABEL: min_le_v8i16c:
2094 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
2096 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
2097 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
2098 %3 = icmp ule <8 x i16> %1, %2
2099 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
; Constant-folding test: unsigned min written with `icmp ule` + `select` on
; two constant <16 x i16> vectors; each insertelement rewrites lane 0 with the
; value it already holds. The checks expect the lane-wise minimum to be
; materialized purely as constant loads: two xmm constants on SSE runs, one
; ymm constant on AVX runs.
2103 define <16 x i16> @min_le_v16i16c() {
2104 ; SSE-LABEL: min_le_v16i16c:
2106 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65530,65531,65532,65531,65530,65529,0]
2107 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0]
2110 ; AVX-LABEL: min_le_v16i16c:
2112 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65529,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0]
2114 %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
2115 %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
2116 %3 = icmp ule <16 x i16> %1, %2
2117 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
; Constant-folding test: unsigned min written with `icmp ule` + `select` on
; two constant <16 x i8> vectors; each insertelement rewrites lane 0 with the
; value it already holds. The checks expect only a single constant load
; holding the lane-wise minimum (printed as unsigned bytes).
2121 define <16 x i8> @min_le_v16i8c() {
2122 ; SSE-LABEL: min_le_v16i8c:
2124 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
2127 ; AVX-LABEL: min_le_v16i8c:
2129 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
2131 %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
2132 %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
2133 %3 = icmp ule <16 x i8> %1, %2
2134 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2