; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 -disable-peephole | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 -disable-peephole | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl -disable-peephole | FileCheck %s --check-prefix=AVX512

;
; Float Comparisons
; Only equal/not-equal/ordered/unordered can be safely commuted
;
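; (eq/ne/ord/uno compare symmetrically: "x pred y" equals "y pred x", so the
; operands can be swapped and the load folded straight into the compare. The
; remaining predicates are either asymmetric (lt/le) or have no single SSE
; encoding (ueq/one), so their loads stay in a register; see the cases below.)
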
define <4 x i32> @commute_cmpps_eq(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_eq:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_eq:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_eq:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeqps (%rdi), %xmm0, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp oeq <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_ne(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_ne:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpneqps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ne:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneqps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_ne:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneqps (%rdi), %xmm0, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp une <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_ord(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_ord:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpordps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ord:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpordps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_ord:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpordps (%rdi), %xmm0, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ord <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_uno(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_uno:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpunordps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_uno:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpunordps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_uno:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpunordps (%rdi), %xmm0, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp uno <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

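; ueq/one have no single SSE compare predicate: they expand to an eq/unord
; (resp. neq/ord) pair combined with orps/andps. AVX and AVX512 use the
; composite eq_uq/neq_oq predicates instead, but the commuted load is still
; kept in a register.
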
define <4 x i32> @commute_cmpps_ueq(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_ueq:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    cmpeqps %xmm0, %xmm2
; SSE-NEXT:    cmpunordps %xmm1, %xmm0
; SSE-NEXT:    orps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ueq:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %xmm1
; AVX-NEXT:    vcmpeq_uqps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_ueq:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps (%rdi), %xmm1
; AVX512-NEXT:    vcmpeq_uqps %xmm0, %xmm1, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ueq <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_one(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_one:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    cmpneqps %xmm0, %xmm2
; SSE-NEXT:    cmpordps %xmm1, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_one:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %xmm1
; AVX-NEXT:    vcmpneq_oqps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_one:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps (%rdi), %xmm1
; AVX512-NEXT:    vcmpneq_oqps %xmm0, %xmm1, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp one <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_lt(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_lt:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    cmpltps %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_lt:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %xmm1
; AVX-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_lt:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps (%rdi), %xmm1
; AVX512-NEXT:    vcmpltps %xmm0, %xmm1, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp olt <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_le(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_le:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    cmpleps %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_le:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %xmm1
; AVX-NEXT:    vcmpleps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_le:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps (%rdi), %xmm1
; AVX512-NEXT:    vcmpleps %xmm0, %xmm1, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ole <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <8 x i32> @commute_cmpps_eq_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_eq_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqps (%rdi), %xmm0
; SSE-NEXT:    cmpeqps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_eq_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_eq_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeqps (%rdi), %ymm0, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp oeq <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ne_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_ne_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpneqps (%rdi), %xmm0
; SSE-NEXT:    cmpneqps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ne_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneqps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_ne_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneqps (%rdi), %ymm0, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp une <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ord_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_ord_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpordps (%rdi), %xmm0
; SSE-NEXT:    cmpordps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ord_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpordps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_ord_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpordps (%rdi), %ymm0, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ord <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_uno_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_uno_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpunordps (%rdi), %xmm0
; SSE-NEXT:    cmpunordps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_uno_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpunordps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_uno_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpunordps (%rdi), %ymm0, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp uno <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ueq_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_ueq_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm2
; SSE-NEXT:    movaps 16(%rdi), %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm4
; SSE-NEXT:    cmpeqps %xmm0, %xmm4
; SSE-NEXT:    cmpunordps %xmm2, %xmm0
; SSE-NEXT:    orps %xmm4, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm2
; SSE-NEXT:    cmpeqps %xmm1, %xmm2
; SSE-NEXT:    cmpunordps %xmm3, %xmm1
; SSE-NEXT:    orps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ueq_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %ymm1
; AVX-NEXT:    vcmpeq_uqps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_ueq_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps (%rdi), %ymm1
; AVX512-NEXT:    vcmpeq_uqps %ymm0, %ymm1, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ueq <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_one_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_one_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm2
; SSE-NEXT:    movaps 16(%rdi), %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm4
; SSE-NEXT:    cmpneqps %xmm0, %xmm4
; SSE-NEXT:    cmpordps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm4, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm2
; SSE-NEXT:    cmpneqps %xmm1, %xmm2
; SSE-NEXT:    cmpordps %xmm3, %xmm1
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_one_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %ymm1
; AVX-NEXT:    vcmpneq_oqps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_one_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps (%rdi), %ymm1
; AVX512-NEXT:    vcmpneq_oqps %ymm0, %ymm1, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp one <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_lt_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_lt_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm2
; SSE-NEXT:    movaps 16(%rdi), %xmm3
; SSE-NEXT:    cmpltps %xmm0, %xmm2
; SSE-NEXT:    cmpltps %xmm1, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_lt_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %ymm1
; AVX-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_lt_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps (%rdi), %ymm1
; AVX512-NEXT:    vcmpltps %ymm0, %ymm1, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp olt <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_le_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_le_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm2
; SSE-NEXT:    movaps 16(%rdi), %xmm3
; SSE-NEXT:    cmpleps %xmm0, %xmm2
; SSE-NEXT:    cmpleps %xmm1, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_le_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %ymm1
; AVX-NEXT:    vcmpleps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_le_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps (%rdi), %ymm1
; AVX512-NEXT:    vcmpleps %ymm0, %ymm1, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ole <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

;
; Double Comparisons
; Only equal/not-equal/ordered/unordered can be safely commuted
;
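; (The cmppd cases below mirror the cmpps cases above; the AVX512 mask result
; is materialized with vmovdqa64 because the result elements are i64.)
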
define <2 x i64> @commute_cmppd_eq(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_eq:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_eq:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_eq:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeqpd (%rdi), %xmm0, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp oeq <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ne(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_ne:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpneqpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ne:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneqpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_ne:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneqpd (%rdi), %xmm0, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp une <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ord(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_ord:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpordpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ord:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpordpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_ord:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpordpd (%rdi), %xmm0, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ord <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ueq(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_ueq:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm2
; SSE-NEXT:    cmpeqpd %xmm0, %xmm2
; SSE-NEXT:    cmpunordpd %xmm1, %xmm0
; SSE-NEXT:    orpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ueq:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovapd (%rdi), %xmm1
; AVX-NEXT:    vcmpeq_uqpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_ueq:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovapd (%rdi), %xmm1
; AVX512-NEXT:    vcmpeq_uqpd %xmm0, %xmm1, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ueq <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_one(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_one:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm2
; SSE-NEXT:    cmpneqpd %xmm0, %xmm2
; SSE-NEXT:    cmpordpd %xmm1, %xmm0
; SSE-NEXT:    andpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_one:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovapd (%rdi), %xmm1
; AVX-NEXT:    vcmpneq_oqpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_one:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovapd (%rdi), %xmm1
; AVX512-NEXT:    vcmpneq_oqpd %xmm0, %xmm1, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp one <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_uno(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_uno:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpunordpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_uno:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpunordpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_uno:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpunordpd (%rdi), %xmm0, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp uno <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_lt(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_lt:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm1
; SSE-NEXT:    cmpltpd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_lt:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovapd (%rdi), %xmm1
; AVX-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_lt:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovapd (%rdi), %xmm1
; AVX512-NEXT:    vcmpltpd %xmm0, %xmm1, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp olt <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_le(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_le:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm1
; SSE-NEXT:    cmplepd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_le:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovapd (%rdi), %xmm1
; AVX-NEXT:    vcmplepd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_le:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovapd (%rdi), %xmm1
; AVX512-NEXT:    vcmplepd %xmm0, %xmm1, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ole <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <4 x i64> @commute_cmppd_eq_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_eq_ymmm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqpd (%rdi), %xmm0
; SSE-NEXT:    cmpeqpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_eq_ymmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_eq_ymmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeqpd (%rdi), %ymm0, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp oeq <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ne_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_ne_ymmm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpneqpd (%rdi), %xmm0
; SSE-NEXT:    cmpneqpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ne_ymmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_ne_ymmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneqpd (%rdi), %ymm0, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp une <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ord_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_ord_ymmm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpordpd (%rdi), %xmm0
; SSE-NEXT:    cmpordpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ord_ymmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpordpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_ord_ymmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpordpd (%rdi), %ymm0, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ord <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_uno_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_uno_ymmm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpunordpd (%rdi), %xmm0
; SSE-NEXT:    cmpunordpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_uno_ymmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpunordpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_uno_ymmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpunordpd (%rdi), %ymm0, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp uno <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ueq_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_ueq_ymmm:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm2
; SSE-NEXT:    movapd 16(%rdi), %xmm3
; SSE-NEXT:    movapd %xmm2, %xmm4
; SSE-NEXT:    cmpeqpd %xmm0, %xmm4
; SSE-NEXT:    cmpunordpd %xmm2, %xmm0
; SSE-NEXT:    orpd %xmm4, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm2
; SSE-NEXT:    cmpeqpd %xmm1, %xmm2
; SSE-NEXT:    cmpunordpd %xmm3, %xmm1
; SSE-NEXT:    orpd %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ueq_ymmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovapd (%rdi), %ymm1
; AVX-NEXT:    vcmpeq_uqpd %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_ueq_ymmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovapd (%rdi), %ymm1
; AVX512-NEXT:    vcmpeq_uqpd %ymm0, %ymm1, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ueq <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_one_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_one_ymmm:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm2
; SSE-NEXT:    movapd 16(%rdi), %xmm3
; SSE-NEXT:    movapd %xmm2, %xmm4
; SSE-NEXT:    cmpneqpd %xmm0, %xmm4
; SSE-NEXT:    cmpordpd %xmm2, %xmm0
; SSE-NEXT:    andpd %xmm4, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm2
; SSE-NEXT:    cmpneqpd %xmm1, %xmm2
; SSE-NEXT:    cmpordpd %xmm3, %xmm1
; SSE-NEXT:    andpd %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_one_ymmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovapd (%rdi), %ymm1
; AVX-NEXT:    vcmpneq_oqpd %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_one_ymmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovapd (%rdi), %ymm1
; AVX512-NEXT:    vcmpneq_oqpd %ymm0, %ymm1, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp one <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_lt_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_lt_ymmm:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm2
; SSE-NEXT:    movapd 16(%rdi), %xmm3
; SSE-NEXT:    cmpltpd %xmm0, %xmm2
; SSE-NEXT:    cmpltpd %xmm1, %xmm3
; SSE-NEXT:    movapd %xmm2, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_lt_ymmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovapd (%rdi), %ymm1
; AVX-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_lt_ymmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovapd (%rdi), %ymm1
; AVX512-NEXT:    vcmpltpd %ymm0, %ymm1, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp olt <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_le_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_le_ymmm:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm2
; SSE-NEXT:    movapd 16(%rdi), %xmm3
; SSE-NEXT:    cmplepd %xmm0, %xmm2
; SSE-NEXT:    cmplepd %xmm1, %xmm3
; SSE-NEXT:    movapd %xmm2, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_le_ymmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovapd (%rdi), %ymm1
; AVX-NEXT:    vcmplepd %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_le_ymmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovapd (%rdi), %ymm1
; AVX512-NEXT:    vcmplepd %ymm0, %ymm1, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ole <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}