1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 -disable-peephole | FileCheck %s --check-prefix=SSE
3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 -disable-peephole | FileCheck %s --check-prefix=AVX
4 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl -disable-peephole | FileCheck %s --check-prefix=AVX512
8 ; Only equal/not-equal/ordered/unordered can be safely commuted
; oeq is commutable, so the loaded LHS folds into the compare as a memory
; operand (cmpeqps/vcmpeqps (%rdi)) on every target.
11 define <4 x i32> @commute_cmpps_eq(<4 x float>* %a0, <4 x float> %a1) {
12 ; SSE-LABEL: commute_cmpps_eq:
14 ; SSE-NEXT: cmpeqps (%rdi), %xmm0
17 ; AVX-LABEL: commute_cmpps_eq:
19 ; AVX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
22 ; AVX512-LABEL: commute_cmpps_eq:
24 ; AVX512-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
26 %1 = load <4 x float>, <4 x float>* %a0
27 %2 = fcmp oeq <4 x float> %1, %a1
28 %3 = sext <4 x i1> %2 to <4 x i32>
; une is commutable, so the load folds into cmpneqps/vcmpneqps.
32 define <4 x i32> @commute_cmpps_ne(<4 x float>* %a0, <4 x float> %a1) {
33 ; SSE-LABEL: commute_cmpps_ne:
35 ; SSE-NEXT: cmpneqps (%rdi), %xmm0
38 ; AVX-LABEL: commute_cmpps_ne:
40 ; AVX-NEXT: vcmpneqps (%rdi), %xmm0, %xmm0
43 ; AVX512-LABEL: commute_cmpps_ne:
45 ; AVX512-NEXT: vcmpneqps (%rdi), %xmm0, %xmm0
47 %1 = load <4 x float>, <4 x float>* %a0
48 %2 = fcmp une <4 x float> %1, %a1
49 %3 = sext <4 x i1> %2 to <4 x i32>
; ord is commutable, so the load folds into cmpordps/vcmpordps.
53 define <4 x i32> @commute_cmpps_ord(<4 x float>* %a0, <4 x float> %a1) {
54 ; SSE-LABEL: commute_cmpps_ord:
56 ; SSE-NEXT: cmpordps (%rdi), %xmm0
59 ; AVX-LABEL: commute_cmpps_ord:
61 ; AVX-NEXT: vcmpordps (%rdi), %xmm0, %xmm0
64 ; AVX512-LABEL: commute_cmpps_ord:
66 ; AVX512-NEXT: vcmpordps (%rdi), %xmm0, %xmm0
68 %1 = load <4 x float>, <4 x float>* %a0
69 %2 = fcmp ord <4 x float> %1, %a1
70 %3 = sext <4 x i1> %2 to <4 x i32>
; uno is commutable, so the load folds into cmpunordps/vcmpunordps.
74 define <4 x i32> @commute_cmpps_uno(<4 x float>* %a0, <4 x float> %a1) {
75 ; SSE-LABEL: commute_cmpps_uno:
77 ; SSE-NEXT: cmpunordps (%rdi), %xmm0
80 ; AVX-LABEL: commute_cmpps_uno:
82 ; AVX-NEXT: vcmpunordps (%rdi), %xmm0, %xmm0
85 ; AVX512-LABEL: commute_cmpps_uno:
87 ; AVX512-NEXT: vcmpunordps (%rdi), %xmm0, %xmm0
89 %1 = load <4 x float>, <4 x float>* %a0
90 %2 = fcmp uno <4 x float> %1, %a1
91 %3 = sext <4 x i1> %2 to <4 x i32>
; ueq: SSE has no single ueq predicate and expands to (eq | unord), keeping
; the load in a register; AVX/AVX512 fold it via the vcmpeq_uqps predicate.
95 define <4 x i32> @commute_cmpps_ueq(<4 x float>* %a0, <4 x float> %a1) {
96 ; SSE-LABEL: commute_cmpps_ueq:
98 ; SSE-NEXT: movaps (%rdi), %xmm1
99 ; SSE-NEXT: movaps %xmm1, %xmm2
100 ; SSE-NEXT: cmpeqps %xmm0, %xmm2
101 ; SSE-NEXT: cmpunordps %xmm1, %xmm0
102 ; SSE-NEXT: orps %xmm2, %xmm0
105 ; AVX-LABEL: commute_cmpps_ueq:
107 ; AVX-NEXT: vcmpeq_uqps (%rdi), %xmm0, %xmm0
110 ; AVX512-LABEL: commute_cmpps_ueq:
112 ; AVX512-NEXT: vcmpeq_uqps (%rdi), %xmm0, %xmm0
114 %1 = load <4 x float>, <4 x float>* %a0
115 %2 = fcmp ueq <4 x float> %1, %a1
116 %3 = sext <4 x i1> %2 to <4 x i32>
; one: SSE expands to (neq & ord) with the load in a register; AVX/AVX512
; fold the load via the vcmpneq_oqps predicate.
120 define <4 x i32> @commute_cmpps_one(<4 x float>* %a0, <4 x float> %a1) {
121 ; SSE-LABEL: commute_cmpps_one:
123 ; SSE-NEXT: movaps (%rdi), %xmm1
124 ; SSE-NEXT: movaps %xmm1, %xmm2
125 ; SSE-NEXT: cmpneqps %xmm0, %xmm2
126 ; SSE-NEXT: cmpordps %xmm1, %xmm0
127 ; SSE-NEXT: andps %xmm2, %xmm0
130 ; AVX-LABEL: commute_cmpps_one:
132 ; AVX-NEXT: vcmpneq_oqps (%rdi), %xmm0, %xmm0
135 ; AVX512-LABEL: commute_cmpps_one:
137 ; AVX512-NEXT: vcmpneq_oqps (%rdi), %xmm0, %xmm0
139 %1 = load <4 x float>, <4 x float>* %a0
140 %2 = fcmp one <4 x float> %1, %a1
141 %3 = sext <4 x i1> %2 to <4 x i32>
; olt is NOT safely commutable: the load must be materialized into a register
; and compared in the original operand order (no memory-operand fold).
145 define <4 x i32> @commute_cmpps_lt(<4 x float>* %a0, <4 x float> %a1) {
146 ; SSE-LABEL: commute_cmpps_lt:
148 ; SSE-NEXT: movaps (%rdi), %xmm1
149 ; SSE-NEXT: cmpltps %xmm0, %xmm1
150 ; SSE-NEXT: movaps %xmm1, %xmm0
153 ; AVX-LABEL: commute_cmpps_lt:
155 ; AVX-NEXT: vmovaps (%rdi), %xmm1
156 ; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
159 ; AVX512-LABEL: commute_cmpps_lt:
161 ; AVX512-NEXT: vmovaps (%rdi), %xmm1
162 ; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
164 %1 = load <4 x float>, <4 x float>* %a0
165 %2 = fcmp olt <4 x float> %1, %a1
166 %3 = sext <4 x i1> %2 to <4 x i32>
; ole is NOT safely commutable: like olt, the load stays in a register and
; the original operand order is preserved.
170 define <4 x i32> @commute_cmpps_le(<4 x float>* %a0, <4 x float> %a1) {
171 ; SSE-LABEL: commute_cmpps_le:
173 ; SSE-NEXT: movaps (%rdi), %xmm1
174 ; SSE-NEXT: cmpleps %xmm0, %xmm1
175 ; SSE-NEXT: movaps %xmm1, %xmm0
178 ; AVX-LABEL: commute_cmpps_le:
180 ; AVX-NEXT: vmovaps (%rdi), %xmm1
181 ; AVX-NEXT: vcmpleps %xmm0, %xmm1, %xmm0
184 ; AVX512-LABEL: commute_cmpps_le:
186 ; AVX512-NEXT: vmovaps (%rdi), %xmm1
187 ; AVX512-NEXT: vcmpleps %xmm0, %xmm1, %xmm0
189 %1 = load <4 x float>, <4 x float>* %a0
190 %2 = fcmp ole <4 x float> %1, %a1
191 %3 = sext <4 x i1> %2 to <4 x i32>
; 256-bit oeq: AVX folds the load into vcmpeqps; SSE legalizes the
; <8 x float> op as two 128-bit halves, each folding its own load.
195 define <8 x i32> @commute_cmpps_eq_ymm(<8 x float>* %a0, <8 x float> %a1) {
196 ; SSE-LABEL: commute_cmpps_eq_ymm:
198 ; SSE-NEXT: cmpeqps (%rdi), %xmm0
199 ; SSE-NEXT: cmpeqps 16(%rdi), %xmm1
202 ; AVX-LABEL: commute_cmpps_eq_ymm:
204 ; AVX-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0
207 ; AVX512-LABEL: commute_cmpps_eq_ymm:
209 ; AVX512-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0
211 %1 = load <8 x float>, <8 x float>* %a0
212 %2 = fcmp oeq <8 x float> %1, %a1
213 %3 = sext <8 x i1> %2 to <8 x i32>
; 256-bit une: commutable, so both SSE halves and the AVX ymm op fold loads.
217 define <8 x i32> @commute_cmpps_ne_ymm(<8 x float>* %a0, <8 x float> %a1) {
218 ; SSE-LABEL: commute_cmpps_ne_ymm:
220 ; SSE-NEXT: cmpneqps (%rdi), %xmm0
221 ; SSE-NEXT: cmpneqps 16(%rdi), %xmm1
224 ; AVX-LABEL: commute_cmpps_ne_ymm:
226 ; AVX-NEXT: vcmpneqps (%rdi), %ymm0, %ymm0
229 ; AVX512-LABEL: commute_cmpps_ne_ymm:
231 ; AVX512-NEXT: vcmpneqps (%rdi), %ymm0, %ymm0
233 %1 = load <8 x float>, <8 x float>* %a0
234 %2 = fcmp une <8 x float> %1, %a1
235 %3 = sext <8 x i1> %2 to <8 x i32>
; 256-bit ord: commutable, load folds on all targets (two halves on SSE).
239 define <8 x i32> @commute_cmpps_ord_ymm(<8 x float>* %a0, <8 x float> %a1) {
240 ; SSE-LABEL: commute_cmpps_ord_ymm:
242 ; SSE-NEXT: cmpordps (%rdi), %xmm0
243 ; SSE-NEXT: cmpordps 16(%rdi), %xmm1
246 ; AVX-LABEL: commute_cmpps_ord_ymm:
248 ; AVX-NEXT: vcmpordps (%rdi), %ymm0, %ymm0
251 ; AVX512-LABEL: commute_cmpps_ord_ymm:
253 ; AVX512-NEXT: vcmpordps (%rdi), %ymm0, %ymm0
255 %1 = load <8 x float>, <8 x float>* %a0
256 %2 = fcmp ord <8 x float> %1, %a1
257 %3 = sext <8 x i1> %2 to <8 x i32>
; 256-bit uno: commutable, load folds on all targets (two halves on SSE).
261 define <8 x i32> @commute_cmpps_uno_ymm(<8 x float>* %a0, <8 x float> %a1) {
262 ; SSE-LABEL: commute_cmpps_uno_ymm:
264 ; SSE-NEXT: cmpunordps (%rdi), %xmm0
265 ; SSE-NEXT: cmpunordps 16(%rdi), %xmm1
268 ; AVX-LABEL: commute_cmpps_uno_ymm:
270 ; AVX-NEXT: vcmpunordps (%rdi), %ymm0, %ymm0
273 ; AVX512-LABEL: commute_cmpps_uno_ymm:
275 ; AVX512-NEXT: vcmpunordps (%rdi), %ymm0, %ymm0
277 %1 = load <8 x float>, <8 x float>* %a0
278 %2 = fcmp uno <8 x float> %1, %a1
279 %3 = sext <8 x i1> %2 to <8 x i32>
; 256-bit ueq: SSE expands each 128-bit half to (eq | unord) in registers;
; AVX/AVX512 fold the load with the vcmpeq_uqps predicate.
283 define <8 x i32> @commute_cmpps_ueq_ymm(<8 x float>* %a0, <8 x float> %a1) {
284 ; SSE-LABEL: commute_cmpps_ueq_ymm:
286 ; SSE-NEXT: movaps (%rdi), %xmm2
287 ; SSE-NEXT: movaps 16(%rdi), %xmm3
288 ; SSE-NEXT: movaps %xmm2, %xmm4
289 ; SSE-NEXT: cmpeqps %xmm0, %xmm4
290 ; SSE-NEXT: cmpunordps %xmm2, %xmm0
291 ; SSE-NEXT: orps %xmm4, %xmm0
292 ; SSE-NEXT: movaps %xmm3, %xmm2
293 ; SSE-NEXT: cmpeqps %xmm1, %xmm2
294 ; SSE-NEXT: cmpunordps %xmm3, %xmm1
295 ; SSE-NEXT: orps %xmm2, %xmm1
298 ; AVX-LABEL: commute_cmpps_ueq_ymm:
300 ; AVX-NEXT: vcmpeq_uqps (%rdi), %ymm0, %ymm0
303 ; AVX512-LABEL: commute_cmpps_ueq_ymm:
305 ; AVX512-NEXT: vcmpeq_uqps (%rdi), %ymm0, %ymm0
307 %1 = load <8 x float>, <8 x float>* %a0
308 %2 = fcmp ueq <8 x float> %1, %a1
309 %3 = sext <8 x i1> %2 to <8 x i32>
; 256-bit one: SSE expands each 128-bit half to (neq & ord) in registers;
; AVX/AVX512 fold the load with the vcmpneq_oqps predicate.
313 define <8 x i32> @commute_cmpps_one_ymm(<8 x float>* %a0, <8 x float> %a1) {
314 ; SSE-LABEL: commute_cmpps_one_ymm:
316 ; SSE-NEXT: movaps (%rdi), %xmm2
317 ; SSE-NEXT: movaps 16(%rdi), %xmm3
318 ; SSE-NEXT: movaps %xmm2, %xmm4
319 ; SSE-NEXT: cmpneqps %xmm0, %xmm4
320 ; SSE-NEXT: cmpordps %xmm2, %xmm0
321 ; SSE-NEXT: andps %xmm4, %xmm0
322 ; SSE-NEXT: movaps %xmm3, %xmm2
323 ; SSE-NEXT: cmpneqps %xmm1, %xmm2
324 ; SSE-NEXT: cmpordps %xmm3, %xmm1
325 ; SSE-NEXT: andps %xmm2, %xmm1
328 ; AVX-LABEL: commute_cmpps_one_ymm:
330 ; AVX-NEXT: vcmpneq_oqps (%rdi), %ymm0, %ymm0
333 ; AVX512-LABEL: commute_cmpps_one_ymm:
335 ; AVX512-NEXT: vcmpneq_oqps (%rdi), %ymm0, %ymm0
337 %1 = load <8 x float>, <8 x float>* %a0
338 %2 = fcmp one <8 x float> %1, %a1
339 %3 = sext <8 x i1> %2 to <8 x i32>
; 256-bit olt: not commutable, so the loads are materialized into registers
; and compared in original order on every target.
343 define <8 x i32> @commute_cmpps_lt_ymm(<8 x float>* %a0, <8 x float> %a1) {
344 ; SSE-LABEL: commute_cmpps_lt_ymm:
346 ; SSE-NEXT: movaps (%rdi), %xmm2
347 ; SSE-NEXT: movaps 16(%rdi), %xmm3
348 ; SSE-NEXT: cmpltps %xmm0, %xmm2
349 ; SSE-NEXT: cmpltps %xmm1, %xmm3
350 ; SSE-NEXT: movaps %xmm2, %xmm0
351 ; SSE-NEXT: movaps %xmm3, %xmm1
354 ; AVX-LABEL: commute_cmpps_lt_ymm:
356 ; AVX-NEXT: vmovaps (%rdi), %ymm1
357 ; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
360 ; AVX512-LABEL: commute_cmpps_lt_ymm:
362 ; AVX512-NEXT: vmovaps (%rdi), %ymm1
363 ; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
365 %1 = load <8 x float>, <8 x float>* %a0
366 %2 = fcmp olt <8 x float> %1, %a1
367 %3 = sext <8 x i1> %2 to <8 x i32>
; 256-bit ole: not commutable; loads stay in registers, order preserved.
371 define <8 x i32> @commute_cmpps_le_ymm(<8 x float>* %a0, <8 x float> %a1) {
372 ; SSE-LABEL: commute_cmpps_le_ymm:
374 ; SSE-NEXT: movaps (%rdi), %xmm2
375 ; SSE-NEXT: movaps 16(%rdi), %xmm3
376 ; SSE-NEXT: cmpleps %xmm0, %xmm2
377 ; SSE-NEXT: cmpleps %xmm1, %xmm3
378 ; SSE-NEXT: movaps %xmm2, %xmm0
379 ; SSE-NEXT: movaps %xmm3, %xmm1
382 ; AVX-LABEL: commute_cmpps_le_ymm:
384 ; AVX-NEXT: vmovaps (%rdi), %ymm1
385 ; AVX-NEXT: vcmpleps %ymm0, %ymm1, %ymm0
388 ; AVX512-LABEL: commute_cmpps_le_ymm:
390 ; AVX512-NEXT: vmovaps (%rdi), %ymm1
391 ; AVX512-NEXT: vcmpleps %ymm0, %ymm1, %ymm0
393 %1 = load <8 x float>, <8 x float>* %a0
394 %2 = fcmp ole <8 x float> %1, %a1
395 %3 = sext <8 x i1> %2 to <8 x i32>
401 ; Only equal/not-equal/ordered/unordered can be safely commuted
; Double-precision oeq: commutable, load folds into cmpeqpd/vcmpeqpd.
404 define <2 x i64> @commute_cmppd_eq(<2 x double>* %a0, <2 x double> %a1) {
405 ; SSE-LABEL: commute_cmppd_eq:
407 ; SSE-NEXT: cmpeqpd (%rdi), %xmm0
410 ; AVX-LABEL: commute_cmppd_eq:
412 ; AVX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
415 ; AVX512-LABEL: commute_cmppd_eq:
417 ; AVX512-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
419 %1 = load <2 x double>, <2 x double>* %a0
420 %2 = fcmp oeq <2 x double> %1, %a1
421 %3 = sext <2 x i1> %2 to <2 x i64>
; Double-precision une: commutable, load folds into cmpneqpd/vcmpneqpd.
425 define <2 x i64> @commute_cmppd_ne(<2 x double>* %a0, <2 x double> %a1) {
426 ; SSE-LABEL: commute_cmppd_ne:
428 ; SSE-NEXT: cmpneqpd (%rdi), %xmm0
431 ; AVX-LABEL: commute_cmppd_ne:
433 ; AVX-NEXT: vcmpneqpd (%rdi), %xmm0, %xmm0
436 ; AVX512-LABEL: commute_cmppd_ne:
438 ; AVX512-NEXT: vcmpneqpd (%rdi), %xmm0, %xmm0
440 %1 = load <2 x double>, <2 x double>* %a0
441 %2 = fcmp une <2 x double> %1, %a1
442 %3 = sext <2 x i1> %2 to <2 x i64>
; Double-precision ord: commutable, load folds into cmpordpd/vcmpordpd.
446 define <2 x i64> @commute_cmppd_ord(<2 x double>* %a0, <2 x double> %a1) {
447 ; SSE-LABEL: commute_cmppd_ord:
449 ; SSE-NEXT: cmpordpd (%rdi), %xmm0
452 ; AVX-LABEL: commute_cmppd_ord:
454 ; AVX-NEXT: vcmpordpd (%rdi), %xmm0, %xmm0
457 ; AVX512-LABEL: commute_cmppd_ord:
459 ; AVX512-NEXT: vcmpordpd (%rdi), %xmm0, %xmm0
461 %1 = load <2 x double>, <2 x double>* %a0
462 %2 = fcmp ord <2 x double> %1, %a1
463 %3 = sext <2 x i1> %2 to <2 x i64>
; Double-precision ueq: SSE expands to (eq | unord) in registers;
; AVX/AVX512 fold the load via vcmpeq_uqpd.
467 define <2 x i64> @commute_cmppd_ueq(<2 x double>* %a0, <2 x double> %a1) {
468 ; SSE-LABEL: commute_cmppd_ueq:
470 ; SSE-NEXT: movapd (%rdi), %xmm1
471 ; SSE-NEXT: movapd %xmm1, %xmm2
472 ; SSE-NEXT: cmpeqpd %xmm0, %xmm2
473 ; SSE-NEXT: cmpunordpd %xmm1, %xmm0
474 ; SSE-NEXT: orpd %xmm2, %xmm0
477 ; AVX-LABEL: commute_cmppd_ueq:
479 ; AVX-NEXT: vcmpeq_uqpd (%rdi), %xmm0, %xmm0
482 ; AVX512-LABEL: commute_cmppd_ueq:
484 ; AVX512-NEXT: vcmpeq_uqpd (%rdi), %xmm0, %xmm0
486 %1 = load <2 x double>, <2 x double>* %a0
487 %2 = fcmp ueq <2 x double> %1, %a1
488 %3 = sext <2 x i1> %2 to <2 x i64>
; Double-precision one: SSE expands to (neq & ord) in registers;
; AVX/AVX512 fold the load via vcmpneq_oqpd.
492 define <2 x i64> @commute_cmppd_one(<2 x double>* %a0, <2 x double> %a1) {
493 ; SSE-LABEL: commute_cmppd_one:
495 ; SSE-NEXT: movapd (%rdi), %xmm1
496 ; SSE-NEXT: movapd %xmm1, %xmm2
497 ; SSE-NEXT: cmpneqpd %xmm0, %xmm2
498 ; SSE-NEXT: cmpordpd %xmm1, %xmm0
499 ; SSE-NEXT: andpd %xmm2, %xmm0
502 ; AVX-LABEL: commute_cmppd_one:
504 ; AVX-NEXT: vcmpneq_oqpd (%rdi), %xmm0, %xmm0
507 ; AVX512-LABEL: commute_cmppd_one:
509 ; AVX512-NEXT: vcmpneq_oqpd (%rdi), %xmm0, %xmm0
511 %1 = load <2 x double>, <2 x double>* %a0
512 %2 = fcmp one <2 x double> %1, %a1
513 %3 = sext <2 x i1> %2 to <2 x i64>
; Double-precision uno: commutable, load folds into cmpunordpd/vcmpunordpd.
517 define <2 x i64> @commute_cmppd_uno(<2 x double>* %a0, <2 x double> %a1) {
518 ; SSE-LABEL: commute_cmppd_uno:
520 ; SSE-NEXT: cmpunordpd (%rdi), %xmm0
523 ; AVX-LABEL: commute_cmppd_uno:
525 ; AVX-NEXT: vcmpunordpd (%rdi), %xmm0, %xmm0
528 ; AVX512-LABEL: commute_cmppd_uno:
530 ; AVX512-NEXT: vcmpunordpd (%rdi), %xmm0, %xmm0
532 %1 = load <2 x double>, <2 x double>* %a0
533 %2 = fcmp uno <2 x double> %1, %a1
534 %3 = sext <2 x i1> %2 to <2 x i64>
; Double-precision olt: not commutable; load goes through a register and the
; original operand order is kept.
538 define <2 x i64> @commute_cmppd_lt(<2 x double>* %a0, <2 x double> %a1) {
539 ; SSE-LABEL: commute_cmppd_lt:
541 ; SSE-NEXT: movapd (%rdi), %xmm1
542 ; SSE-NEXT: cmpltpd %xmm0, %xmm1
543 ; SSE-NEXT: movapd %xmm1, %xmm0
546 ; AVX-LABEL: commute_cmppd_lt:
548 ; AVX-NEXT: vmovapd (%rdi), %xmm1
549 ; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
552 ; AVX512-LABEL: commute_cmppd_lt:
554 ; AVX512-NEXT: vmovapd (%rdi), %xmm1
555 ; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
557 %1 = load <2 x double>, <2 x double>* %a0
558 %2 = fcmp olt <2 x double> %1, %a1
559 %3 = sext <2 x i1> %2 to <2 x i64>
; Double-precision ole: not commutable; load stays in a register, order kept.
563 define <2 x i64> @commute_cmppd_le(<2 x double>* %a0, <2 x double> %a1) {
564 ; SSE-LABEL: commute_cmppd_le:
566 ; SSE-NEXT: movapd (%rdi), %xmm1
567 ; SSE-NEXT: cmplepd %xmm0, %xmm1
568 ; SSE-NEXT: movapd %xmm1, %xmm0
571 ; AVX-LABEL: commute_cmppd_le:
573 ; AVX-NEXT: vmovapd (%rdi), %xmm1
574 ; AVX-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
577 ; AVX512-LABEL: commute_cmppd_le:
579 ; AVX512-NEXT: vmovapd (%rdi), %xmm1
580 ; AVX512-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
582 %1 = load <2 x double>, <2 x double>* %a0
583 %2 = fcmp ole <2 x double> %1, %a1
584 %3 = sext <2 x i1> %2 to <2 x i64>
; 256-bit double oeq: commutable; AVX folds into vcmpeqpd, SSE splits into
; two 128-bit cmpeqpd ops each folding a load. (Name spelled "ymmm" in tree.)
588 define <4 x i64> @commute_cmppd_eq_ymmm(<4 x double>* %a0, <4 x double> %a1) {
589 ; SSE-LABEL: commute_cmppd_eq_ymmm:
591 ; SSE-NEXT: cmpeqpd (%rdi), %xmm0
592 ; SSE-NEXT: cmpeqpd 16(%rdi), %xmm1
595 ; AVX-LABEL: commute_cmppd_eq_ymmm:
597 ; AVX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
600 ; AVX512-LABEL: commute_cmppd_eq_ymmm:
602 ; AVX512-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
604 %1 = load <4 x double>, <4 x double>* %a0
605 %2 = fcmp oeq <4 x double> %1, %a1
606 %3 = sext <4 x i1> %2 to <4 x i64>
; 256-bit double une: commutable, loads fold on all targets.
610 define <4 x i64> @commute_cmppd_ne_ymmm(<4 x double>* %a0, <4 x double> %a1) {
611 ; SSE-LABEL: commute_cmppd_ne_ymmm:
613 ; SSE-NEXT: cmpneqpd (%rdi), %xmm0
614 ; SSE-NEXT: cmpneqpd 16(%rdi), %xmm1
617 ; AVX-LABEL: commute_cmppd_ne_ymmm:
619 ; AVX-NEXT: vcmpneqpd (%rdi), %ymm0, %ymm0
622 ; AVX512-LABEL: commute_cmppd_ne_ymmm:
624 ; AVX512-NEXT: vcmpneqpd (%rdi), %ymm0, %ymm0
626 %1 = load <4 x double>, <4 x double>* %a0
627 %2 = fcmp une <4 x double> %1, %a1
628 %3 = sext <4 x i1> %2 to <4 x i64>
; 256-bit double ord: commutable, loads fold on all targets.
632 define <4 x i64> @commute_cmppd_ord_ymmm(<4 x double>* %a0, <4 x double> %a1) {
633 ; SSE-LABEL: commute_cmppd_ord_ymmm:
635 ; SSE-NEXT: cmpordpd (%rdi), %xmm0
636 ; SSE-NEXT: cmpordpd 16(%rdi), %xmm1
639 ; AVX-LABEL: commute_cmppd_ord_ymmm:
641 ; AVX-NEXT: vcmpordpd (%rdi), %ymm0, %ymm0
644 ; AVX512-LABEL: commute_cmppd_ord_ymmm:
646 ; AVX512-NEXT: vcmpordpd (%rdi), %ymm0, %ymm0
648 %1 = load <4 x double>, <4 x double>* %a0
649 %2 = fcmp ord <4 x double> %1, %a1
650 %3 = sext <4 x i1> %2 to <4 x i64>
; 256-bit double uno: commutable, loads fold on all targets.
654 define <4 x i64> @commute_cmppd_uno_ymmm(<4 x double>* %a0, <4 x double> %a1) {
655 ; SSE-LABEL: commute_cmppd_uno_ymmm:
657 ; SSE-NEXT: cmpunordpd (%rdi), %xmm0
658 ; SSE-NEXT: cmpunordpd 16(%rdi), %xmm1
661 ; AVX-LABEL: commute_cmppd_uno_ymmm:
663 ; AVX-NEXT: vcmpunordpd (%rdi), %ymm0, %ymm0
666 ; AVX512-LABEL: commute_cmppd_uno_ymmm:
668 ; AVX512-NEXT: vcmpunordpd (%rdi), %ymm0, %ymm0
670 %1 = load <4 x double>, <4 x double>* %a0
671 %2 = fcmp uno <4 x double> %1, %a1
672 %3 = sext <4 x i1> %2 to <4 x i64>
; 256-bit double ueq: SSE expands each half to (eq | unord) in registers;
; AVX/AVX512 fold the load via vcmpeq_uqpd.
676 define <4 x i64> @commute_cmppd_ueq_ymmm(<4 x double>* %a0, <4 x double> %a1) {
677 ; SSE-LABEL: commute_cmppd_ueq_ymmm:
679 ; SSE-NEXT: movapd (%rdi), %xmm2
680 ; SSE-NEXT: movapd 16(%rdi), %xmm3
681 ; SSE-NEXT: movapd %xmm2, %xmm4
682 ; SSE-NEXT: cmpeqpd %xmm0, %xmm4
683 ; SSE-NEXT: cmpunordpd %xmm2, %xmm0
684 ; SSE-NEXT: orpd %xmm4, %xmm0
685 ; SSE-NEXT: movapd %xmm3, %xmm2
686 ; SSE-NEXT: cmpeqpd %xmm1, %xmm2
687 ; SSE-NEXT: cmpunordpd %xmm3, %xmm1
688 ; SSE-NEXT: orpd %xmm2, %xmm1
691 ; AVX-LABEL: commute_cmppd_ueq_ymmm:
693 ; AVX-NEXT: vcmpeq_uqpd (%rdi), %ymm0, %ymm0
696 ; AVX512-LABEL: commute_cmppd_ueq_ymmm:
698 ; AVX512-NEXT: vcmpeq_uqpd (%rdi), %ymm0, %ymm0
700 %1 = load <4 x double>, <4 x double>* %a0
701 %2 = fcmp ueq <4 x double> %1, %a1
702 %3 = sext <4 x i1> %2 to <4 x i64>
; 256-bit double one: SSE expands each half to (neq & ord) in registers;
; AVX/AVX512 fold the load via vcmpneq_oqpd.
706 define <4 x i64> @commute_cmppd_one_ymmm(<4 x double>* %a0, <4 x double> %a1) {
707 ; SSE-LABEL: commute_cmppd_one_ymmm:
709 ; SSE-NEXT: movapd (%rdi), %xmm2
710 ; SSE-NEXT: movapd 16(%rdi), %xmm3
711 ; SSE-NEXT: movapd %xmm2, %xmm4
712 ; SSE-NEXT: cmpneqpd %xmm0, %xmm4
713 ; SSE-NEXT: cmpordpd %xmm2, %xmm0
714 ; SSE-NEXT: andpd %xmm4, %xmm0
715 ; SSE-NEXT: movapd %xmm3, %xmm2
716 ; SSE-NEXT: cmpneqpd %xmm1, %xmm2
717 ; SSE-NEXT: cmpordpd %xmm3, %xmm1
718 ; SSE-NEXT: andpd %xmm2, %xmm1
721 ; AVX-LABEL: commute_cmppd_one_ymmm:
723 ; AVX-NEXT: vcmpneq_oqpd (%rdi), %ymm0, %ymm0
726 ; AVX512-LABEL: commute_cmppd_one_ymmm:
728 ; AVX512-NEXT: vcmpneq_oqpd (%rdi), %ymm0, %ymm0
730 %1 = load <4 x double>, <4 x double>* %a0
731 %2 = fcmp one <4 x double> %1, %a1
732 %3 = sext <4 x i1> %2 to <4 x i64>
; 256-bit double olt: not commutable; loads are materialized into registers
; and compared in original order on every target.
736 define <4 x i64> @commute_cmppd_lt_ymmm(<4 x double>* %a0, <4 x double> %a1) {
737 ; SSE-LABEL: commute_cmppd_lt_ymmm:
739 ; SSE-NEXT: movapd (%rdi), %xmm2
740 ; SSE-NEXT: movapd 16(%rdi), %xmm3
741 ; SSE-NEXT: cmpltpd %xmm0, %xmm2
742 ; SSE-NEXT: cmpltpd %xmm1, %xmm3
743 ; SSE-NEXT: movapd %xmm2, %xmm0
744 ; SSE-NEXT: movapd %xmm3, %xmm1
747 ; AVX-LABEL: commute_cmppd_lt_ymmm:
749 ; AVX-NEXT: vmovapd (%rdi), %ymm1
750 ; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
753 ; AVX512-LABEL: commute_cmppd_lt_ymmm:
755 ; AVX512-NEXT: vmovapd (%rdi), %ymm1
756 ; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
758 %1 = load <4 x double>, <4 x double>* %a0
759 %2 = fcmp olt <4 x double> %1, %a1
760 %3 = sext <4 x i1> %2 to <4 x i64>
; 256-bit double ole: not commutable; loads stay in registers, order kept.
764 define <4 x i64> @commute_cmppd_le_ymmm(<4 x double>* %a0, <4 x double> %a1) {
765 ; SSE-LABEL: commute_cmppd_le_ymmm:
767 ; SSE-NEXT: movapd (%rdi), %xmm2
768 ; SSE-NEXT: movapd 16(%rdi), %xmm3
769 ; SSE-NEXT: cmplepd %xmm0, %xmm2
770 ; SSE-NEXT: cmplepd %xmm1, %xmm3
771 ; SSE-NEXT: movapd %xmm2, %xmm0
772 ; SSE-NEXT: movapd %xmm3, %xmm1
775 ; AVX-LABEL: commute_cmppd_le_ymmm:
777 ; AVX-NEXT: vmovapd (%rdi), %ymm1
778 ; AVX-NEXT: vcmplepd %ymm0, %ymm1, %ymm0
781 ; AVX512-LABEL: commute_cmppd_le_ymmm:
783 ; AVX512-NEXT: vmovapd (%rdi), %ymm1
784 ; AVX512-NEXT: vcmplepd %ymm0, %ymm1, %ymm0
786 %1 = load <4 x double>, <4 x double>* %a0
787 %2 = fcmp ole <4 x double> %1, %a1
788 %3 = sext <4 x i1> %2 to <4 x i64>