1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 -disable-peephole | FileCheck %s --check-prefix=SSE
3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 -disable-peephole | FileCheck %s --check-prefix=AVX
4 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl,+avx512dq -disable-peephole | FileCheck %s --check-prefix=AVX512
8 ; Only equal/not-equal/ordered/unordered can be safely commuted
; 128-bit float compares. eq/ne/ord/uno are commutative predicates, so the
; loaded operand stays folded as the compare's memory operand; ueq/one need
; the AVX swapped-predicate immediates (eq_uq/neq_oq); lt/le cannot be
; commuted at the instruction level, so the load goes through a register.
; oeq commutes: cmpeqps keeps the folded (%rdi) memory operand.
11 define <4 x i32> @commute_cmpps_eq(<4 x float>* %a0, <4 x float> %a1) {
12 ; SSE-LABEL: commute_cmpps_eq:
14 ; SSE-NEXT: cmpeqps (%rdi), %xmm0
17 ; AVX-LABEL: commute_cmpps_eq:
19 ; AVX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
22 ; AVX512-LABEL: commute_cmpps_eq:
24 ; AVX512-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
26 %1 = load <4 x float>, <4 x float>* %a0
27 %2 = fcmp oeq <4 x float> %1, %a1
28 %3 = sext <4 x i1> %2 to <4 x i32>
; une commutes: cmpneqps keeps the folded memory operand.
32 define <4 x i32> @commute_cmpps_ne(<4 x float>* %a0, <4 x float> %a1) {
33 ; SSE-LABEL: commute_cmpps_ne:
35 ; SSE-NEXT: cmpneqps (%rdi), %xmm0
38 ; AVX-LABEL: commute_cmpps_ne:
40 ; AVX-NEXT: vcmpneqps (%rdi), %xmm0, %xmm0
43 ; AVX512-LABEL: commute_cmpps_ne:
45 ; AVX512-NEXT: vcmpneqps (%rdi), %xmm0, %xmm0
47 %1 = load <4 x float>, <4 x float>* %a0
48 %2 = fcmp une <4 x float> %1, %a1
49 %3 = sext <4 x i1> %2 to <4 x i32>
; ord commutes: cmpordps keeps the folded memory operand.
53 define <4 x i32> @commute_cmpps_ord(<4 x float>* %a0, <4 x float> %a1) {
54 ; SSE-LABEL: commute_cmpps_ord:
56 ; SSE-NEXT: cmpordps (%rdi), %xmm0
59 ; AVX-LABEL: commute_cmpps_ord:
61 ; AVX-NEXT: vcmpordps (%rdi), %xmm0, %xmm0
64 ; AVX512-LABEL: commute_cmpps_ord:
66 ; AVX512-NEXT: vcmpordps (%rdi), %xmm0, %xmm0
68 %1 = load <4 x float>, <4 x float>* %a0
69 %2 = fcmp ord <4 x float> %1, %a1
70 %3 = sext <4 x i1> %2 to <4 x i32>
; uno commutes: cmpunordps keeps the folded memory operand.
74 define <4 x i32> @commute_cmpps_uno(<4 x float>* %a0, <4 x float> %a1) {
75 ; SSE-LABEL: commute_cmpps_uno:
77 ; SSE-NEXT: cmpunordps (%rdi), %xmm0
80 ; AVX-LABEL: commute_cmpps_uno:
82 ; AVX-NEXT: vcmpunordps (%rdi), %xmm0, %xmm0
85 ; AVX512-LABEL: commute_cmpps_uno:
87 ; AVX512-NEXT: vcmpunordps (%rdi), %xmm0, %xmm0
89 %1 = load <4 x float>, <4 x float>* %a0
90 %2 = fcmp uno <4 x float> %1, %a1
91 %3 = sext <4 x i1> %2 to <4 x i32>
; ueq: SSE has no eq_uq encoding, so the load is materialized and ueq is
; synthesized as eq | unord; AVX/AVX512 keep the load folded via eq_uq.
95 define <4 x i32> @commute_cmpps_ueq(<4 x float>* %a0, <4 x float> %a1) {
96 ; SSE-LABEL: commute_cmpps_ueq:
98 ; SSE-NEXT: movaps (%rdi), %xmm1
99 ; SSE-NEXT: movaps %xmm1, %xmm2
100 ; SSE-NEXT: cmpeqps %xmm0, %xmm2
101 ; SSE-NEXT: cmpunordps %xmm1, %xmm0
102 ; SSE-NEXT: orps %xmm2, %xmm0
105 ; AVX-LABEL: commute_cmpps_ueq:
107 ; AVX-NEXT: vcmpeq_uqps (%rdi), %xmm0, %xmm0
110 ; AVX512-LABEL: commute_cmpps_ueq:
112 ; AVX512-NEXT: vcmpeq_uqps (%rdi), %xmm0, %xmm0
114 %1 = load <4 x float>, <4 x float>* %a0
115 %2 = fcmp ueq <4 x float> %1, %a1
116 %3 = sext <4 x i1> %2 to <4 x i32>
; one: SSE synthesizes neq & ord after loading; AVX/AVX512 fold via neq_oq.
120 define <4 x i32> @commute_cmpps_one(<4 x float>* %a0, <4 x float> %a1) {
121 ; SSE-LABEL: commute_cmpps_one:
123 ; SSE-NEXT: movaps (%rdi), %xmm1
124 ; SSE-NEXT: movaps %xmm1, %xmm2
125 ; SSE-NEXT: cmpneqps %xmm0, %xmm2
126 ; SSE-NEXT: cmpordps %xmm1, %xmm0
127 ; SSE-NEXT: andps %xmm2, %xmm0
130 ; AVX-LABEL: commute_cmpps_one:
132 ; AVX-NEXT: vcmpneq_oqps (%rdi), %xmm0, %xmm0
135 ; AVX512-LABEL: commute_cmpps_one:
137 ; AVX512-NEXT: vcmpneq_oqps (%rdi), %xmm0, %xmm0
139 %1 = load <4 x float>, <4 x float>* %a0
140 %2 = fcmp one <4 x float> %1, %a1
141 %3 = sext <4 x i1> %2 to <4 x i32>
; olt does not commute: the load is materialized and compared in registers.
145 define <4 x i32> @commute_cmpps_lt(<4 x float>* %a0, <4 x float> %a1) {
146 ; SSE-LABEL: commute_cmpps_lt:
148 ; SSE-NEXT: movaps (%rdi), %xmm1
149 ; SSE-NEXT: cmpltps %xmm0, %xmm1
150 ; SSE-NEXT: movaps %xmm1, %xmm0
153 ; AVX-LABEL: commute_cmpps_lt:
155 ; AVX-NEXT: vmovaps (%rdi), %xmm1
156 ; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
159 ; AVX512-LABEL: commute_cmpps_lt:
161 ; AVX512-NEXT: vmovaps (%rdi), %xmm1
162 ; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
164 %1 = load <4 x float>, <4 x float>* %a0
165 %2 = fcmp olt <4 x float> %1, %a1
166 %3 = sext <4 x i1> %2 to <4 x i32>
; ole likewise stays uncommuted: load to register, then cmpleps.
170 define <4 x i32> @commute_cmpps_le(<4 x float>* %a0, <4 x float> %a1) {
171 ; SSE-LABEL: commute_cmpps_le:
173 ; SSE-NEXT: movaps (%rdi), %xmm1
174 ; SSE-NEXT: cmpleps %xmm0, %xmm1
175 ; SSE-NEXT: movaps %xmm1, %xmm0
178 ; AVX-LABEL: commute_cmpps_le:
180 ; AVX-NEXT: vmovaps (%rdi), %xmm1
181 ; AVX-NEXT: vcmpleps %xmm0, %xmm1, %xmm0
184 ; AVX512-LABEL: commute_cmpps_le:
186 ; AVX512-NEXT: vmovaps (%rdi), %xmm1
187 ; AVX512-NEXT: vcmpleps %xmm0, %xmm1, %xmm0
189 %1 = load <4 x float>, <4 x float>* %a0
190 %2 = fcmp ole <4 x float> %1, %a1
191 %3 = sext <4 x i1> %2 to <4 x i32>
; 256-bit float compares: same commutation rules as the xmm cases; the SSE
; run splits each ymm test into two 128-bit halves at (%rdi) and 16(%rdi).
; oeq commutes: both SSE halves and the AVX ymm op keep folded loads.
195 define <8 x i32> @commute_cmpps_eq_ymm(<8 x float>* %a0, <8 x float> %a1) {
196 ; SSE-LABEL: commute_cmpps_eq_ymm:
198 ; SSE-NEXT: cmpeqps (%rdi), %xmm0
199 ; SSE-NEXT: cmpeqps 16(%rdi), %xmm1
202 ; AVX-LABEL: commute_cmpps_eq_ymm:
204 ; AVX-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0
207 ; AVX512-LABEL: commute_cmpps_eq_ymm:
209 ; AVX512-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0
211 %1 = load <8 x float>, <8 x float>* %a0
212 %2 = fcmp oeq <8 x float> %1, %a1
213 %3 = sext <8 x i1> %2 to <8 x i32>
; une commutes: cmpneqps keeps folded memory operands.
217 define <8 x i32> @commute_cmpps_ne_ymm(<8 x float>* %a0, <8 x float> %a1) {
218 ; SSE-LABEL: commute_cmpps_ne_ymm:
220 ; SSE-NEXT: cmpneqps (%rdi), %xmm0
221 ; SSE-NEXT: cmpneqps 16(%rdi), %xmm1
224 ; AVX-LABEL: commute_cmpps_ne_ymm:
226 ; AVX-NEXT: vcmpneqps (%rdi), %ymm0, %ymm0
229 ; AVX512-LABEL: commute_cmpps_ne_ymm:
231 ; AVX512-NEXT: vcmpneqps (%rdi), %ymm0, %ymm0
233 %1 = load <8 x float>, <8 x float>* %a0
234 %2 = fcmp une <8 x float> %1, %a1
235 %3 = sext <8 x i1> %2 to <8 x i32>
; ord commutes: cmpordps keeps folded memory operands.
239 define <8 x i32> @commute_cmpps_ord_ymm(<8 x float>* %a0, <8 x float> %a1) {
240 ; SSE-LABEL: commute_cmpps_ord_ymm:
242 ; SSE-NEXT: cmpordps (%rdi), %xmm0
243 ; SSE-NEXT: cmpordps 16(%rdi), %xmm1
246 ; AVX-LABEL: commute_cmpps_ord_ymm:
248 ; AVX-NEXT: vcmpordps (%rdi), %ymm0, %ymm0
251 ; AVX512-LABEL: commute_cmpps_ord_ymm:
253 ; AVX512-NEXT: vcmpordps (%rdi), %ymm0, %ymm0
255 %1 = load <8 x float>, <8 x float>* %a0
256 %2 = fcmp ord <8 x float> %1, %a1
257 %3 = sext <8 x i1> %2 to <8 x i32>
; uno commutes: cmpunordps keeps folded memory operands.
261 define <8 x i32> @commute_cmpps_uno_ymm(<8 x float>* %a0, <8 x float> %a1) {
262 ; SSE-LABEL: commute_cmpps_uno_ymm:
264 ; SSE-NEXT: cmpunordps (%rdi), %xmm0
265 ; SSE-NEXT: cmpunordps 16(%rdi), %xmm1
268 ; AVX-LABEL: commute_cmpps_uno_ymm:
270 ; AVX-NEXT: vcmpunordps (%rdi), %ymm0, %ymm0
273 ; AVX512-LABEL: commute_cmpps_uno_ymm:
275 ; AVX512-NEXT: vcmpunordps (%rdi), %ymm0, %ymm0
277 %1 = load <8 x float>, <8 x float>* %a0
278 %2 = fcmp uno <8 x float> %1, %a1
279 %3 = sext <8 x i1> %2 to <8 x i32>
; ueq: SSE expands each half to eq | unord; AVX/AVX512 fold via eq_uq.
283 define <8 x i32> @commute_cmpps_ueq_ymm(<8 x float>* %a0, <8 x float> %a1) {
284 ; SSE-LABEL: commute_cmpps_ueq_ymm:
286 ; SSE-NEXT: movaps (%rdi), %xmm2
287 ; SSE-NEXT: movaps 16(%rdi), %xmm3
288 ; SSE-NEXT: movaps %xmm2, %xmm4
289 ; SSE-NEXT: cmpeqps %xmm0, %xmm4
290 ; SSE-NEXT: cmpunordps %xmm2, %xmm0
291 ; SSE-NEXT: orps %xmm4, %xmm0
292 ; SSE-NEXT: movaps %xmm3, %xmm2
293 ; SSE-NEXT: cmpeqps %xmm1, %xmm2
294 ; SSE-NEXT: cmpunordps %xmm3, %xmm1
295 ; SSE-NEXT: orps %xmm2, %xmm1
298 ; AVX-LABEL: commute_cmpps_ueq_ymm:
300 ; AVX-NEXT: vcmpeq_uqps (%rdi), %ymm0, %ymm0
303 ; AVX512-LABEL: commute_cmpps_ueq_ymm:
305 ; AVX512-NEXT: vcmpeq_uqps (%rdi), %ymm0, %ymm0
307 %1 = load <8 x float>, <8 x float>* %a0
308 %2 = fcmp ueq <8 x float> %1, %a1
309 %3 = sext <8 x i1> %2 to <8 x i32>
; one: SSE expands each half to neq & ord; AVX/AVX512 fold via neq_oq.
313 define <8 x i32> @commute_cmpps_one_ymm(<8 x float>* %a0, <8 x float> %a1) {
314 ; SSE-LABEL: commute_cmpps_one_ymm:
316 ; SSE-NEXT: movaps (%rdi), %xmm2
317 ; SSE-NEXT: movaps 16(%rdi), %xmm3
318 ; SSE-NEXT: movaps %xmm2, %xmm4
319 ; SSE-NEXT: cmpneqps %xmm0, %xmm4
320 ; SSE-NEXT: cmpordps %xmm2, %xmm0
321 ; SSE-NEXT: andps %xmm4, %xmm0
322 ; SSE-NEXT: movaps %xmm3, %xmm2
323 ; SSE-NEXT: cmpneqps %xmm1, %xmm2
324 ; SSE-NEXT: cmpordps %xmm3, %xmm1
325 ; SSE-NEXT: andps %xmm2, %xmm1
328 ; AVX-LABEL: commute_cmpps_one_ymm:
330 ; AVX-NEXT: vcmpneq_oqps (%rdi), %ymm0, %ymm0
333 ; AVX512-LABEL: commute_cmpps_one_ymm:
335 ; AVX512-NEXT: vcmpneq_oqps (%rdi), %ymm0, %ymm0
337 %1 = load <8 x float>, <8 x float>* %a0
338 %2 = fcmp one <8 x float> %1, %a1
339 %3 = sext <8 x i1> %2 to <8 x i32>
; olt does not commute: loads are materialized before the compares.
343 define <8 x i32> @commute_cmpps_lt_ymm(<8 x float>* %a0, <8 x float> %a1) {
344 ; SSE-LABEL: commute_cmpps_lt_ymm:
346 ; SSE-NEXT: movaps (%rdi), %xmm2
347 ; SSE-NEXT: movaps 16(%rdi), %xmm3
348 ; SSE-NEXT: cmpltps %xmm0, %xmm2
349 ; SSE-NEXT: cmpltps %xmm1, %xmm3
350 ; SSE-NEXT: movaps %xmm2, %xmm0
351 ; SSE-NEXT: movaps %xmm3, %xmm1
354 ; AVX-LABEL: commute_cmpps_lt_ymm:
356 ; AVX-NEXT: vmovaps (%rdi), %ymm1
357 ; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
360 ; AVX512-LABEL: commute_cmpps_lt_ymm:
362 ; AVX512-NEXT: vmovaps (%rdi), %ymm1
363 ; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
365 %1 = load <8 x float>, <8 x float>* %a0
366 %2 = fcmp olt <8 x float> %1, %a1
367 %3 = sext <8 x i1> %2 to <8 x i32>
; ole likewise stays uncommuted: load to register, then cmpleps.
371 define <8 x i32> @commute_cmpps_le_ymm(<8 x float>* %a0, <8 x float> %a1) {
372 ; SSE-LABEL: commute_cmpps_le_ymm:
374 ; SSE-NEXT: movaps (%rdi), %xmm2
375 ; SSE-NEXT: movaps 16(%rdi), %xmm3
376 ; SSE-NEXT: cmpleps %xmm0, %xmm2
377 ; SSE-NEXT: cmpleps %xmm1, %xmm3
378 ; SSE-NEXT: movaps %xmm2, %xmm0
379 ; SSE-NEXT: movaps %xmm3, %xmm1
382 ; AVX-LABEL: commute_cmpps_le_ymm:
384 ; AVX-NEXT: vmovaps (%rdi), %ymm1
385 ; AVX-NEXT: vcmpleps %ymm0, %ymm1, %ymm0
388 ; AVX512-LABEL: commute_cmpps_le_ymm:
390 ; AVX512-NEXT: vmovaps (%rdi), %ymm1
391 ; AVX512-NEXT: vcmpleps %ymm0, %ymm1, %ymm0
393 %1 = load <8 x float>, <8 x float>* %a0
394 %2 = fcmp ole <8 x float> %1, %a1
395 %3 = sext <8 x i1> %2 to <8 x i32>
401 ; Only equal/not-equal/ordered/unordered can be safely commuted
; 128-bit double compares: same commutation rules as cmpps, using the pd
; instruction forms (cmpeqpd/cmpneqpd/cmpordpd/cmpunordpd, or/andpd).
; oeq commutes: cmpeqpd keeps the folded memory operand.
404 define <2 x i64> @commute_cmppd_eq(<2 x double>* %a0, <2 x double> %a1) {
405 ; SSE-LABEL: commute_cmppd_eq:
407 ; SSE-NEXT: cmpeqpd (%rdi), %xmm0
410 ; AVX-LABEL: commute_cmppd_eq:
412 ; AVX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
415 ; AVX512-LABEL: commute_cmppd_eq:
417 ; AVX512-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
419 %1 = load <2 x double>, <2 x double>* %a0
420 %2 = fcmp oeq <2 x double> %1, %a1
421 %3 = sext <2 x i1> %2 to <2 x i64>
; une commutes: cmpneqpd keeps the folded memory operand.
425 define <2 x i64> @commute_cmppd_ne(<2 x double>* %a0, <2 x double> %a1) {
426 ; SSE-LABEL: commute_cmppd_ne:
428 ; SSE-NEXT: cmpneqpd (%rdi), %xmm0
431 ; AVX-LABEL: commute_cmppd_ne:
433 ; AVX-NEXT: vcmpneqpd (%rdi), %xmm0, %xmm0
436 ; AVX512-LABEL: commute_cmppd_ne:
438 ; AVX512-NEXT: vcmpneqpd (%rdi), %xmm0, %xmm0
440 %1 = load <2 x double>, <2 x double>* %a0
441 %2 = fcmp une <2 x double> %1, %a1
442 %3 = sext <2 x i1> %2 to <2 x i64>
; ord commutes: cmpordpd keeps the folded memory operand.
446 define <2 x i64> @commute_cmppd_ord(<2 x double>* %a0, <2 x double> %a1) {
447 ; SSE-LABEL: commute_cmppd_ord:
449 ; SSE-NEXT: cmpordpd (%rdi), %xmm0
452 ; AVX-LABEL: commute_cmppd_ord:
454 ; AVX-NEXT: vcmpordpd (%rdi), %xmm0, %xmm0
457 ; AVX512-LABEL: commute_cmppd_ord:
459 ; AVX512-NEXT: vcmpordpd (%rdi), %xmm0, %xmm0
461 %1 = load <2 x double>, <2 x double>* %a0
462 %2 = fcmp ord <2 x double> %1, %a1
463 %3 = sext <2 x i1> %2 to <2 x i64>
; ueq: SSE loads then combines eq | unord; AVX/AVX512 fold via eq_uq.
467 define <2 x i64> @commute_cmppd_ueq(<2 x double>* %a0, <2 x double> %a1) {
468 ; SSE-LABEL: commute_cmppd_ueq:
470 ; SSE-NEXT: movapd (%rdi), %xmm1
471 ; SSE-NEXT: movapd %xmm1, %xmm2
472 ; SSE-NEXT: cmpeqpd %xmm0, %xmm2
473 ; SSE-NEXT: cmpunordpd %xmm1, %xmm0
474 ; SSE-NEXT: orpd %xmm2, %xmm0
477 ; AVX-LABEL: commute_cmppd_ueq:
479 ; AVX-NEXT: vcmpeq_uqpd (%rdi), %xmm0, %xmm0
482 ; AVX512-LABEL: commute_cmppd_ueq:
484 ; AVX512-NEXT: vcmpeq_uqpd (%rdi), %xmm0, %xmm0
486 %1 = load <2 x double>, <2 x double>* %a0
487 %2 = fcmp ueq <2 x double> %1, %a1
488 %3 = sext <2 x i1> %2 to <2 x i64>
; one: SSE loads then combines neq & ord; AVX/AVX512 fold via neq_oq.
492 define <2 x i64> @commute_cmppd_one(<2 x double>* %a0, <2 x double> %a1) {
493 ; SSE-LABEL: commute_cmppd_one:
495 ; SSE-NEXT: movapd (%rdi), %xmm1
496 ; SSE-NEXT: movapd %xmm1, %xmm2
497 ; SSE-NEXT: cmpneqpd %xmm0, %xmm2
498 ; SSE-NEXT: cmpordpd %xmm1, %xmm0
499 ; SSE-NEXT: andpd %xmm2, %xmm0
502 ; AVX-LABEL: commute_cmppd_one:
504 ; AVX-NEXT: vcmpneq_oqpd (%rdi), %xmm0, %xmm0
507 ; AVX512-LABEL: commute_cmppd_one:
509 ; AVX512-NEXT: vcmpneq_oqpd (%rdi), %xmm0, %xmm0
511 %1 = load <2 x double>, <2 x double>* %a0
512 %2 = fcmp one <2 x double> %1, %a1
513 %3 = sext <2 x i1> %2 to <2 x i64>
; uno commutes: cmpunordpd keeps the folded memory operand.
517 define <2 x i64> @commute_cmppd_uno(<2 x double>* %a0, <2 x double> %a1) {
518 ; SSE-LABEL: commute_cmppd_uno:
520 ; SSE-NEXT: cmpunordpd (%rdi), %xmm0
523 ; AVX-LABEL: commute_cmppd_uno:
525 ; AVX-NEXT: vcmpunordpd (%rdi), %xmm0, %xmm0
528 ; AVX512-LABEL: commute_cmppd_uno:
530 ; AVX512-NEXT: vcmpunordpd (%rdi), %xmm0, %xmm0
532 %1 = load <2 x double>, <2 x double>* %a0
533 %2 = fcmp uno <2 x double> %1, %a1
534 %3 = sext <2 x i1> %2 to <2 x i64>
; olt does not commute: load to register, then cmpltpd.
538 define <2 x i64> @commute_cmppd_lt(<2 x double>* %a0, <2 x double> %a1) {
539 ; SSE-LABEL: commute_cmppd_lt:
541 ; SSE-NEXT: movapd (%rdi), %xmm1
542 ; SSE-NEXT: cmpltpd %xmm0, %xmm1
543 ; SSE-NEXT: movapd %xmm1, %xmm0
546 ; AVX-LABEL: commute_cmppd_lt:
548 ; AVX-NEXT: vmovapd (%rdi), %xmm1
549 ; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
552 ; AVX512-LABEL: commute_cmppd_lt:
554 ; AVX512-NEXT: vmovapd (%rdi), %xmm1
555 ; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
557 %1 = load <2 x double>, <2 x double>* %a0
558 %2 = fcmp olt <2 x double> %1, %a1
559 %3 = sext <2 x i1> %2 to <2 x i64>
; ole likewise stays uncommuted: load to register, then cmplepd.
563 define <2 x i64> @commute_cmppd_le(<2 x double>* %a0, <2 x double> %a1) {
564 ; SSE-LABEL: commute_cmppd_le:
566 ; SSE-NEXT: movapd (%rdi), %xmm1
567 ; SSE-NEXT: cmplepd %xmm0, %xmm1
568 ; SSE-NEXT: movapd %xmm1, %xmm0
571 ; AVX-LABEL: commute_cmppd_le:
573 ; AVX-NEXT: vmovapd (%rdi), %xmm1
574 ; AVX-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
577 ; AVX512-LABEL: commute_cmppd_le:
579 ; AVX512-NEXT: vmovapd (%rdi), %xmm1
580 ; AVX512-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
582 %1 = load <2 x double>, <2 x double>* %a0
583 %2 = fcmp ole <2 x double> %1, %a1
584 %3 = sext <2 x i1> %2 to <2 x i64>
; 256-bit double compares: same rules; SSE splits each test into two
; 128-bit halves at (%rdi) and 16(%rdi).
; oeq commutes: folded loads on every target.
588 define <4 x i64> @commute_cmppd_eq_ymmm(<4 x double>* %a0, <4 x double> %a1) {
589 ; SSE-LABEL: commute_cmppd_eq_ymmm:
591 ; SSE-NEXT: cmpeqpd (%rdi), %xmm0
592 ; SSE-NEXT: cmpeqpd 16(%rdi), %xmm1
595 ; AVX-LABEL: commute_cmppd_eq_ymmm:
597 ; AVX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
600 ; AVX512-LABEL: commute_cmppd_eq_ymmm:
602 ; AVX512-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
604 %1 = load <4 x double>, <4 x double>* %a0
605 %2 = fcmp oeq <4 x double> %1, %a1
606 %3 = sext <4 x i1> %2 to <4 x i64>
; une commutes: cmpneqpd keeps folded memory operands.
610 define <4 x i64> @commute_cmppd_ne_ymmm(<4 x double>* %a0, <4 x double> %a1) {
611 ; SSE-LABEL: commute_cmppd_ne_ymmm:
613 ; SSE-NEXT: cmpneqpd (%rdi), %xmm0
614 ; SSE-NEXT: cmpneqpd 16(%rdi), %xmm1
617 ; AVX-LABEL: commute_cmppd_ne_ymmm:
619 ; AVX-NEXT: vcmpneqpd (%rdi), %ymm0, %ymm0
622 ; AVX512-LABEL: commute_cmppd_ne_ymmm:
624 ; AVX512-NEXT: vcmpneqpd (%rdi), %ymm0, %ymm0
626 %1 = load <4 x double>, <4 x double>* %a0
627 %2 = fcmp une <4 x double> %1, %a1
628 %3 = sext <4 x i1> %2 to <4 x i64>
; ord commutes: cmpordpd keeps folded memory operands.
632 define <4 x i64> @commute_cmppd_ord_ymmm(<4 x double>* %a0, <4 x double> %a1) {
633 ; SSE-LABEL: commute_cmppd_ord_ymmm:
635 ; SSE-NEXT: cmpordpd (%rdi), %xmm0
636 ; SSE-NEXT: cmpordpd 16(%rdi), %xmm1
639 ; AVX-LABEL: commute_cmppd_ord_ymmm:
641 ; AVX-NEXT: vcmpordpd (%rdi), %ymm0, %ymm0
644 ; AVX512-LABEL: commute_cmppd_ord_ymmm:
646 ; AVX512-NEXT: vcmpordpd (%rdi), %ymm0, %ymm0
648 %1 = load <4 x double>, <4 x double>* %a0
649 %2 = fcmp ord <4 x double> %1, %a1
650 %3 = sext <4 x i1> %2 to <4 x i64>
; uno commutes: cmpunordpd keeps folded memory operands.
654 define <4 x i64> @commute_cmppd_uno_ymmm(<4 x double>* %a0, <4 x double> %a1) {
655 ; SSE-LABEL: commute_cmppd_uno_ymmm:
657 ; SSE-NEXT: cmpunordpd (%rdi), %xmm0
658 ; SSE-NEXT: cmpunordpd 16(%rdi), %xmm1
661 ; AVX-LABEL: commute_cmppd_uno_ymmm:
663 ; AVX-NEXT: vcmpunordpd (%rdi), %ymm0, %ymm0
666 ; AVX512-LABEL: commute_cmppd_uno_ymmm:
668 ; AVX512-NEXT: vcmpunordpd (%rdi), %ymm0, %ymm0
670 %1 = load <4 x double>, <4 x double>* %a0
671 %2 = fcmp uno <4 x double> %1, %a1
672 %3 = sext <4 x i1> %2 to <4 x i64>
; ueq: SSE expands each half to eq | unord; AVX/AVX512 fold via eq_uq.
676 define <4 x i64> @commute_cmppd_ueq_ymmm(<4 x double>* %a0, <4 x double> %a1) {
677 ; SSE-LABEL: commute_cmppd_ueq_ymmm:
679 ; SSE-NEXT: movapd (%rdi), %xmm2
680 ; SSE-NEXT: movapd 16(%rdi), %xmm3
681 ; SSE-NEXT: movapd %xmm2, %xmm4
682 ; SSE-NEXT: cmpeqpd %xmm0, %xmm4
683 ; SSE-NEXT: cmpunordpd %xmm2, %xmm0
684 ; SSE-NEXT: orpd %xmm4, %xmm0
685 ; SSE-NEXT: movapd %xmm3, %xmm2
686 ; SSE-NEXT: cmpeqpd %xmm1, %xmm2
687 ; SSE-NEXT: cmpunordpd %xmm3, %xmm1
688 ; SSE-NEXT: orpd %xmm2, %xmm1
691 ; AVX-LABEL: commute_cmppd_ueq_ymmm:
693 ; AVX-NEXT: vcmpeq_uqpd (%rdi), %ymm0, %ymm0
696 ; AVX512-LABEL: commute_cmppd_ueq_ymmm:
698 ; AVX512-NEXT: vcmpeq_uqpd (%rdi), %ymm0, %ymm0
700 %1 = load <4 x double>, <4 x double>* %a0
701 %2 = fcmp ueq <4 x double> %1, %a1
702 %3 = sext <4 x i1> %2 to <4 x i64>
; one: SSE expands each half to neq & ord; AVX/AVX512 fold via neq_oq.
706 define <4 x i64> @commute_cmppd_one_ymmm(<4 x double>* %a0, <4 x double> %a1) {
707 ; SSE-LABEL: commute_cmppd_one_ymmm:
709 ; SSE-NEXT: movapd (%rdi), %xmm2
710 ; SSE-NEXT: movapd 16(%rdi), %xmm3
711 ; SSE-NEXT: movapd %xmm2, %xmm4
712 ; SSE-NEXT: cmpneqpd %xmm0, %xmm4
713 ; SSE-NEXT: cmpordpd %xmm2, %xmm0
714 ; SSE-NEXT: andpd %xmm4, %xmm0
715 ; SSE-NEXT: movapd %xmm3, %xmm2
716 ; SSE-NEXT: cmpneqpd %xmm1, %xmm2
717 ; SSE-NEXT: cmpordpd %xmm3, %xmm1
718 ; SSE-NEXT: andpd %xmm2, %xmm1
721 ; AVX-LABEL: commute_cmppd_one_ymmm:
723 ; AVX-NEXT: vcmpneq_oqpd (%rdi), %ymm0, %ymm0
726 ; AVX512-LABEL: commute_cmppd_one_ymmm:
728 ; AVX512-NEXT: vcmpneq_oqpd (%rdi), %ymm0, %ymm0
730 %1 = load <4 x double>, <4 x double>* %a0
731 %2 = fcmp one <4 x double> %1, %a1
732 %3 = sext <4 x i1> %2 to <4 x i64>
; olt does not commute: loads are materialized before the compares.
736 define <4 x i64> @commute_cmppd_lt_ymmm(<4 x double>* %a0, <4 x double> %a1) {
737 ; SSE-LABEL: commute_cmppd_lt_ymmm:
739 ; SSE-NEXT: movapd (%rdi), %xmm2
740 ; SSE-NEXT: movapd 16(%rdi), %xmm3
741 ; SSE-NEXT: cmpltpd %xmm0, %xmm2
742 ; SSE-NEXT: cmpltpd %xmm1, %xmm3
743 ; SSE-NEXT: movapd %xmm2, %xmm0
744 ; SSE-NEXT: movapd %xmm3, %xmm1
747 ; AVX-LABEL: commute_cmppd_lt_ymmm:
749 ; AVX-NEXT: vmovapd (%rdi), %ymm1
750 ; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
753 ; AVX512-LABEL: commute_cmppd_lt_ymmm:
755 ; AVX512-NEXT: vmovapd (%rdi), %ymm1
756 ; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
758 %1 = load <4 x double>, <4 x double>* %a0
759 %2 = fcmp olt <4 x double> %1, %a1
760 %3 = sext <4 x i1> %2 to <4 x i64>
; ole likewise stays uncommuted: load to register, then cmplepd.
764 define <4 x i64> @commute_cmppd_le_ymmm(<4 x double>* %a0, <4 x double> %a1) {
765 ; SSE-LABEL: commute_cmppd_le_ymmm:
767 ; SSE-NEXT: movapd (%rdi), %xmm2
768 ; SSE-NEXT: movapd 16(%rdi), %xmm3
769 ; SSE-NEXT: cmplepd %xmm0, %xmm2
770 ; SSE-NEXT: cmplepd %xmm1, %xmm3
771 ; SSE-NEXT: movapd %xmm2, %xmm0
772 ; SSE-NEXT: movapd %xmm3, %xmm1
775 ; AVX-LABEL: commute_cmppd_le_ymmm:
777 ; AVX-NEXT: vmovapd (%rdi), %ymm1
778 ; AVX-NEXT: vcmplepd %ymm0, %ymm1, %ymm0
781 ; AVX512-LABEL: commute_cmppd_le_ymmm:
783 ; AVX512-NEXT: vmovapd (%rdi), %ymm1
784 ; AVX512-NEXT: vcmplepd %ymm0, %ymm1, %ymm0
786 %1 = load <4 x double>, <4 x double>* %a0
787 %2 = fcmp ole <4 x double> %1, %a1
788 %3 = sext <4 x i1> %2 to <4 x i64>
; 512-bit float compares. SSE splits into four 128-bit halves, AVX into two
; 256-bit halves. AVX512 compares into a mask register (k0) and widens with
; vpmovm2d; notably, for lt/le the predicate itself is swapped to gt/ge so
; the load can stay folded even for the "non-commutable" predicates.
; oeq commutes: folded loads on every target.
792 define <16 x i32> @commute_cmpps_eq_zmm(<16 x float>* %a0, <16 x float> %a1) {
793 ; SSE-LABEL: commute_cmpps_eq_zmm:
795 ; SSE-NEXT: cmpeqps (%rdi), %xmm0
796 ; SSE-NEXT: cmpeqps 16(%rdi), %xmm1
797 ; SSE-NEXT: cmpeqps 32(%rdi), %xmm2
798 ; SSE-NEXT: cmpeqps 48(%rdi), %xmm3
801 ; AVX-LABEL: commute_cmpps_eq_zmm:
803 ; AVX-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0
804 ; AVX-NEXT: vcmpeqps 32(%rdi), %ymm1, %ymm1
807 ; AVX512-LABEL: commute_cmpps_eq_zmm:
809 ; AVX512-NEXT: vcmpeqps (%rdi), %zmm0, %k0
810 ; AVX512-NEXT: vpmovm2d %k0, %zmm0
812 %1 = load <16 x float>, <16 x float>* %a0
813 %2 = fcmp oeq <16 x float> %1, %a1
814 %3 = sext <16 x i1> %2 to <16 x i32>
; une commutes: folded loads on every target.
818 define <16 x i32> @commute_cmpps_ne_zmm(<16 x float>* %a0, <16 x float> %a1) {
819 ; SSE-LABEL: commute_cmpps_ne_zmm:
821 ; SSE-NEXT: cmpneqps (%rdi), %xmm0
822 ; SSE-NEXT: cmpneqps 16(%rdi), %xmm1
823 ; SSE-NEXT: cmpneqps 32(%rdi), %xmm2
824 ; SSE-NEXT: cmpneqps 48(%rdi), %xmm3
827 ; AVX-LABEL: commute_cmpps_ne_zmm:
829 ; AVX-NEXT: vcmpneqps (%rdi), %ymm0, %ymm0
830 ; AVX-NEXT: vcmpneqps 32(%rdi), %ymm1, %ymm1
833 ; AVX512-LABEL: commute_cmpps_ne_zmm:
835 ; AVX512-NEXT: vcmpneqps (%rdi), %zmm0, %k0
836 ; AVX512-NEXT: vpmovm2d %k0, %zmm0
838 %1 = load <16 x float>, <16 x float>* %a0
839 %2 = fcmp une <16 x float> %1, %a1
840 %3 = sext <16 x i1> %2 to <16 x i32>
; ord commutes: folded loads on every target.
844 define <16 x i32> @commute_cmpps_ord_zmm(<16 x float>* %a0, <16 x float> %a1) {
845 ; SSE-LABEL: commute_cmpps_ord_zmm:
847 ; SSE-NEXT: cmpordps (%rdi), %xmm0
848 ; SSE-NEXT: cmpordps 16(%rdi), %xmm1
849 ; SSE-NEXT: cmpordps 32(%rdi), %xmm2
850 ; SSE-NEXT: cmpordps 48(%rdi), %xmm3
853 ; AVX-LABEL: commute_cmpps_ord_zmm:
855 ; AVX-NEXT: vcmpordps (%rdi), %ymm0, %ymm0
856 ; AVX-NEXT: vcmpordps 32(%rdi), %ymm1, %ymm1
859 ; AVX512-LABEL: commute_cmpps_ord_zmm:
861 ; AVX512-NEXT: vcmpordps (%rdi), %zmm0, %k0
862 ; AVX512-NEXT: vpmovm2d %k0, %zmm0
864 %1 = load <16 x float>, <16 x float>* %a0
865 %2 = fcmp ord <16 x float> %1, %a1
866 %3 = sext <16 x i1> %2 to <16 x i32>
; uno commutes: folded loads on every target.
870 define <16 x i32> @commute_cmpps_uno_zmm(<16 x float>* %a0, <16 x float> %a1) {
871 ; SSE-LABEL: commute_cmpps_uno_zmm:
873 ; SSE-NEXT: cmpunordps (%rdi), %xmm0
874 ; SSE-NEXT: cmpunordps 16(%rdi), %xmm1
875 ; SSE-NEXT: cmpunordps 32(%rdi), %xmm2
876 ; SSE-NEXT: cmpunordps 48(%rdi), %xmm3
879 ; AVX-LABEL: commute_cmpps_uno_zmm:
881 ; AVX-NEXT: vcmpunordps (%rdi), %ymm0, %ymm0
882 ; AVX-NEXT: vcmpunordps 32(%rdi), %ymm1, %ymm1
885 ; AVX512-LABEL: commute_cmpps_uno_zmm:
887 ; AVX512-NEXT: vcmpunordps (%rdi), %zmm0, %k0
888 ; AVX512-NEXT: vpmovm2d %k0, %zmm0
890 %1 = load <16 x float>, <16 x float>* %a0
891 %2 = fcmp uno <16 x float> %1, %a1
892 %3 = sext <16 x i1> %2 to <16 x i32>
; ueq: SSE expands each quarter to eq | unord; AVX/AVX512 fold via eq_uq.
896 define <16 x i32> @commute_cmpps_ueq_zmm(<16 x float>* %a0, <16 x float> %a1) {
897 ; SSE-LABEL: commute_cmpps_ueq_zmm:
899 ; SSE-NEXT: movaps (%rdi), %xmm7
900 ; SSE-NEXT: movaps 16(%rdi), %xmm5
901 ; SSE-NEXT: movaps 32(%rdi), %xmm6
902 ; SSE-NEXT: movaps 48(%rdi), %xmm8
903 ; SSE-NEXT: movaps %xmm7, %xmm4
904 ; SSE-NEXT: cmpeqps %xmm0, %xmm4
905 ; SSE-NEXT: cmpunordps %xmm7, %xmm0
906 ; SSE-NEXT: orps %xmm4, %xmm0
907 ; SSE-NEXT: movaps %xmm5, %xmm4
908 ; SSE-NEXT: cmpeqps %xmm1, %xmm4
909 ; SSE-NEXT: cmpunordps %xmm5, %xmm1
910 ; SSE-NEXT: orps %xmm4, %xmm1
911 ; SSE-NEXT: movaps %xmm6, %xmm4
912 ; SSE-NEXT: cmpeqps %xmm2, %xmm4
913 ; SSE-NEXT: cmpunordps %xmm6, %xmm2
914 ; SSE-NEXT: orps %xmm4, %xmm2
915 ; SSE-NEXT: movaps %xmm8, %xmm4
916 ; SSE-NEXT: cmpeqps %xmm3, %xmm4
917 ; SSE-NEXT: cmpunordps %xmm8, %xmm3
918 ; SSE-NEXT: orps %xmm4, %xmm3
921 ; AVX-LABEL: commute_cmpps_ueq_zmm:
923 ; AVX-NEXT: vcmpeq_uqps (%rdi), %ymm0, %ymm0
924 ; AVX-NEXT: vcmpeq_uqps 32(%rdi), %ymm1, %ymm1
927 ; AVX512-LABEL: commute_cmpps_ueq_zmm:
929 ; AVX512-NEXT: vcmpeq_uqps (%rdi), %zmm0, %k0
930 ; AVX512-NEXT: vpmovm2d %k0, %zmm0
932 %1 = load <16 x float>, <16 x float>* %a0
933 %2 = fcmp ueq <16 x float> %1, %a1
934 %3 = sext <16 x i1> %2 to <16 x i32>
; one: SSE expands each quarter to neq & ord; AVX/AVX512 fold via neq_oq.
938 define <16 x i32> @commute_cmpps_one_zmm(<16 x float>* %a0, <16 x float> %a1) {
939 ; SSE-LABEL: commute_cmpps_one_zmm:
941 ; SSE-NEXT: movaps (%rdi), %xmm7
942 ; SSE-NEXT: movaps 16(%rdi), %xmm5
943 ; SSE-NEXT: movaps 32(%rdi), %xmm6
944 ; SSE-NEXT: movaps 48(%rdi), %xmm8
945 ; SSE-NEXT: movaps %xmm7, %xmm4
946 ; SSE-NEXT: cmpneqps %xmm0, %xmm4
947 ; SSE-NEXT: cmpordps %xmm7, %xmm0
948 ; SSE-NEXT: andps %xmm4, %xmm0
949 ; SSE-NEXT: movaps %xmm5, %xmm4
950 ; SSE-NEXT: cmpneqps %xmm1, %xmm4
951 ; SSE-NEXT: cmpordps %xmm5, %xmm1
952 ; SSE-NEXT: andps %xmm4, %xmm1
953 ; SSE-NEXT: movaps %xmm6, %xmm4
954 ; SSE-NEXT: cmpneqps %xmm2, %xmm4
955 ; SSE-NEXT: cmpordps %xmm6, %xmm2
956 ; SSE-NEXT: andps %xmm4, %xmm2
957 ; SSE-NEXT: movaps %xmm8, %xmm4
958 ; SSE-NEXT: cmpneqps %xmm3, %xmm4
959 ; SSE-NEXT: cmpordps %xmm8, %xmm3
960 ; SSE-NEXT: andps %xmm4, %xmm3
963 ; AVX-LABEL: commute_cmpps_one_zmm:
965 ; AVX-NEXT: vcmpneq_oqps (%rdi), %ymm0, %ymm0
966 ; AVX-NEXT: vcmpneq_oqps 32(%rdi), %ymm1, %ymm1
969 ; AVX512-LABEL: commute_cmpps_one_zmm:
971 ; AVX512-NEXT: vcmpneq_oqps (%rdi), %zmm0, %k0
972 ; AVX512-NEXT: vpmovm2d %k0, %zmm0
974 %1 = load <16 x float>, <16 x float>* %a0
975 %2 = fcmp one <16 x float> %1, %a1
976 %3 = sext <16 x i1> %2 to <16 x i32>
; olt: SSE/AVX must load first; AVX512 instead swaps the predicate to gt
; (vcmpgtps) and keeps the load folded.
980 define <16 x i32> @commute_cmpps_lt_zmm(<16 x float>* %a0, <16 x float> %a1) {
981 ; SSE-LABEL: commute_cmpps_lt_zmm:
983 ; SSE-NEXT: movaps (%rdi), %xmm4
984 ; SSE-NEXT: movaps 16(%rdi), %xmm5
985 ; SSE-NEXT: movaps 32(%rdi), %xmm6
986 ; SSE-NEXT: movaps 48(%rdi), %xmm7
987 ; SSE-NEXT: cmpltps %xmm0, %xmm4
988 ; SSE-NEXT: cmpltps %xmm1, %xmm5
989 ; SSE-NEXT: cmpltps %xmm2, %xmm6
990 ; SSE-NEXT: cmpltps %xmm3, %xmm7
991 ; SSE-NEXT: movaps %xmm4, %xmm0
992 ; SSE-NEXT: movaps %xmm5, %xmm1
993 ; SSE-NEXT: movaps %xmm6, %xmm2
994 ; SSE-NEXT: movaps %xmm7, %xmm3
997 ; AVX-LABEL: commute_cmpps_lt_zmm:
999 ; AVX-NEXT: vmovaps (%rdi), %ymm2
1000 ; AVX-NEXT: vmovaps 32(%rdi), %ymm3
1001 ; AVX-NEXT: vcmpltps %ymm0, %ymm2, %ymm0
1002 ; AVX-NEXT: vcmpltps %ymm1, %ymm3, %ymm1
1005 ; AVX512-LABEL: commute_cmpps_lt_zmm:
1007 ; AVX512-NEXT: vcmpgtps (%rdi), %zmm0, %k0
1008 ; AVX512-NEXT: vpmovm2d %k0, %zmm0
1010 %1 = load <16 x float>, <16 x float>* %a0
1011 %2 = fcmp olt <16 x float> %1, %a1
1012 %3 = sext <16 x i1> %2 to <16 x i32>
; ole: SSE/AVX must load first; AVX512 swaps the predicate to ge
; (vcmpgeps) and keeps the load folded.
1016 define <16 x i32> @commute_cmpps_le_zmm(<16 x float>* %a0, <16 x float> %a1) {
1017 ; SSE-LABEL: commute_cmpps_le_zmm:
1019 ; SSE-NEXT: movaps (%rdi), %xmm4
1020 ; SSE-NEXT: movaps 16(%rdi), %xmm5
1021 ; SSE-NEXT: movaps 32(%rdi), %xmm6
1022 ; SSE-NEXT: movaps 48(%rdi), %xmm7
1023 ; SSE-NEXT: cmpleps %xmm0, %xmm4
1024 ; SSE-NEXT: cmpleps %xmm1, %xmm5
1025 ; SSE-NEXT: cmpleps %xmm2, %xmm6
1026 ; SSE-NEXT: cmpleps %xmm3, %xmm7
1027 ; SSE-NEXT: movaps %xmm4, %xmm0
1028 ; SSE-NEXT: movaps %xmm5, %xmm1
1029 ; SSE-NEXT: movaps %xmm6, %xmm2
1030 ; SSE-NEXT: movaps %xmm7, %xmm3
1033 ; AVX-LABEL: commute_cmpps_le_zmm:
1035 ; AVX-NEXT: vmovaps (%rdi), %ymm2
1036 ; AVX-NEXT: vmovaps 32(%rdi), %ymm3
1037 ; AVX-NEXT: vcmpleps %ymm0, %ymm2, %ymm0
1038 ; AVX-NEXT: vcmpleps %ymm1, %ymm3, %ymm1
1041 ; AVX512-LABEL: commute_cmpps_le_zmm:
1043 ; AVX512-NEXT: vcmpgeps (%rdi), %zmm0, %k0
1044 ; AVX512-NEXT: vpmovm2d %k0, %zmm0
1046 %1 = load <16 x float>, <16 x float>* %a0
1047 %2 = fcmp ole <16 x float> %1, %a1
1048 %3 = sext <16 x i1> %2 to <16 x i32>
; 512-bit double compares: mirrors the zmm float cases. AVX512 compares
; into k0 and widens with vpmovm2q; lt/le fold their loads by swapping the
; predicate to gt/ge.
; oeq commutes: folded loads on every target.
1052 define <8 x i64> @commute_cmppd_eq_zmmm(<8 x double>* %a0, <8 x double> %a1) {
1053 ; SSE-LABEL: commute_cmppd_eq_zmmm:
1055 ; SSE-NEXT: cmpeqpd (%rdi), %xmm0
1056 ; SSE-NEXT: cmpeqpd 16(%rdi), %xmm1
1057 ; SSE-NEXT: cmpeqpd 32(%rdi), %xmm2
1058 ; SSE-NEXT: cmpeqpd 48(%rdi), %xmm3
1061 ; AVX-LABEL: commute_cmppd_eq_zmmm:
1063 ; AVX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
1064 ; AVX-NEXT: vcmpeqpd 32(%rdi), %ymm1, %ymm1
1067 ; AVX512-LABEL: commute_cmppd_eq_zmmm:
1069 ; AVX512-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
1070 ; AVX512-NEXT: vpmovm2q %k0, %zmm0
1072 %1 = load <8 x double>, <8 x double>* %a0
1073 %2 = fcmp oeq <8 x double> %1, %a1
1074 %3 = sext <8 x i1> %2 to <8 x i64>
; une commutes: folded loads on every target.
1078 define <8 x i64> @commute_cmppd_ne_zmmm(<8 x double>* %a0, <8 x double> %a1) {
1079 ; SSE-LABEL: commute_cmppd_ne_zmmm:
1081 ; SSE-NEXT: cmpneqpd (%rdi), %xmm0
1082 ; SSE-NEXT: cmpneqpd 16(%rdi), %xmm1
1083 ; SSE-NEXT: cmpneqpd 32(%rdi), %xmm2
1084 ; SSE-NEXT: cmpneqpd 48(%rdi), %xmm3
1087 ; AVX-LABEL: commute_cmppd_ne_zmmm:
1089 ; AVX-NEXT: vcmpneqpd (%rdi), %ymm0, %ymm0
1090 ; AVX-NEXT: vcmpneqpd 32(%rdi), %ymm1, %ymm1
1093 ; AVX512-LABEL: commute_cmppd_ne_zmmm:
1095 ; AVX512-NEXT: vcmpneqpd (%rdi), %zmm0, %k0
1096 ; AVX512-NEXT: vpmovm2q %k0, %zmm0
1098 %1 = load <8 x double>, <8 x double>* %a0
1099 %2 = fcmp une <8 x double> %1, %a1
1100 %3 = sext <8 x i1> %2 to <8 x i64>
; ord commutes: folded loads on every target.
1104 define <8 x i64> @commute_cmppd_ord_zmmm(<8 x double>* %a0, <8 x double> %a1) {
1105 ; SSE-LABEL: commute_cmppd_ord_zmmm:
1107 ; SSE-NEXT: cmpordpd (%rdi), %xmm0
1108 ; SSE-NEXT: cmpordpd 16(%rdi), %xmm1
1109 ; SSE-NEXT: cmpordpd 32(%rdi), %xmm2
1110 ; SSE-NEXT: cmpordpd 48(%rdi), %xmm3
1113 ; AVX-LABEL: commute_cmppd_ord_zmmm:
1115 ; AVX-NEXT: vcmpordpd (%rdi), %ymm0, %ymm0
1116 ; AVX-NEXT: vcmpordpd 32(%rdi), %ymm1, %ymm1
1119 ; AVX512-LABEL: commute_cmppd_ord_zmmm:
1121 ; AVX512-NEXT: vcmpordpd (%rdi), %zmm0, %k0
1122 ; AVX512-NEXT: vpmovm2q %k0, %zmm0
1124 %1 = load <8 x double>, <8 x double>* %a0
1125 %2 = fcmp ord <8 x double> %1, %a1
1126 %3 = sext <8 x i1> %2 to <8 x i64>
; uno commutes: folded loads on every target.
1130 define <8 x i64> @commute_cmppd_uno_zmmm(<8 x double>* %a0, <8 x double> %a1) {
1131 ; SSE-LABEL: commute_cmppd_uno_zmmm:
1133 ; SSE-NEXT: cmpunordpd (%rdi), %xmm0
1134 ; SSE-NEXT: cmpunordpd 16(%rdi), %xmm1
1135 ; SSE-NEXT: cmpunordpd 32(%rdi), %xmm2
1136 ; SSE-NEXT: cmpunordpd 48(%rdi), %xmm3
1139 ; AVX-LABEL: commute_cmppd_uno_zmmm:
1141 ; AVX-NEXT: vcmpunordpd (%rdi), %ymm0, %ymm0
1142 ; AVX-NEXT: vcmpunordpd 32(%rdi), %ymm1, %ymm1
1145 ; AVX512-LABEL: commute_cmppd_uno_zmmm:
1147 ; AVX512-NEXT: vcmpunordpd (%rdi), %zmm0, %k0
1148 ; AVX512-NEXT: vpmovm2q %k0, %zmm0
1150 %1 = load <8 x double>, <8 x double>* %a0
1151 %2 = fcmp uno <8 x double> %1, %a1
1152 %3 = sext <8 x i1> %2 to <8 x i64>
; ueq: SSE expands each quarter to eq | unord; AVX/AVX512 fold via eq_uq.
1156 define <8 x i64> @commute_cmppd_ueq_zmmm(<8 x double>* %a0, <8 x double> %a1) {
1157 ; SSE-LABEL: commute_cmppd_ueq_zmmm:
1159 ; SSE-NEXT: movapd (%rdi), %xmm7
1160 ; SSE-NEXT: movapd 16(%rdi), %xmm5
1161 ; SSE-NEXT: movapd 32(%rdi), %xmm6
1162 ; SSE-NEXT: movapd 48(%rdi), %xmm8
1163 ; SSE-NEXT: movapd %xmm7, %xmm4
1164 ; SSE-NEXT: cmpeqpd %xmm0, %xmm4
1165 ; SSE-NEXT: cmpunordpd %xmm7, %xmm0
1166 ; SSE-NEXT: orpd %xmm4, %xmm0
1167 ; SSE-NEXT: movapd %xmm5, %xmm4
1168 ; SSE-NEXT: cmpeqpd %xmm1, %xmm4
1169 ; SSE-NEXT: cmpunordpd %xmm5, %xmm1
1170 ; SSE-NEXT: orpd %xmm4, %xmm1
1171 ; SSE-NEXT: movapd %xmm6, %xmm4
1172 ; SSE-NEXT: cmpeqpd %xmm2, %xmm4
1173 ; SSE-NEXT: cmpunordpd %xmm6, %xmm2
1174 ; SSE-NEXT: orpd %xmm4, %xmm2
1175 ; SSE-NEXT: movapd %xmm8, %xmm4
1176 ; SSE-NEXT: cmpeqpd %xmm3, %xmm4
1177 ; SSE-NEXT: cmpunordpd %xmm8, %xmm3
1178 ; SSE-NEXT: orpd %xmm4, %xmm3
1181 ; AVX-LABEL: commute_cmppd_ueq_zmmm:
1183 ; AVX-NEXT: vcmpeq_uqpd (%rdi), %ymm0, %ymm0
1184 ; AVX-NEXT: vcmpeq_uqpd 32(%rdi), %ymm1, %ymm1
1187 ; AVX512-LABEL: commute_cmppd_ueq_zmmm:
1189 ; AVX512-NEXT: vcmpeq_uqpd (%rdi), %zmm0, %k0
1190 ; AVX512-NEXT: vpmovm2q %k0, %zmm0
1192 %1 = load <8 x double>, <8 x double>* %a0
1193 %2 = fcmp ueq <8 x double> %1, %a1
1194 %3 = sext <8 x i1> %2 to <8 x i64>
; one: SSE expands each quarter to neq & ord; AVX/AVX512 fold via neq_oq.
1198 define <8 x i64> @commute_cmppd_one_zmmm(<8 x double>* %a0, <8 x double> %a1) {
1199 ; SSE-LABEL: commute_cmppd_one_zmmm:
1201 ; SSE-NEXT: movapd (%rdi), %xmm7
1202 ; SSE-NEXT: movapd 16(%rdi), %xmm5
1203 ; SSE-NEXT: movapd 32(%rdi), %xmm6
1204 ; SSE-NEXT: movapd 48(%rdi), %xmm8
1205 ; SSE-NEXT: movapd %xmm7, %xmm4
1206 ; SSE-NEXT: cmpneqpd %xmm0, %xmm4
1207 ; SSE-NEXT: cmpordpd %xmm7, %xmm0
1208 ; SSE-NEXT: andpd %xmm4, %xmm0
1209 ; SSE-NEXT: movapd %xmm5, %xmm4
1210 ; SSE-NEXT: cmpneqpd %xmm1, %xmm4
1211 ; SSE-NEXT: cmpordpd %xmm5, %xmm1
1212 ; SSE-NEXT: andpd %xmm4, %xmm1
1213 ; SSE-NEXT: movapd %xmm6, %xmm4
1214 ; SSE-NEXT: cmpneqpd %xmm2, %xmm4
1215 ; SSE-NEXT: cmpordpd %xmm6, %xmm2
1216 ; SSE-NEXT: andpd %xmm4, %xmm2
1217 ; SSE-NEXT: movapd %xmm8, %xmm4
1218 ; SSE-NEXT: cmpneqpd %xmm3, %xmm4
1219 ; SSE-NEXT: cmpordpd %xmm8, %xmm3
1220 ; SSE-NEXT: andpd %xmm4, %xmm3
1223 ; AVX-LABEL: commute_cmppd_one_zmmm:
1225 ; AVX-NEXT: vcmpneq_oqpd (%rdi), %ymm0, %ymm0
1226 ; AVX-NEXT: vcmpneq_oqpd 32(%rdi), %ymm1, %ymm1
1229 ; AVX512-LABEL: commute_cmppd_one_zmmm:
1231 ; AVX512-NEXT: vcmpneq_oqpd (%rdi), %zmm0, %k0
1232 ; AVX512-NEXT: vpmovm2q %k0, %zmm0
1234 %1 = load <8 x double>, <8 x double>* %a0
1235 %2 = fcmp one <8 x double> %1, %a1
1236 %3 = sext <8 x i1> %2 to <8 x i64>
; olt: SSE/AVX must load first; AVX512 swaps the predicate to gt
; (vcmpgtpd) and keeps the load folded.
1240 define <8 x i64> @commute_cmppd_lt_zmmm(<8 x double>* %a0, <8 x double> %a1) {
1241 ; SSE-LABEL: commute_cmppd_lt_zmmm:
1243 ; SSE-NEXT: movapd (%rdi), %xmm4
1244 ; SSE-NEXT: movapd 16(%rdi), %xmm5
1245 ; SSE-NEXT: movapd 32(%rdi), %xmm6
1246 ; SSE-NEXT: movapd 48(%rdi), %xmm7
1247 ; SSE-NEXT: cmpltpd %xmm0, %xmm4
1248 ; SSE-NEXT: cmpltpd %xmm1, %xmm5
1249 ; SSE-NEXT: cmpltpd %xmm2, %xmm6
1250 ; SSE-NEXT: cmpltpd %xmm3, %xmm7
1251 ; SSE-NEXT: movapd %xmm4, %xmm0
1252 ; SSE-NEXT: movapd %xmm5, %xmm1
1253 ; SSE-NEXT: movapd %xmm6, %xmm2
1254 ; SSE-NEXT: movapd %xmm7, %xmm3
1257 ; AVX-LABEL: commute_cmppd_lt_zmmm:
1259 ; AVX-NEXT: vmovapd (%rdi), %ymm2
1260 ; AVX-NEXT: vmovapd 32(%rdi), %ymm3
1261 ; AVX-NEXT: vcmpltpd %ymm0, %ymm2, %ymm0
1262 ; AVX-NEXT: vcmpltpd %ymm1, %ymm3, %ymm1
1265 ; AVX512-LABEL: commute_cmppd_lt_zmmm:
1267 ; AVX512-NEXT: vcmpgtpd (%rdi), %zmm0, %k0
1268 ; AVX512-NEXT: vpmovm2q %k0, %zmm0
1270 %1 = load <8 x double>, <8 x double>* %a0
1271 %2 = fcmp olt <8 x double> %1, %a1
1272 %3 = sext <8 x i1> %2 to <8 x i64>
; ole: SSE/AVX must load first; AVX512 swaps the predicate to ge
; (vcmpgepd) and keeps the load folded.
1276 define <8 x i64> @commute_cmppd_le_zmmm(<8 x double>* %a0, <8 x double> %a1) {
1277 ; SSE-LABEL: commute_cmppd_le_zmmm:
1279 ; SSE-NEXT: movapd (%rdi), %xmm4
1280 ; SSE-NEXT: movapd 16(%rdi), %xmm5
1281 ; SSE-NEXT: movapd 32(%rdi), %xmm6
1282 ; SSE-NEXT: movapd 48(%rdi), %xmm7
1283 ; SSE-NEXT: cmplepd %xmm0, %xmm4
1284 ; SSE-NEXT: cmplepd %xmm1, %xmm5
1285 ; SSE-NEXT: cmplepd %xmm2, %xmm6
1286 ; SSE-NEXT: cmplepd %xmm3, %xmm7
1287 ; SSE-NEXT: movapd %xmm4, %xmm0
1288 ; SSE-NEXT: movapd %xmm5, %xmm1
1289 ; SSE-NEXT: movapd %xmm6, %xmm2
1290 ; SSE-NEXT: movapd %xmm7, %xmm3
1293 ; AVX-LABEL: commute_cmppd_le_zmmm:
1295 ; AVX-NEXT: vmovapd (%rdi), %ymm2
1296 ; AVX-NEXT: vmovapd 32(%rdi), %ymm3
1297 ; AVX-NEXT: vcmplepd %ymm0, %ymm2, %ymm0
1298 ; AVX-NEXT: vcmplepd %ymm1, %ymm3, %ymm1
1301 ; AVX512-LABEL: commute_cmppd_le_zmmm:
1303 ; AVX512-NEXT: vcmpgepd (%rdi), %zmm0, %k0
1304 ; AVX512-NEXT: vpmovm2q %k0, %zmm0
1306 %1 = load <8 x double>, <8 x double>* %a0
1307 %2 = fcmp ole <8 x double> %1, %a1
1308 %3 = sext <8 x i1> %2 to <8 x i64>