1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=KNL
3 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
4 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; test1: ordered less-or-equal compare (fcmp ole) of two <16 x float> values
; driving a select between %x and %y. The checks expect a 512-bit vcmpleps
; into %k1 followed by a masked vblendmps.
6 define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
9 ; CHECK-NEXT: vcmpleps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x02]
10 ; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
11 ; CHECK-NEXT: retq ## encoding: [0xc3]
12 %mask = fcmp ole <16 x float> %x, %y
13 %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
; test2: ordered less-or-equal compare (fcmp ole) of two <8 x double> values
; driving a select between %x and %y. The checks expect vcmplepd into %k1
; followed by a masked vblendmpd.
17 define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
20 ; CHECK-NEXT: vcmplepd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x02]
21 ; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
22 ; CHECK-NEXT: retq ## encoding: [0xc3]
23 %mask = fcmp ole <8 x double> %x, %y
24 %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
; test3: integer equality compare of %x against a <16 x i32> loaded from %yp,
; selecting %x where equal and %x1 elsewhere. The checks expect the load to
; appear as the memory operand of vpcmpeqd, followed by a masked vpblendmd.
28 define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwind {
31 ; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x0f]
32 ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
33 ; CHECK-NEXT: retq ## encoding: [0xc3]
34 %y = load <16 x i32>, <16 x i32>* %yp, align 4
35 %mask = icmp eq <16 x i32> %x, %y
36 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test4_unsigned: unsigned greater-or-equal compare (icmp uge) of <16 x i32>
; %x and %y; the select yields %x1 where the compare is true and %y elsewhere.
; The checks expect vpcmpnltud into %k1 followed by a masked vpblendmd.
40 define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
41 ; CHECK-LABEL: test4_unsigned:
43 ; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xc9,0x05]
44 ; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
45 ; CHECK-NEXT: retq ## encoding: [0xc3]
46 %mask = icmp uge <16 x i32> %x, %y
47 %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
; test5: integer equality compare of two <8 x i64> values driving a select
; between %x and %y. The checks expect vpcmpeqq into %k1 followed by a masked
; vpblendmq.
51 define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
54 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc9]
55 ; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
56 ; CHECK-NEXT: retq ## encoding: [0xc3]
57 %mask = icmp eq <8 x i64> %x, %y
58 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
; test6_unsigned: unsigned greater-than compare (icmp ugt) of <8 x i64> %x and
; %y; the select yields %x1 where the compare is true and %y elsewhere.
; The checks expect vpcmpnleuq into %k1 followed by a masked vpblendmq.
62 define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) nounwind {
63 ; CHECK-LABEL: test6_unsigned:
65 ; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc9,0x06]
66 ; CHECK-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc2]
67 ; CHECK-NEXT: retq ## encoding: [0xc3]
68 %mask = icmp ugt <8 x i64> %x, %y
69 %max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
; test7: ordered less-than compare (fcmp olt) of <4 x float> %a against zero,
; selecting %a where true and %b elsewhere. The AVX512-prefixed checks perform
; the compare and blend on zmm registers (with the xmm arguments annotated as
; sub-registers in the kill comments); the SKX checks use xmm registers directly.
73 define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
74 ; AVX512-LABEL: test7:
76 ; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
77 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
78 ; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
79 ; AVX512-NEXT: vcmpltps %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xca,0x01]
80 ; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
81 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
82 ; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
83 ; AVX512-NEXT: retq ## encoding: [0xc3]
87 ; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x57,0xd2]
88 ; SKX-NEXT: vcmpltps %xmm2, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xca,0x01]
89 ; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
90 ; SKX-NEXT: retq ## encoding: [0xc3]
92 %mask = fcmp olt <4 x float> %a, zeroinitializer
93 %c = select <4 x i1>%mask, <4 x float>%a, <4 x float>%b
; test8: ordered less-than compare (fcmp olt) of <2 x double> %a against zero,
; selecting %a where true and %b elsewhere. The AVX512-prefixed checks perform
; the compare and blend on zmm registers; the SKX checks use xmm registers.
97 define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
98 ; AVX512-LABEL: test8:
100 ; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
101 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
102 ; AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x57,0xd2]
103 ; AVX512-NEXT: vcmpltpd %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xca,0x01]
104 ; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
105 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
106 ; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
107 ; AVX512-NEXT: retq ## encoding: [0xc3]
111 ; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x57,0xd2]
112 ; SKX-NEXT: vcmpltpd %xmm2, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xca,0x01]
113 ; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
114 ; SKX-NEXT: retq ## encoding: [0xc3]
115 %mask = fcmp olt <2 x double> %a, zeroinitializer
116 %c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b
; test9: integer equality compare of two <8 x i32> values driving a select
; between %x and %y. The AVX512-prefixed checks run vpcmpeqd/vpblendmd on zmm
; registers; the SKX checks use ymm registers.
120 define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
121 ; AVX512-LABEL: test9:
123 ; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
124 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
125 ; AVX512-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc9]
126 ; AVX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
127 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
128 ; AVX512-NEXT: retq ## encoding: [0xc3]
132 ; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc9]
133 ; SKX-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x64,0xc0]
134 ; SKX-NEXT: retq ## encoding: [0xc3]
135 %mask = icmp eq <8 x i32> %x, %y
136 %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
; test10: ordered equality compare (fcmp oeq) of two <8 x float> values driving
; a select between %x and %y. The AVX512-prefixed checks run vcmpeqps/vblendmps
; on zmm registers; the SKX checks use ymm registers.
140 define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
141 ; AVX512-LABEL: test10:
143 ; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
144 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
145 ; AVX512-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x00]
146 ; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
147 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
148 ; AVX512-NEXT: retq ## encoding: [0xc3]
152 ; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x00]
153 ; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
154 ; SKX-NEXT: retq ## encoding: [0xc3]
156 %mask = fcmp oeq <8 x float> %x, %y
157 %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
; test11_unsigned: unsigned greater-than compare (icmp ugt) of <8 x i32> %x and
; %y feeding a select of %x over %y. Both the AVX512 and SKX checks expect the
; compare+select pair to become a single vpmaxud on ymm registers.
161 define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
162 ; AVX512-LABEL: test11_unsigned:
164 ; AVX512-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3f,0xc1]
165 ; AVX512-NEXT: retq ## encoding: [0xc3]
167 ; SKX-LABEL: test11_unsigned:
169 ; SKX-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3f,0xc1]
170 ; SKX-NEXT: retq ## encoding: [0xc3]
171 %mask = icmp ugt <8 x i32> %x, %y
172 %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
; test12: equality compare of two <16 x i64> values with the <16 x i1> result
; bitcast to i16. All prefixes expect two 512-bit vpcmpeqq compares joined by
; kunpckbw; the mask is then moved to %eax with kmovw (KNL) or kmovd
; (AVX512BW and SKX).
176 define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
179 ; KNL-NEXT: vpcmpeqq %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc2]
180 ; KNL-NEXT: vpcmpeqq %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x29,0xcb]
181 ; KNL-NEXT: kunpckbw %k0, %k1, %k0 ## encoding: [0xc5,0xf5,0x4b,0xc0]
182 ; KNL-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
183 ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
184 ; KNL-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
185 ; KNL-NEXT: retq ## encoding: [0xc3]
187 ; AVX512BW-LABEL: test12:
188 ; AVX512BW: ## %bb.0:
189 ; AVX512BW-NEXT: vpcmpeqq %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc2]
190 ; AVX512BW-NEXT: vpcmpeqq %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x29,0xcb]
191 ; AVX512BW-NEXT: kunpckbw %k0, %k1, %k0 ## encoding: [0xc5,0xf5,0x4b,0xc0]
192 ; AVX512BW-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
193 ; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
194 ; AVX512BW-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
195 ; AVX512BW-NEXT: retq ## encoding: [0xc3]
199 ; SKX-NEXT: vpcmpeqq %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc2]
200 ; SKX-NEXT: vpcmpeqq %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x29,0xcb]
201 ; SKX-NEXT: kunpckbw %k0, %k1, %k0 ## encoding: [0xc5,0xf5,0x4b,0xc0]
202 ; SKX-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
203 ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
204 ; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
205 ; SKX-NEXT: retq ## encoding: [0xc3]
206 %res = icmp eq <16 x i64> %a, %b
207 %res1 = bitcast <16 x i1> %res to i16
; test12_v32i32: equality compare of two <32 x i32> values with the <32 x i1>
; result bitcast to i32. The KNL checks move each 16-bit mask to a GPR with
; kmovw and combine them with shll/orl; the AVX512BW and SKX checks combine the
; two masks with kunpckwd and move the result with a single kmovd.
211 define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
212 ; KNL-LABEL: test12_v32i32:
214 ; KNL-NEXT: vpcmpeqd %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc2]
215 ; KNL-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
216 ; KNL-NEXT: vpcmpeqd %zmm3, %zmm1, %k0 ## encoding: [0x62,0xf1,0x75,0x48,0x76,0xc3]
217 ; KNL-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
218 ; KNL-NEXT: shll $16, %eax ## encoding: [0xc1,0xe0,0x10]
219 ; KNL-NEXT: orl %ecx, %eax ## encoding: [0x09,0xc8]
220 ; KNL-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
221 ; KNL-NEXT: retq ## encoding: [0xc3]
223 ; AVX512BW-LABEL: test12_v32i32:
224 ; AVX512BW: ## %bb.0:
225 ; AVX512BW-NEXT: vpcmpeqd %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc2]
226 ; AVX512BW-NEXT: vpcmpeqd %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x76,0xcb]
227 ; AVX512BW-NEXT: kunpckwd %k0, %k1, %k0 ## encoding: [0xc5,0xf4,0x4b,0xc0]
228 ; AVX512BW-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
229 ; AVX512BW-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
230 ; AVX512BW-NEXT: retq ## encoding: [0xc3]
232 ; SKX-LABEL: test12_v32i32:
234 ; SKX-NEXT: vpcmpeqd %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc2]
235 ; SKX-NEXT: vpcmpeqd %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x76,0xcb]
236 ; SKX-NEXT: kunpckwd %k0, %k1, %k0 ## encoding: [0xc5,0xf4,0x4b,0xc0]
237 ; SKX-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
238 ; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
239 ; SKX-NEXT: retq ## encoding: [0xc3]
240 %res = icmp eq <32 x i32> %a, %b
241 %res1 = bitcast <32 x i1> %res to i32
; test12_v64i16: equality compare of two <64 x i16> values with the <64 x i1>
; result bitcast to i64. The KNL checks compare four 256-bit pieces with
; vpcmpeqw, turn each result into a mask via vpmovsxwd + vptestmd, and assemble
; the 64-bit value in GPRs with shifts and ors. The AVX512BW and SKX checks use
; two 512-bit vpcmpeqw compares, kunpckdq and a single kmovq.
245 define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
246 ; KNL-LABEL: test12_v64i16:
248 ; KNL-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm4 ## encoding: [0xc5,0xfd,0x75,0xe2]
249 ; KNL-NEXT: vpmovsxwd %ymm4, %zmm4 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xe4]
250 ; KNL-NEXT: vptestmd %zmm4, %zmm4, %k0 ## encoding: [0x62,0xf2,0x5d,0x48,0x27,0xc4]
251 ; KNL-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
252 ; KNL-NEXT: vextracti64x4 $1, %zmm2, %ymm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x3b,0xd2,0x01]
253 ; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x3b,0xc0,0x01]
254 ; KNL-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x75,0xc2]
255 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xc0]
256 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
257 ; KNL-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
258 ; KNL-NEXT: shll $16, %ecx ## encoding: [0xc1,0xe1,0x10]
259 ; KNL-NEXT: orl %eax, %ecx ## encoding: [0x09,0xc1]
260 ; KNL-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm0 ## encoding: [0xc5,0xf5,0x75,0xc3]
261 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xc0]
262 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
263 ; KNL-NEXT: kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
264 ; KNL-NEXT: vextracti64x4 $1, %zmm3, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x3b,0xd8,0x01]
265 ; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 ## encoding: [0x62,0xf3,0xfd,0x48,0x3b,0xc9,0x01]
266 ; KNL-NEXT: vpcmpeqw %ymm0, %ymm1, %ymm0 ## encoding: [0xc5,0xf5,0x75,0xc0]
267 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xc0]
268 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
269 ; KNL-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
270 ; KNL-NEXT: shll $16, %eax ## encoding: [0xc1,0xe0,0x10]
271 ; KNL-NEXT: orl %edx, %eax ## encoding: [0x09,0xd0]
272 ; KNL-NEXT: shlq $32, %rax ## encoding: [0x48,0xc1,0xe0,0x20]
273 ; KNL-NEXT: orq %rcx, %rax ## encoding: [0x48,0x09,0xc8]
274 ; KNL-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
275 ; KNL-NEXT: retq ## encoding: [0xc3]
277 ; AVX512BW-LABEL: test12_v64i16:
278 ; AVX512BW: ## %bb.0:
279 ; AVX512BW-NEXT: vpcmpeqw %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc2]
280 ; AVX512BW-NEXT: vpcmpeqw %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x75,0xcb]
281 ; AVX512BW-NEXT: kunpckdq %k0, %k1, %k0 ## encoding: [0xc4,0xe1,0xf4,0x4b,0xc0]
282 ; AVX512BW-NEXT: kmovq %k0, %rax ## encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
283 ; AVX512BW-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
284 ; AVX512BW-NEXT: retq ## encoding: [0xc3]
286 ; SKX-LABEL: test12_v64i16:
288 ; SKX-NEXT: vpcmpeqw %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc2]
289 ; SKX-NEXT: vpcmpeqw %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x75,0xcb]
290 ; SKX-NEXT: kunpckdq %k0, %k1, %k0 ## encoding: [0xc4,0xe1,0xf4,0x4b,0xc0]
291 ; SKX-NEXT: kmovq %k0, %rax ## encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
292 ; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
293 ; SKX-NEXT: retq ## encoding: [0xc3]
294 %res = icmp eq <64 x i16> %a, %b
295 %res1 = bitcast <64 x i1> %res to i64
; test13: ordered equality compare (fcmp oeq) of two <16 x float> values with
; the <16 x i1> result zero-extended to <16 x i32>. The AVX512-prefixed checks
; materialize the mask with a zero-masked vpternlogd $255 and shift right by 31;
; the SKX checks use vpmovm2d followed by the same vpsrld $31.
299 define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
300 ; AVX512-LABEL: test13:
302 ; AVX512-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x00]
303 ; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
304 ; AVX512-NEXT: vpsrld $31, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xd0,0x1f]
305 ; AVX512-NEXT: retq ## encoding: [0xc3]
309 ; SKX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc1,0x00]
310 ; SKX-NEXT: vpmovm2d %k0, %zmm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x38,0xc0]
311 ; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xd0,0x1f]
312 ; SKX-NEXT: retq ## encoding: [0xc3]
314 %cmpvector_i = fcmp oeq <16 x float> %a, %b
315 %conv = zext <16 x i1> %cmpvector_i to <16 x i32>
; test14: computes %a - %b on <16 x i32>, then keeps the difference only in
; lanes where it is signed-greater than %a and produces zero elsewhere (the
; sext / compare-with-zero pair inverts the mask before the select).
; The checks expect vpsubd, vpcmpgtd into %k1 and a zero-masked vmovdqa32.
319 define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
320 ; CHECK-LABEL: test14:
322 ; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc9]
323 ; CHECK-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x66,0xc8]
324 ; CHECK-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xc1]
325 ; CHECK-NEXT: retq ## encoding: [0xc3]
326 %sub_r = sub <16 x i32> %a, %b
327 %cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
328 %sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>
329 %mask = icmp eq <16 x i32> %sext.i3.i, zeroinitializer
330 %res = select <16 x i1> %mask, <16 x i32> zeroinitializer, <16 x i32> %sub_r
; test15: computes %a - %b on <8 x i64>, then keeps the difference only in
; lanes where it is signed-greater than %a and produces zero elsewhere (the
; sext / compare-with-zero pair inverts the mask before the select).
; The checks expect vpsubq, vpcmpgtq into %k1 and a zero-masked vmovdqa64.
334 define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
335 ; CHECK-LABEL: test15:
337 ; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc9]
338 ; CHECK-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xc8]
339 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0xc1]
340 ; CHECK-NEXT: retq ## encoding: [0xc3]
341 %sub_r = sub <8 x i64> %a, %b
342 %cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
343 %sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>
344 %mask = icmp eq <8 x i64> %sext.i3.i, zeroinitializer
345 %res = select <8 x i1> %mask, <8 x i64> zeroinitializer, <8 x i64> %sub_r
; test16: signed greater-or-equal compare (icmp sge) of <16 x i32> %x and %y;
; the select yields %x1 where the compare is true and %y elsewhere.
; The checks expect vpcmpnltd into %k1 followed by a masked vpblendmd.
349 define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
350 ; CHECK-LABEL: test16:
352 ; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x05]
353 ; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
354 ; CHECK-NEXT: retq ## encoding: [0xc3]
355 %mask = icmp sge <16 x i32> %x, %y
356 %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
; test17: signed greater-than compare (icmp sgt) of %x against a <16 x i32>
; loaded from %y.ptr, selecting %x where true and %x1 elsewhere. The checks
; expect the load as the memory operand of vpcmpgtd, then a masked vpblendmd.
360 define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
361 ; CHECK-LABEL: test17:
363 ; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0x0f]
364 ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
365 ; CHECK-NEXT: retq ## encoding: [0xc3]
366 %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
367 %mask = icmp sgt <16 x i32> %x, %y
368 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test18: signed less-or-equal compare (icmp sle) of %x against a <16 x i32>
; loaded from %y.ptr, selecting %x where true and %x1 elsewhere. The checks
; expect the load as the memory operand of vpcmpled, then a masked vpblendmd.
372 define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
373 ; CHECK-LABEL: test18:
375 ; CHECK-NEXT: vpcmpled (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0x0f,0x02]
376 ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
377 ; CHECK-NEXT: retq ## encoding: [0xc3]
378 %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
379 %mask = icmp sle <16 x i32> %x, %y
380 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test19: unsigned less-or-equal compare (icmp ule) of %x against a <16 x i32>
; loaded from %y.ptr, selecting %x where true and %x1 elsewhere. The checks
; expect the load as the memory operand of vpcmpleud, then a masked vpblendmd.
384 define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
385 ; CHECK-LABEL: test19:
387 ; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0x0f,0x02]
388 ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
389 ; CHECK-NEXT: retq ## encoding: [0xc3]
390 %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
391 %mask = icmp ule <16 x i32> %x, %y
392 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test20: two <16 x i32> equality compares combined with
; "select %mask0, %mask1, zeroinitializer" (a lane-wise AND of the two masks),
; then used to select between %x and %y. The checks expect the second vpcmpeqd
; to be write-masked by the %k1 result of the first, followed by vpblendmd.
396 define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
397 ; CHECK-LABEL: test20:
399 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc9]
400 ; CHECK-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf1,0x6d,0x49,0x76,0xcb]
401 ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
402 ; CHECK-NEXT: retq ## encoding: [0xc3]
403 %mask1 = icmp eq <16 x i32> %x1, %y1
404 %mask0 = icmp eq <16 x i32> %x, %y
405 %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
406 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
; test21: a signed <= compare of %x,%y and a signed >= compare of %x1,%y1 on
; <8 x i64>, combined with "select %mask0, %mask1, zeroinitializer" and used to
; select between %x and %x1. The checks expect vpcmpleq into %k1, a vpcmpnltq
; write-masked by %k1, and a masked vpblendmq.
410 define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
411 ; CHECK-LABEL: test21:
413 ; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x02]
414 ; CHECK-NEXT: vpcmpnltq %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf3,0xed,0x49,0x1f,0xcb,0x05]
415 ; CHECK-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x64,0xc0]
416 ; CHECK-NEXT: retq ## encoding: [0xc3]
417 %mask1 = icmp sge <8 x i64> %x1, %y1
418 %mask0 = icmp sle <8 x i64> %x, %y
419 %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
420 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
; test22: a signed > compare of %x1,%y1 and a signed > compare of %x against a
; <8 x i64> loaded from %y.ptr, combined with
; "select %mask0, %mask1, zeroinitializer" and used to select between %x and
; %x1. The checks expect a register vpcmpgtq into %k1, then a vpcmpgtq with a
; memory operand write-masked by %k1, then a masked vpblendmq.
424 define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
425 ; CHECK-LABEL: test22:
427 ; CHECK-NEXT: vpcmpgtq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xca]
428 ; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0x0f]
429 ; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
430 ; CHECK-NEXT: retq ## encoding: [0xc3]
431 %mask1 = icmp sgt <8 x i64> %x1, %y1
432 %y = load <8 x i64>, <8 x i64>* %y.ptr, align 4
433 %mask0 = icmp sgt <8 x i64> %x, %y
434 %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
435 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
; test23: a signed >= compare of %x1,%y1 and an unsigned <= compare of %x
; against a <16 x i32> loaded from %y.ptr, combined with
; "select %mask0, %mask1, zeroinitializer" and used to select between %x and
; %x1. The checks expect vpcmpnltd into %k1, a vpcmpleud with a memory operand
; write-masked by %k1, and a masked vpblendmd.
439 define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
440 ; CHECK-LABEL: test23:
442 ; CHECK-NEXT: vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
443 ; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0x0f,0x02]
444 ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
445 ; CHECK-NEXT: retq ## encoding: [0xc3]
446 %mask1 = icmp sge <16 x i32> %x1, %y1
447 %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
448 %mask0 = icmp ule <16 x i32> %x, %y
449 %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
450 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test24: loads a scalar i64 from %yb.ptr, splats it to <8 x i64> with
; insertelement + shufflevector, and compares %x for equality against the
; splat. The checks expect the splat to appear as an embedded-broadcast memory
; operand ({1to8}) on vpcmpeqq, followed by a masked vpblendmq.
454 define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
455 ; CHECK-LABEL: test24:
457 ; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x58,0x29,0x0f]
458 ; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
459 ; CHECK-NEXT: retq ## encoding: [0xc3]
460 %yb = load i64, i64* %yb.ptr, align 4
461 %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
462 %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
463 %mask = icmp eq <8 x i64> %x, %y
464 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
; test25: loads a scalar i32 from %yb.ptr, splats it to <16 x i32> with
; insertelement + shufflevector, and performs a signed <= compare of %x against
; the splat. The checks expect an embedded-broadcast memory operand ({1to16})
; on vpcmpled, followed by a masked vpblendmd.
468 define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind {
469 ; CHECK-LABEL: test25:
471 ; CHECK-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x58,0x1f,0x0f,0x02]
472 ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
473 ; CHECK-NEXT: retq ## encoding: [0xc3]
474 %yb = load i32, i32* %yb.ptr, align 4
475 %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
476 %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
477 %mask = icmp sle <16 x i32> %x, %y
478 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test26: a signed >= compare of %x1,%y1 combined (via
; "select %mask0, %mask1, zeroinitializer") with a signed > compare of %x
; against an i32 splat loaded from %yb.ptr. The checks expect vpcmpnltd into
; %k1, then a vpcmpgtd with an embedded-broadcast ({1to16}) memory operand
; write-masked by %k1, then a masked vpblendmd.
482 define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
483 ; CHECK-LABEL: test26:
485 ; CHECK-NEXT: vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
486 ; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0x66,0x0f]
487 ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
488 ; CHECK-NEXT: retq ## encoding: [0xc3]
489 %mask1 = icmp sge <16 x i32> %x1, %y1
490 %yb = load i32, i32* %yb.ptr, align 4
491 %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
492 %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
493 %mask0 = icmp sgt <16 x i32> %x, %y
494 %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
495 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test27: a signed >= compare of %x1,%y1 combined (via
; "select %mask0, %mask1, zeroinitializer") with a signed <= compare of %x
; against an i64 splat loaded from %yb.ptr. The checks expect vpcmpnltq into
; %k1, then a vpcmpleq with an embedded-broadcast ({1to8}) memory operand
; write-masked by %k1, then a masked vpblendmq.
499 define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
500 ; CHECK-LABEL: test27:
502 ; CHECK-NEXT: vpcmpnltq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x05]
503 ; CHECK-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x59,0x1f,0x0f,0x02]
504 ; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
505 ; CHECK-NEXT: retq ## encoding: [0xc3]
506 %mask1 = icmp sge <8 x i64> %x1, %y1
507 %yb = load i64, i64* %yb.ptr, align 4
508 %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
509 %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
510 %mask0 = icmp sle <8 x i64> %x, %y
511 %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
512 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
; test28: two signed > compares on <8 x i64> whose <8 x i1> results are
; compared for equality, with the result sign-extended to <8 x i32>.
; The AVX512-prefixed checks expect kxnorw plus a zero-masked vpternlogd $255;
; the SKX checks expect kxnorb plus vpmovm2d into a ymm register.
516 define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) {
517 ; AVX512-LABEL: test28:
519 ; AVX512-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xc1]
520 ; AVX512-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf2,0xed,0x48,0x37,0xcb]
521 ; AVX512-NEXT: kxnorw %k1, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc9]
522 ; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
523 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
524 ; AVX512-NEXT: retq ## encoding: [0xc3]
528 ; SKX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xc1]
529 ; SKX-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf2,0xed,0x48,0x37,0xcb]
530 ; SKX-NEXT: kxnorb %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x46,0xc1]
531 ; SKX-NEXT: vpmovm2d %k0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x38,0xc0]
532 ; SKX-NEXT: retq ## encoding: [0xc3]
533 %x_gt_y = icmp sgt <8 x i64> %x, %y
534 %x1_gt_y1 = icmp sgt <8 x i64> %x1, %y1
535 %res = icmp eq <8 x i1>%x_gt_y, %x1_gt_y1
536 %resse = sext <8 x i1>%res to <8 x i32>
; test29: two signed > compares on <16 x i32> whose <16 x i1> results are
; compared for inequality, with the result sign-extended to <16 x i8>.
; All prefixes expect kxorw on the two masks. The KNL checks then use a
; zero-masked vpternlogd $255 plus vpmovdb; the AVX512BW checks use vpmovm2b
; into a zmm register; the SKX checks use vpmovm2b into an xmm register.
540 define <16 x i8>@test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) {
543 ; KNL-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xc1]
544 ; KNL-NEXT: vpcmpgtd %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6d,0x48,0x66,0xcb]
545 ; KNL-NEXT: kxorw %k1, %k0, %k1 ## encoding: [0xc5,0xfc,0x47,0xc9]
546 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
547 ; KNL-NEXT: vpmovdb %zmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
548 ; KNL-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
549 ; KNL-NEXT: retq ## encoding: [0xc3]
551 ; AVX512BW-LABEL: test29:
552 ; AVX512BW: ## %bb.0:
553 ; AVX512BW-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xc1]
554 ; AVX512BW-NEXT: vpcmpgtd %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6d,0x48,0x66,0xcb]
555 ; AVX512BW-NEXT: kxorw %k1, %k0, %k0 ## encoding: [0xc5,0xfc,0x47,0xc1]
556 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0]
557 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
558 ; AVX512BW-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
559 ; AVX512BW-NEXT: retq ## encoding: [0xc3]
563 ; SKX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xc1]
564 ; SKX-NEXT: vpcmpgtd %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6d,0x48,0x66,0xcb]
565 ; SKX-NEXT: kxorw %k1, %k0, %k0 ## encoding: [0xc5,0xfc,0x47,0xc1]
566 ; SKX-NEXT: vpmovm2b %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
567 ; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
568 ; SKX-NEXT: retq ## encoding: [0xc3]
569 %x_gt_y = icmp sgt <16 x i32> %x, %y
570 %x1_gt_y1 = icmp sgt <16 x i32> %x1, %y1
571 %res = icmp ne <16 x i1>%x_gt_y, %x1_gt_y1
572 %resse = sext <16 x i1>%res to <16 x i8>
; test30: ordered equality compare (fcmp oeq) of two <4 x double> values
; driving a select between %x and %y. The AVX512-prefixed checks run
; vcmpeqpd/vblendmpd on zmm registers; the SKX checks use ymm registers.
576 define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind {
577 ; AVX512-LABEL: test30:
579 ; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
580 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
581 ; AVX512-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x00]
582 ; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
583 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
584 ; AVX512-NEXT: retq ## encoding: [0xc3]
588 ; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc9,0x00]
589 ; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
590 ; SKX-NEXT: retq ## encoding: [0xc3]
592 %mask = fcmp oeq <4 x double> %x, %y
593 %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %y
594 ret <4 x double> %max
; test31: ordered less-than compare (fcmp olt) of %x against a <2 x double>
; loaded from %yp, selecting %x where true and %x1 elsewhere.
; The AVX512-prefixed checks load with a separate vmovupd and compare on zmm
; registers; the SKX checks use the load as the memory operand of an xmm
; vcmpltpd.
597 define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp) nounwind {
598 ; AVX512-LABEL: test31:
600 ; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
601 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
602 ; AVX512-NEXT: vmovupd (%rdi), %xmm2 ## encoding: [0xc5,0xf9,0x10,0x17]
603 ; AVX512-NEXT: vcmpltpd %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xca,0x01]
604 ; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
605 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
606 ; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
607 ; AVX512-NEXT: retq ## encoding: [0xc3]
611 ; SKX-NEXT: vcmpltpd (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0x0f,0x01]
612 ; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
613 ; SKX-NEXT: retq ## encoding: [0xc3]
615 %y = load <2 x double>, <2 x double>* %yp, align 4
616 %mask = fcmp olt <2 x double> %x, %y
617 %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
618 ret <2 x double> %max
; test31_commute: like test31 but with the fcmp olt operands swapped so the
; loaded <2 x double> is the first operand (%y < %x).
; The AVX512-prefixed checks keep vcmpltpd with the register operands swapped;
; the SKX checks keep the load as the memory operand and use vcmpgtpd instead.
621 define <2 x double> @test31_commute(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp) nounwind {
622 ; AVX512-LABEL: test31_commute:
624 ; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
625 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
626 ; AVX512-NEXT: vmovupd (%rdi), %xmm2 ## encoding: [0xc5,0xf9,0x10,0x17]
627 ; AVX512-NEXT: vcmpltpd %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x48,0xc2,0xc8,0x01]
628 ; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
629 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
630 ; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
631 ; AVX512-NEXT: retq ## encoding: [0xc3]
633 ; SKX-LABEL: test31_commute:
635 ; SKX-NEXT: vcmpgtpd (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0x0f,0x0e]
636 ; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
637 ; SKX-NEXT: retq ## encoding: [0xc3]
639 %y = load <2 x double>, <2 x double>* %yp, align 4
640 %mask = fcmp olt <2 x double> %y, %x
641 %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
642 ret <2 x double> %max
; test32: ordered greater-than compare (fcmp ogt) with the loaded
; <4 x double> as the first operand (%y > %x), selecting %x where true and %x1
; elsewhere. The AVX512-prefixed checks load with vmovupd and emit a zmm
; vcmpltpd of %x against the loaded value; the SKX checks emit a ymm vcmpltpd
; with the load as its memory operand.
645 define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp) nounwind {
646 ; AVX512-LABEL: test32:
648 ; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
649 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
650 ; AVX512-NEXT: vmovupd (%rdi), %ymm2 ## encoding: [0xc5,0xfd,0x10,0x17]
651 ; AVX512-NEXT: vcmpltpd %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xca,0x01]
652 ; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
653 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
654 ; AVX512-NEXT: retq ## encoding: [0xc3]
658 ; SKX-NEXT: vcmpltpd (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x0f,0x01]
659 ; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
660 ; SKX-NEXT: retq ## encoding: [0xc3]
662 %y = load <4 x double>, <4 x double>* %yp, align 4
663 %mask = fcmp ogt <4 x double> %y, %x
664 %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
665 ret <4 x double> %max
; test32_commute: like test32 but with the fcmp ogt operands swapped so the
; loaded <4 x double> is the second operand (%x > %y).
; The AVX512-prefixed checks emit a zmm vcmpltpd with the loaded value as the
; left-hand side; the SKX checks keep the load as the memory operand and use
; vcmpgtpd.
668 define <4 x double> @test32_commute(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp) nounwind {
669 ; AVX512-LABEL: test32_commute:
671 ; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
672 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
673 ; AVX512-NEXT: vmovupd (%rdi), %ymm2 ## encoding: [0xc5,0xfd,0x10,0x17]
674 ; AVX512-NEXT: vcmpltpd %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x48,0xc2,0xc8,0x01]
675 ; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
676 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
677 ; AVX512-NEXT: retq ## encoding: [0xc3]
679 ; SKX-LABEL: test32_commute:
681 ; SKX-NEXT: vcmpgtpd (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x0f,0x0e]
682 ; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
683 ; SKX-NEXT: retq ## encoding: [0xc3]
685 %y = load <4 x double>, <4 x double>* %yp, align 4
686 %mask = fcmp ogt <4 x double> %x, %y
687 %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
688 ret <4 x double> %max
; Per-lane select between %x and %x1, controlled by `fcmp olt` of %x against
; the <8 x double> loaded from %yp (loaded value is the second operand).
691 define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp) nounwind {
692 ; CHECK-LABEL: test33:
694 ; CHECK-NEXT: vcmpltpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x01]
695 ; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
696 ; CHECK-NEXT: retq ## encoding: [0xc3]
697 %y = load <8 x double>, <8 x double>* %yp, align 4
698 %mask = fcmp olt <8 x double> %x, %y
699 %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
700 ret <8 x double> %max
; Operand-swapped companion of test33: the <8 x double> loaded from %yp is the
; first `fcmp olt` operand and %x is the second.
703 define <8 x double> @test33_commute(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp) nounwind {
704 ; CHECK-LABEL: test33_commute:
706 ; CHECK-NEXT: vcmpgtpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x0e]
707 ; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
708 ; CHECK-NEXT: retq ## encoding: [0xc3]
709 %y = load <8 x double>, <8 x double>* %yp, align 4
710 %mask = fcmp olt <8 x double> %y, %x
711 %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
712 ret <8 x double> %max
; Per-lane select between %x and %x1, controlled by `fcmp olt` of %x against
; the <4 x float> loaded from %yp (loaded value is the second operand).
715 define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) nounwind {
716 ; AVX512-LABEL: test34:
718 ; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
719 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
720 ; AVX512-NEXT: vmovups (%rdi), %xmm2 ## encoding: [0xc5,0xf8,0x10,0x17]
721 ; AVX512-NEXT: vcmpltps %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xca,0x01]
722 ; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
723 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
724 ; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
725 ; AVX512-NEXT: retq ## encoding: [0xc3]
729 ; SKX-NEXT: vcmpltps (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0x0f,0x01]
730 ; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
731 ; SKX-NEXT: retq ## encoding: [0xc3]
732 %y = load <4 x float>, <4 x float>* %yp, align 4
733 %mask = fcmp olt <4 x float> %x, %y
734 %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
; Operand-swapped companion of test34: the <4 x float> loaded from %yp is the
; first `fcmp olt` operand and %x is the second.
738 define <4 x float> @test34_commute(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) nounwind {
739 ; AVX512-LABEL: test34_commute:
741 ; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
742 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
743 ; AVX512-NEXT: vmovups (%rdi), %xmm2 ## encoding: [0xc5,0xf8,0x10,0x17]
744 ; AVX512-NEXT: vcmpltps %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x48,0xc2,0xc8,0x01]
745 ; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
746 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
747 ; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
748 ; AVX512-NEXT: retq ## encoding: [0xc3]
750 ; SKX-LABEL: test34_commute:
752 ; SKX-NEXT: vcmpgtps (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0x0f,0x0e]
753 ; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
754 ; SKX-NEXT: retq ## encoding: [0xc3]
755 %y = load <4 x float>, <4 x float>* %yp, align 4
756 %mask = fcmp olt <4 x float> %y, %x
757 %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
; Per-lane select between %x and %x1, controlled by `fcmp ogt` whose first
; operand is the <8 x float> loaded from %yp and whose second operand is %x.
761 define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) nounwind {
762 ; AVX512-LABEL: test35:
764 ; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
765 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
766 ; AVX512-NEXT: vmovups (%rdi), %ymm2 ## encoding: [0xc5,0xfc,0x10,0x17]
767 ; AVX512-NEXT: vcmpltps %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xca,0x01]
768 ; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
769 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
770 ; AVX512-NEXT: retq ## encoding: [0xc3]
774 ; SKX-NEXT: vcmpltps (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x0f,0x01]
775 ; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
776 ; SKX-NEXT: retq ## encoding: [0xc3]
778 %y = load <8 x float>, <8 x float>* %yp, align 4
779 %mask = fcmp ogt <8 x float> %y, %x
780 %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
; Operand-swapped companion of test35: %x is the first `fcmp ogt` operand and
; the <8 x float> loaded from %yp is the second.
784 define <8 x float> @test35_commute(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) nounwind {
785 ; AVX512-LABEL: test35_commute:
787 ; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
788 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
789 ; AVX512-NEXT: vmovups (%rdi), %ymm2 ## encoding: [0xc5,0xfc,0x10,0x17]
790 ; AVX512-NEXT: vcmpltps %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x48,0xc2,0xc8,0x01]
791 ; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
792 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
793 ; AVX512-NEXT: retq ## encoding: [0xc3]
795 ; SKX-LABEL: test35_commute:
797 ; SKX-NEXT: vcmpgtps (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x0f,0x0e]
798 ; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
799 ; SKX-NEXT: retq ## encoding: [0xc3]
801 %y = load <8 x float>, <8 x float>* %yp, align 4
802 %mask = fcmp ogt <8 x float> %x, %y
803 %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
; Per-lane select between %x and %x1, controlled by `fcmp olt` of %x against
; the <16 x float> loaded from %yp (loaded value is the second operand).
807 define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp) nounwind {
808 ; CHECK-LABEL: test36:
810 ; CHECK-NEXT: vcmpltps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x01]
811 ; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
812 ; CHECK-NEXT: retq ## encoding: [0xc3]
813 %y = load <16 x float>, <16 x float>* %yp, align 4
814 %mask = fcmp olt <16 x float> %x, %y
815 %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
816 ret <16 x float> %max
; Operand-swapped companion of test36: the <16 x float> loaded from %yp is the
; first `fcmp olt` operand and %x is the second.
819 define <16 x float> @test36_commute(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp) nounwind {
820 ; CHECK-LABEL: test36_commute:
822 ; CHECK-NEXT: vcmpgtps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x0e]
823 ; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
824 ; CHECK-NEXT: retq ## encoding: [0xc3]
825 %y = load <16 x float>, <16 x float>* %yp, align 4
826 %mask = fcmp olt <16 x float> %y, %x
827 %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
828 ret <16 x float> %max
; A scalar double loaded from %ptr is splatted to <8 x double> (insertelement
; into lane 0, then a zero-mask shufflevector). The splat is the first operand
; of `fcmp ogt` against %x; the result selects between %x and %x1.
831 define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, double* %ptr) nounwind {
832 ; CHECK-LABEL: test37:
834 ; CHECK-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
835 ; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
836 ; CHECK-NEXT: retq ## encoding: [0xc3]
838 %a = load double, double* %ptr
839 %v = insertelement <8 x double> undef, double %a, i32 0
840 %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
842 %mask = fcmp ogt <8 x double> %shuffle, %x
843 %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
844 ret <8 x double> %max
; Operand-swapped companion of test37: %x is the first `fcmp ogt` operand and
; the <8 x double> splat of the scalar loaded from %ptr is the second.
847 define <8 x double> @test37_commute(<8 x double> %x, <8 x double> %x1, double* %ptr) nounwind {
848 ; CHECK-LABEL: test37_commute:
850 ; CHECK-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
851 ; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
852 ; CHECK-NEXT: retq ## encoding: [0xc3]
854 %a = load double, double* %ptr
855 %v = insertelement <8 x double> undef, double %a, i32 0
856 %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
858 %mask = fcmp ogt <8 x double> %x, %shuffle
859 %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
860 ret <8 x double> %max
; A scalar double loaded from %ptr is splatted to <4 x double> (insertelement
; into lane 0, then a zero-mask shufflevector). The splat is the first operand
; of `fcmp ogt` against %x; the result selects between %x and %x1.
863 define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nounwind {
864 ; AVX512-LABEL: test38:
866 ; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
867 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
868 ; AVX512-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
869 ; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
870 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
871 ; AVX512-NEXT: retq ## encoding: [0xc3]
875 ; SKX-NEXT: vcmpltpd (%rdi){1to4}, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x38,0xc2,0x0f,0x01]
876 ; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
877 ; SKX-NEXT: retq ## encoding: [0xc3]
879 %a = load double, double* %ptr
880 %v = insertelement <4 x double> undef, double %a, i32 0
881 %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> zeroinitializer
883 %mask = fcmp ogt <4 x double> %shuffle, %x
884 %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
885 ret <4 x double> %max
; Operand-swapped companion of test38: %x is the first `fcmp ogt` operand and
; the <4 x double> splat of the scalar loaded from %ptr is the second.
888 define <4 x double> @test38_commute(<4 x double> %x, <4 x double> %x1, double* %ptr) nounwind {
889 ; AVX512-LABEL: test38_commute:
891 ; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
892 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
893 ; AVX512-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
894 ; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
895 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
896 ; AVX512-NEXT: retq ## encoding: [0xc3]
898 ; SKX-LABEL: test38_commute:
900 ; SKX-NEXT: vcmpgtpd (%rdi){1to4}, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x38,0xc2,0x0f,0x0e]
901 ; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
902 ; SKX-NEXT: retq ## encoding: [0xc3]
904 %a = load double, double* %ptr
905 %v = insertelement <4 x double> undef, double %a, i32 0
906 %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> zeroinitializer
908 %mask = fcmp ogt <4 x double> %x, %shuffle
909 %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
910 ret <4 x double> %max
; A scalar double loaded from %ptr is splatted to <2 x double> (insertelement
; into lane 0, then a shufflevector with mask <0, 0>). The splat is the first
; operand of `fcmp ogt` against %x; the result selects between %x and %x1.
913 define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nounwind {
914 ; AVX512-LABEL: test39:
916 ; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
917 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
918 ; AVX512-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
919 ; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
920 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
921 ; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
922 ; AVX512-NEXT: retq ## encoding: [0xc3]
926 ; SKX-NEXT: vcmpltpd (%rdi){1to2}, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x18,0xc2,0x0f,0x01]
927 ; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
928 ; SKX-NEXT: retq ## encoding: [0xc3]
930 %a = load double, double* %ptr
931 %v = insertelement <2 x double> undef, double %a, i32 0
932 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
934 %mask = fcmp ogt <2 x double> %shuffle, %x
935 %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
936 ret <2 x double> %max
; Operand-swapped companion of test39: %x is the first `fcmp ogt` operand and
; the <2 x double> splat of the scalar loaded from %ptr is the second.
939 define <2 x double> @test39_commute(<2 x double> %x, <2 x double> %x1, double* %ptr) nounwind {
940 ; AVX512-LABEL: test39_commute:
942 ; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
943 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
944 ; AVX512-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
945 ; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
946 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
947 ; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
948 ; AVX512-NEXT: retq ## encoding: [0xc3]
950 ; SKX-LABEL: test39_commute:
952 ; SKX-NEXT: vcmpgtpd (%rdi){1to2}, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x18,0xc2,0x0f,0x0e]
953 ; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
954 ; SKX-NEXT: retq ## encoding: [0xc3]
956 %a = load double, double* %ptr
957 %v = insertelement <2 x double> undef, double %a, i32 0
958 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
960 %mask = fcmp ogt <2 x double> %x, %shuffle
961 %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
962 ret <2 x double> %max
; A scalar float loaded from %ptr is splatted to <16 x float> (insertelement
; into lane 0, then an all-zero-index shufflevector). The splat is the first
; operand of `fcmp ogt` against %x; the result selects between %x and %x1.
966 define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, float* %ptr) nounwind {
967 ; CHECK-LABEL: test40:
969 ; CHECK-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01]
970 ; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
971 ; CHECK-NEXT: retq ## encoding: [0xc3]
973 %a = load float, float* %ptr
974 %v = insertelement <16 x float> undef, float %a, i32 0
975 %shuffle = shufflevector <16 x float> %v, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
977 %mask = fcmp ogt <16 x float> %shuffle, %x
978 %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
979 ret <16 x float> %max
; Operand-swapped companion of test40: %x is the first `fcmp ogt` operand and
; the <16 x float> splat of the scalar loaded from %ptr is the second.
982 define <16 x float> @test40_commute(<16 x float> %x, <16 x float> %x1, float* %ptr) nounwind {
983 ; CHECK-LABEL: test40_commute:
985 ; CHECK-NEXT: vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e]
986 ; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
987 ; CHECK-NEXT: retq ## encoding: [0xc3]
989 %a = load float, float* %ptr
990 %v = insertelement <16 x float> undef, float %a, i32 0
991 %shuffle = shufflevector <16 x float> %v, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
993 %mask = fcmp ogt <16 x float> %x, %shuffle
994 %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
995 ret <16 x float> %max
; A scalar float loaded from %ptr is splatted to <8 x float> (insertelement
; into lane 0, then an all-zero-index shufflevector). The splat is the first
; operand of `fcmp ogt` against %x; the result selects between %x and %x1.
998 define <8 x float> @test41(<8 x float> %x, <8 x float> %x1, float* %ptr) nounwind {
999 ; AVX512-LABEL: test41:
1001 ; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
1002 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
1003 ; AVX512-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01]
1004 ; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
1005 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
1006 ; AVX512-NEXT: retq ## encoding: [0xc3]
1008 ; SKX-LABEL: test41:
1010 ; SKX-NEXT: vcmpltps (%rdi){1to8}, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x38,0xc2,0x0f,0x01]
1011 ; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
1012 ; SKX-NEXT: retq ## encoding: [0xc3]
1014 %a = load float, float* %ptr
1015 %v = insertelement <8 x float> undef, float %a, i32 0
1016 %shuffle = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1018 %mask = fcmp ogt <8 x float> %shuffle, %x
1019 %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
1020 ret <8 x float> %max
; Operand-swapped companion of test41: %x is the first `fcmp ogt` operand and
; the <8 x float> splat of the scalar loaded from %ptr is the second.
1023 define <8 x float> @test41_commute(<8 x float> %x, <8 x float> %x1, float* %ptr) nounwind {
1024 ; AVX512-LABEL: test41_commute:
1026 ; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
1027 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
1028 ; AVX512-NEXT: vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e]
1029 ; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
1030 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
1031 ; AVX512-NEXT: retq ## encoding: [0xc3]
1033 ; SKX-LABEL: test41_commute:
1035 ; SKX-NEXT: vcmpgtps (%rdi){1to8}, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x38,0xc2,0x0f,0x0e]
1036 ; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
1037 ; SKX-NEXT: retq ## encoding: [0xc3]
1039 %a = load float, float* %ptr
1040 %v = insertelement <8 x float> undef, float %a, i32 0
1041 %shuffle = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1043 %mask = fcmp ogt <8 x float> %x, %shuffle
1044 %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
1045 ret <8 x float> %max
; A scalar float loaded from %ptr is splatted to <4 x float> (insertelement
; into lane 0, then an all-zero-index shufflevector). The splat is the first
; operand of `fcmp ogt` against %x; the result selects between %x and %x1.
1048 define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, float* %ptr) nounwind {
1049 ; AVX512-LABEL: test42:
1051 ; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
1052 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
1053 ; AVX512-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01]
1054 ; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
1055 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
1056 ; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1057 ; AVX512-NEXT: retq ## encoding: [0xc3]
1059 ; SKX-LABEL: test42:
1061 ; SKX-NEXT: vcmpltps (%rdi){1to4}, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0x0f,0x01]
1062 ; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
1063 ; SKX-NEXT: retq ## encoding: [0xc3]
1065 %a = load float, float* %ptr
1066 %v = insertelement <4 x float> undef, float %a, i32 0
1067 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1069 %mask = fcmp ogt <4 x float> %shuffle, %x
1070 %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
1071 ret <4 x float> %max
; Operand-swapped companion of test42: %x is the first `fcmp ogt` operand and
; the <4 x float> splat of the scalar loaded from %ptr is the second.
1074 define <4 x float> @test42_commute(<4 x float> %x, <4 x float> %x1, float* %ptr) nounwind {
1075 ; AVX512-LABEL: test42_commute:
1077 ; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
1078 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
1079 ; AVX512-NEXT: vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e]
1080 ; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
1081 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
1082 ; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1083 ; AVX512-NEXT: retq ## encoding: [0xc3]
1085 ; SKX-LABEL: test42_commute:
1087 ; SKX-NEXT: vcmpgtps (%rdi){1to4}, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0x0f,0x0e]
1088 ; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
1089 ; SKX-NEXT: retq ## encoding: [0xc3]
1091 %a = load float, float* %ptr
1092 %v = insertelement <4 x float> undef, float %a, i32 0
1093 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
1095 %mask = fcmp ogt <4 x float> %x, %shuffle
1096 %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
1097 ret <4 x float> %max
; Like test37 (splat of a scalar double loaded from %ptr as the first operand
; of `fcmp ogt` against %x), but the compare result is ANDed with the incoming
; <8 x i1> %mask_in before it drives the select between %x and %x1.
1100 define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x i1> %mask_in) nounwind {
1101 ; KNL-LABEL: test43:
1103 ; KNL-NEXT: vpmovsxwq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x24,0xd2]
1104 ; KNL-NEXT: vpsllq $63, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xf2,0x3f]
1105 ; KNL-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
1106 ; KNL-NEXT: vptestmq %zmm2, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x27,0xca]
1107 ; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
1108 ; KNL-NEXT: retq ## encoding: [0xc3]
1110 ; AVX512BW-LABEL: test43:
1111 ; AVX512BW: ## %bb.0:
1112 ; AVX512BW-NEXT: vpsllw $15, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x71,0xf2,0x0f]
1113 ; AVX512BW-NEXT: vpmovw2m %zmm2, %k1 ## encoding: [0x62,0xf2,0xfe,0x48,0x29,0xca]
1114 ; AVX512BW-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xc2,0x0f,0x01]
1115 ; AVX512BW-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
1116 ; AVX512BW-NEXT: retq ## encoding: [0xc3]
1118 ; SKX-LABEL: test43:
1120 ; SKX-NEXT: vpsllw $15, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf2,0x0f]
1121 ; SKX-NEXT: vpmovw2m %xmm2, %k1 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xca]
1122 ; SKX-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xc2,0x0f,0x01]
1123 ; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
1124 ; SKX-NEXT: retq ## encoding: [0xc3]
1126 %a = load double, double* %ptr
1127 %v = insertelement <8 x double> undef, double %a, i32 0
1128 %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
1130 %mask_cmp = fcmp ogt <8 x double> %shuffle, %x
1131 %mask = and <8 x i1> %mask_cmp, %mask_in
1132 %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
1133 ret <8 x double> %max
; Operand-swapped companion of test43: %x is the first `fcmp ogt` operand and
; the splat of the scalar loaded from %ptr is the second; the compare result is
; still ANDed with %mask_in before the select.
1136 define <8 x double> @test43_commute(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x i1> %mask_in) nounwind {
1137 ; KNL-LABEL: test43_commute:
1139 ; KNL-NEXT: vpmovsxwq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x24,0xd2]
1140 ; KNL-NEXT: vpsllq $63, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xf2,0x3f]
1141 ; KNL-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
1142 ; KNL-NEXT: vptestmq %zmm2, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x27,0xca]
1143 ; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
1144 ; KNL-NEXT: retq ## encoding: [0xc3]
1146 ; AVX512BW-LABEL: test43_commute:
1147 ; AVX512BW: ## %bb.0:
1148 ; AVX512BW-NEXT: vpsllw $15, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x71,0xf2,0x0f]
1149 ; AVX512BW-NEXT: vpmovw2m %zmm2, %k1 ## encoding: [0x62,0xf2,0xfe,0x48,0x29,0xca]
1150 ; AVX512BW-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xc2,0x0f,0x0e]
1151 ; AVX512BW-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
1152 ; AVX512BW-NEXT: retq ## encoding: [0xc3]
1154 ; SKX-LABEL: test43_commute:
1156 ; SKX-NEXT: vpsllw $15, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf2,0x0f]
1157 ; SKX-NEXT: vpmovw2m %xmm2, %k1 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xca]
1158 ; SKX-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xc2,0x0f,0x0e]
1159 ; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
1160 ; SKX-NEXT: retq ## encoding: [0xc3]
1162 %a = load double, double* %ptr
1163 %v = insertelement <8 x double> undef, double %a, i32 0
1164 %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
1166 %mask_cmp = fcmp ogt <8 x double> %x, %shuffle
1167 %mask = and <8 x i1> %mask_cmp, %mask_in
1168 %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
1169 ret <8 x double> %max
; `icmp eq` on <4 x i16> operands; the <4 x i1> result is sign-extended to
; <4 x i32>.
1172 define <4 x i32> @test44(<4 x i16> %x, <4 x i16> %y) #0 {
1173 ; AVX512-LABEL: test44:
1175 ; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x75,0xc1]
1176 ; AVX512-NEXT: vpmovsxwd %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x23,0xc0]
1177 ; AVX512-NEXT: retq ## encoding: [0xc3]
1179 ; SKX-LABEL: test44:
1181 ; SKX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
1182 ; SKX-NEXT: vpmovm2d %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
1183 ; SKX-NEXT: retq ## encoding: [0xc3]
1184 %mask = icmp eq <4 x i16> %x, %y
1185 %1 = sext <4 x i1> %mask to <4 x i32>
; `icmp eq` on <2 x i16> operands; the <2 x i1> result is zero-extended to
; <2 x i64>.
1189 define <2 x i64> @test45(<2 x i16> %x, <2 x i16> %y) #0 {
1190 ; AVX512-LABEL: test45:
1192 ; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x75,0xc1]
1193 ; AVX512-NEXT: vpmovzxwq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x34,0xc0]
1194 ; AVX512-NEXT: ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1195 ; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A]
1196 ; AVX512-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
1197 ; AVX512-NEXT: retq ## encoding: [0xc3]
1199 ; SKX-LABEL: test45:
1201 ; SKX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
1202 ; SKX-NEXT: vpmovm2q %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
1203 ; SKX-NEXT: vpsrlq $63, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x3f]
1204 ; SKX-NEXT: retq ## encoding: [0xc3]
1205 %mask = icmp eq <2 x i16> %x, %y
1206 %1 = zext <2 x i1> %mask to <2 x i64>
; `fcmp oeq` on <2 x float> operands; the <2 x i1> result is zero-extended to
; <2 x i64>.
1210 define <2 x i64> @test46(<2 x float> %x, <2 x float> %y) #0 {
1211 ; AVX512-LABEL: test46:
1213 ; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0xc2,0xc1,0x00]
1214 ; AVX512-NEXT: vpermilps $212, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x04,0xc0,0xd4]
1215 ; AVX512-NEXT: ## xmm0 = xmm0[0,1,1,3]
1216 ; AVX512-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x54,0x05,A,A,A,A]
1217 ; AVX512-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
1218 ; AVX512-NEXT: retq ## encoding: [0xc3]
1220 ; SKX-LABEL: test46:
1222 ; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x00]
1223 ; SKX-NEXT: vpmovm2q %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
1224 ; SKX-NEXT: vpsrlq $63, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x3f]
1225 ; SKX-NEXT: retq ## encoding: [0xc3]
1226 %mask = fcmp oeq <2 x float> %x, %y
1227 %1 = zext <2 x i1> %mask to <2 x i64>
; `icmp eq` of <16 x i32> %a against zero produces a <16 x i1> condition that
; selects between the narrower <16 x i8> vectors %b and %c.
1231 define <16 x i8> @test47(<16 x i32> %a, <16 x i8> %b, <16 x i8> %c) {
1232 ; KNL-LABEL: test47:
1234 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc8]
1235 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
1236 ; KNL-NEXT: vpmovdb %zmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
1237 ; KNL-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 ## encoding: [0xc4,0xe3,0x69,0x4c,0xc1,0x00]
1238 ; KNL-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1239 ; KNL-NEXT: retq ## encoding: [0xc3]
1241 ; AVX512BW-LABEL: test47:
1242 ; AVX512BW: ## %bb.0:
1243 ; AVX512BW-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2
1244 ; AVX512BW-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
1245 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc8]
1246 ; AVX512BW-NEXT: vpblendmb %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x66,0xc1]
1247 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
1248 ; AVX512BW-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1249 ; AVX512BW-NEXT: retq ## encoding: [0xc3]
1251 ; SKX-LABEL: test47:
1253 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc8]
1254 ; SKX-NEXT: vpblendmb %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x66,0xc1]
1255 ; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1256 ; SKX-NEXT: retq ## encoding: [0xc3]
1257 %cmp = icmp eq <16 x i32> %a, zeroinitializer
1258 %res = select <16 x i1> %cmp, <16 x i8> %b, <16 x i8> %c
; `icmp eq` of <16 x i32> %a against zero produces a <16 x i1> condition that
; selects between the narrower <16 x i16> vectors %b and %c.
1262 define <16 x i16> @test48(<16 x i32> %a, <16 x i16> %b, <16 x i16> %c) {
1263 ; KNL-LABEL: test48:
1265 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc8]
1266 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
1267 ; KNL-NEXT: vpmovdw %zmm0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x33,0xc0]
1268 ; KNL-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 ## encoding: [0xc4,0xe3,0x6d,0x4c,0xc1,0x00]
1269 ; KNL-NEXT: retq ## encoding: [0xc3]
1271 ; AVX512BW-LABEL: test48:
1272 ; AVX512BW: ## %bb.0:
1273 ; AVX512BW-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
1274 ; AVX512BW-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
1275 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc8]
1276 ; AVX512BW-NEXT: vpblendmw %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x66,0xc1]
1277 ; AVX512BW-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
1278 ; AVX512BW-NEXT: retq ## encoding: [0xc3]
1280 ; SKX-LABEL: test48:
1282 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc8]
1283 ; SKX-NEXT: vpblendmw %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x66,0xc1]
1284 ; SKX-NEXT: retq ## encoding: [0xc3]
1285 %cmp = icmp eq <16 x i32> %a, zeroinitializer
1286 %res = select <16 x i1> %cmp, <16 x i16> %b, <16 x i16> %c
; `icmp eq` of <8 x i64> %a against zero produces an <8 x i1> condition that
; selects between the narrower <8 x i16> vectors %b and %c.
1290 define <8 x i16> @test49(<8 x i64> %a, <8 x i16> %b, <8 x i16> %c) {
1291 ; KNL-LABEL: test49:
1293 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc8]
1294 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
1295 ; KNL-NEXT: vpmovdw %zmm0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x33,0xc0]
1296 ; KNL-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 ## encoding: [0xc4,0xe3,0x69,0x4c,0xc1,0x00]
1297 ; KNL-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1298 ; KNL-NEXT: retq ## encoding: [0xc3]
1300 ; AVX512BW-LABEL: test49:
1301 ; AVX512BW: ## %bb.0:
1302 ; AVX512BW-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2
1303 ; AVX512BW-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
1304 ; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc8]
1305 ; AVX512BW-NEXT: vpblendmw %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x66,0xc1]
1306 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
1307 ; AVX512BW-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1308 ; AVX512BW-NEXT: retq ## encoding: [0xc3]
1310 ; SKX-LABEL: test49:
1312 ; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc8]
1313 ; SKX-NEXT: vpblendmw %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x66,0xc1]
1314 ; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1315 ; SKX-NEXT: retq ## encoding: [0xc3]
1316 %cmp = icmp eq <8 x i64> %a, zeroinitializer
1317 %res = select <8 x i1> %cmp, <8 x i16> %b, <8 x i16> %c
; `icmp eq` of %a against a <16 x i32> loaded from %b, with the loaded value as
; the second operand; the <16 x i1> result is bitcast to i16.
1321 define i16 @pcmpeq_mem_1(<16 x i32> %a, <16 x i32>* %b) {
1322 ; KNL-LABEL: pcmpeq_mem_1:
1324 ; KNL-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07]
1325 ; KNL-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
1326 ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
1327 ; KNL-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1328 ; KNL-NEXT: retq ## encoding: [0xc3]
1330 ; AVX512BW-LABEL: pcmpeq_mem_1:
1331 ; AVX512BW: ## %bb.0:
1332 ; AVX512BW-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07]
1333 ; AVX512BW-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
1334 ; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
1335 ; AVX512BW-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1336 ; AVX512BW-NEXT: retq ## encoding: [0xc3]
1338 ; SKX-LABEL: pcmpeq_mem_1:
1340 ; SKX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07]
1341 ; SKX-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
1342 ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
1343 ; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1344 ; SKX-NEXT: retq ## encoding: [0xc3]
1345 %load = load <16 x i32>, <16 x i32>* %b
1346 %cmp = icmp eq <16 x i32> %a, %load
1347 %cast = bitcast <16 x i1> %cmp to i16
1351 ; Make sure we use the short pcmpeq encoding like the test above when the memory
1352 ; operand is in the first argument instead of the second.
1353 define i16 @pcmpeq_mem_2(<16 x i32> %a, <16 x i32>* %b) {
1354 ; KNL-LABEL: pcmpeq_mem_2:
1356 ; KNL-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07]
1357 ; KNL-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
1358 ; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
1359 ; KNL-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1360 ; KNL-NEXT: retq ## encoding: [0xc3]
1362 ; AVX512BW-LABEL: pcmpeq_mem_2:
1363 ; AVX512BW: ## %bb.0:
1364 ; AVX512BW-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07]
1365 ; AVX512BW-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
1366 ; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
1367 ; AVX512BW-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1368 ; AVX512BW-NEXT: retq ## encoding: [0xc3]
1370 ; SKX-LABEL: pcmpeq_mem_2:
1372 ; SKX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07]
1373 ; SKX-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
1374 ; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
1375 ; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1376 ; SKX-NEXT: retq ## encoding: [0xc3]
1377 %load = load <16 x i32>, <16 x i32>* %b
1378 %cmp = icmp eq <16 x i32> %load, %a
1379 %cast = bitcast <16 x i1> %cmp to i16
1383 ; Don't let a degenerate case trigger an infinite loop.
1384 ; This should get simplified before it even exists as a vselect node,
1385 ; but that does not happen as of this change.
1387 define <2 x i64> @PR41066(<2 x i64> %t0, <2 x double> %x, <2 x double> %y) {
1388 ; AVX512-LABEL: PR41066:
1390 ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x57,0xc0]
1391 ; AVX512-NEXT: retq ## encoding: [0xc3]
1393 ; SKX-LABEL: PR41066:
1395 ; SKX-NEXT: vxorps %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0]
1396 ; SKX-NEXT: retq ## encoding: [0xc3]
1397 %t1 = fcmp ogt <2 x double> %x, %y
1398 %t2 = select <2 x i1> %t1, <2 x i64> <i64 undef, i64 0>, <2 x i64> zeroinitializer
1402 define <4 x i32> @zext_bool_logic(<4 x i64> %cond1, <4 x i64> %cond2, <4 x i32> %x) {
1403 ; AVX512-LABEL: zext_bool_logic:
1405 ; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
1406 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
1407 ; AVX512-NEXT: vptestnmq %zmm0, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc0]
1408 ; AVX512-NEXT: vptestnmq %zmm1, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf6,0x48,0x27,0xc9]
1409 ; AVX512-NEXT: korw %k1, %k0, %k1 ## encoding: [0xc5,0xfc,0x45,0xc9]
1410 ; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
1411 ; AVX512-NEXT: vpsubd %xmm0, %xmm2, %xmm0 ## encoding: [0xc5,0xe9,0xfa,0xc0]
1412 ; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1413 ; AVX512-NEXT: retq ## encoding: [0xc3]
1415 ; SKX-LABEL: zext_bool_logic:
1417 ; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc0]
1418 ; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf6,0x28,0x27,0xc9]
1419 ; SKX-NEXT: korw %k1, %k0, %k0 ## encoding: [0xc5,0xfc,0x45,0xc1]
1420 ; SKX-NEXT: vpmovm2d %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
1421 ; SKX-NEXT: vpsubd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfa,0xc0]
1422 ; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1423 ; SKX-NEXT: retq ## encoding: [0xc3]
1424 %a = icmp eq <4 x i64> %cond1, zeroinitializer
1425 %b = icmp eq <4 x i64> %cond2, zeroinitializer
1426 %c = or <4 x i1> %a, %b
1427 %d = zext <4 x i1> %c to <4 x i32>
1428 %e = add <4 x i32> %d, %x
1432 ; This used to crash in WidenVecRes_SETCC due to generating the wrong
1434 define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) {
1435 ; KNL-LABEL: half_vec_compare:
1436 ; KNL: ## %bb.0: ## %entry
1437 ; KNL-NEXT: movzwl 2(%rdi), %eax ## encoding: [0x0f,0xb7,0x47,0x02]
1438 ; KNL-NEXT: movzwl (%rdi), %ecx ## encoding: [0x0f,0xb7,0x0f]
1439 ; KNL-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
1440 ; KNL-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
1441 ; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
1442 ; KNL-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
1443 ; KNL-NEXT: setp %cl ## encoding: [0x0f,0x9a,0xc1]
1444 ; KNL-NEXT: setne %dl ## encoding: [0x0f,0x95,0xc2]
1445 ; KNL-NEXT: orb %cl, %dl ## encoding: [0x08,0xca]
1446 ; KNL-NEXT: andl $1, %edx ## encoding: [0x83,0xe2,0x01]
1447 ; KNL-NEXT: kmovw %edx, %k0 ## encoding: [0xc5,0xf8,0x92,0xc2]
1448 ; KNL-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
1449 ; KNL-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
1450 ; KNL-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
1451 ; KNL-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0]
1452 ; KNL-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1]
1453 ; KNL-NEXT: orb %al, %cl ## encoding: [0x08,0xc1]
1454 ; KNL-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
1455 ; KNL-NEXT: kshiftlw $1, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x01]
1456 ; KNL-NEXT: korw %k1, %k0, %k1 ## encoding: [0xc5,0xfc,0x45,0xc9]
1457 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
1458 ; KNL-NEXT: vpmovdw %zmm0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x33,0xc0]
1459 ; KNL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc0]
1460 ; KNL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A]
1461 ; KNL-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
1462 ; KNL-NEXT: vpextrw $0, %xmm0, (%rsi) ## encoding: [0xc4,0xe3,0x79,0x15,0x06,0x00]
1463 ; KNL-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1464 ; KNL-NEXT: retq ## encoding: [0xc3]
1466 ; AVX512BW-LABEL: half_vec_compare:
1467 ; AVX512BW: ## %bb.0: ## %entry
1468 ; AVX512BW-NEXT: movzwl 2(%rdi), %eax ## encoding: [0x0f,0xb7,0x47,0x02]
1469 ; AVX512BW-NEXT: movzwl (%rdi), %ecx ## encoding: [0x0f,0xb7,0x0f]
1470 ; AVX512BW-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
1471 ; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
1472 ; AVX512BW-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
1473 ; AVX512BW-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
1474 ; AVX512BW-NEXT: setp %cl ## encoding: [0x0f,0x9a,0xc1]
1475 ; AVX512BW-NEXT: setne %dl ## encoding: [0x0f,0x95,0xc2]
1476 ; AVX512BW-NEXT: orb %cl, %dl ## encoding: [0x08,0xca]
1477 ; AVX512BW-NEXT: andl $1, %edx ## encoding: [0x83,0xe2,0x01]
1478 ; AVX512BW-NEXT: kmovw %edx, %k0 ## encoding: [0xc5,0xf8,0x92,0xc2]
1479 ; AVX512BW-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
1480 ; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
1481 ; AVX512BW-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
1482 ; AVX512BW-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0]
1483 ; AVX512BW-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1]
1484 ; AVX512BW-NEXT: orb %al, %cl ## encoding: [0x08,0xc1]
1485 ; AVX512BW-NEXT: kmovd %ecx, %k1 ## encoding: [0xc5,0xfb,0x92,0xc9]
1486 ; AVX512BW-NEXT: kshiftlw $1, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x01]
1487 ; AVX512BW-NEXT: korw %k1, %k0, %k0 ## encoding: [0xc5,0xfc,0x45,0xc1]
1488 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ## encoding: [0x62,0xf2,0xfe,0x48,0x28,0xc0]
1489 ; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc0]
1490 ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A]
1491 ; AVX512BW-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
1492 ; AVX512BW-NEXT: vpextrw $0, %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x15,0x06,0x00]
1493 ; AVX512BW-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
1494 ; AVX512BW-NEXT: retq ## encoding: [0xc3]
1496 ; SKX-LABEL: half_vec_compare:
1497 ; SKX: ## %bb.0: ## %entry
1498 ; SKX-NEXT: movzwl (%rdi), %eax ## encoding: [0x0f,0xb7,0x07]
1499 ; SKX-NEXT: movzwl 2(%rdi), %ecx ## encoding: [0x0f,0xb7,0x4f,0x02]
1500 ; SKX-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
1501 ; SKX-NEXT: vcvtph2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
1502 ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x57,0xc9]
1503 ; SKX-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
1504 ; SKX-NEXT: setp %cl ## encoding: [0x0f,0x9a,0xc1]
1505 ; SKX-NEXT: setne %dl ## encoding: [0x0f,0x95,0xc2]
1506 ; SKX-NEXT: orb %cl, %dl ## encoding: [0x08,0xca]
1507 ; SKX-NEXT: kmovd %edx, %k0 ## encoding: [0xc5,0xfb,0x92,0xc2]
1508 ; SKX-NEXT: kshiftlb $1, %k0, %k0 ## encoding: [0xc4,0xe3,0x79,0x32,0xc0,0x01]
1509 ; SKX-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
1510 ; SKX-NEXT: vcvtph2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
1511 ; SKX-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
1512 ; SKX-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0]
1513 ; SKX-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1]
1514 ; SKX-NEXT: orb %al, %cl ## encoding: [0x08,0xc1]
1515 ; SKX-NEXT: kmovd %ecx, %k1 ## encoding: [0xc5,0xfb,0x92,0xc9]
1516 ; SKX-NEXT: kshiftlb $7, %k1, %k1 ## encoding: [0xc4,0xe3,0x79,0x32,0xc9,0x07]
1517 ; SKX-NEXT: kshiftrb $7, %k1, %k1 ## encoding: [0xc4,0xe3,0x79,0x30,0xc9,0x07]
1518 ; SKX-NEXT: korw %k0, %k1, %k0 ## encoding: [0xc5,0xf4,0x45,0xc0]
1519 ; SKX-NEXT: vpmovm2w %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
1520 ; SKX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc0]
1521 ; SKX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A]
1522 ; SKX-NEXT: ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
1523 ; SKX-NEXT: vpextrw $0, %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x15,0x06,0x00]
1524 ; SKX-NEXT: retq ## encoding: [0xc3]
1526 %0 = load <2 x half>, <2 x half>* %x
1527 %1 = fcmp une <2 x half> %0, zeroinitializer
1528 %2 = zext <2 x i1> %1 to <2 x i8>
1529 store <2 x i8> %2, <2 x i8>* %y
1533 ; This test used to end up with the vpcmpgtb on KNL having its operands in the wrong order.
1534 define <8 x i64> @cmp_swap_bug(<16 x i8>* %x, <8 x i64> %y, <8 x i64> %z) {
1535 ; KNL-LABEL: cmp_swap_bug:
1536 ; KNL: ## %bb.0: ## %entry
1537 ; KNL-NEXT: vmovdqa (%rdi), %xmm2 ## encoding: [0xc5,0xf9,0x6f,0x17]
1538 ; KNL-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1539 ; KNL-NEXT: ## encoding: [0xc4,0xe2,0x69,0x00,0x15,A,A,A,A]
1540 ; KNL-NEXT: ## fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
1541 ; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0xef,0xdb]
1542 ; KNL-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2 ## encoding: [0xc5,0xe1,0x64,0xd2]
1543 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x21,0xd2]
1544 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1 ## encoding: [0x62,0xf2,0x6d,0x48,0x27,0xca]
1545 ; KNL-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
1546 ; KNL-NEXT: retq ## encoding: [0xc3]
1548 ; AVX512BW-LABEL: cmp_swap_bug:
1549 ; AVX512BW: ## %bb.0: ## %entry
1550 ; AVX512BW-NEXT: vmovdqa (%rdi), %xmm2 ## encoding: [0xc5,0xf9,0x6f,0x17]
1551 ; AVX512BW-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1552 ; AVX512BW-NEXT: ## encoding: [0xc4,0xe2,0x69,0x00,0x15,A,A,A,A]
1553 ; AVX512BW-NEXT: ## fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
1554 ; AVX512BW-NEXT: vpmovb2m %zmm2, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x29,0xca]
1555 ; AVX512BW-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
1556 ; AVX512BW-NEXT: retq ## encoding: [0xc3]
1558 ; SKX-LABEL: cmp_swap_bug:
1559 ; SKX: ## %bb.0: ## %entry
1560 ; SKX-NEXT: vmovdqa (%rdi), %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x17]
1561 ; SKX-NEXT: vpmovwb %xmm2, %xmm2 ## encoding: [0x62,0xf2,0x7e,0x08,0x30,0xd2]
1562 ; SKX-NEXT: vpmovb2m %xmm2, %k1 ## encoding: [0x62,0xf2,0x7e,0x08,0x29,0xca]
1563 ; SKX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
1564 ; SKX-NEXT: retq ## encoding: [0xc3]
1566 %0 = load <16 x i8>, <16 x i8>* %x
1567 %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1568 %2 = icmp slt <8 x i8> %1, zeroinitializer
1569 %3 = select <8 x i1> %2, <8 x i64> %y, <8 x i64> %z
1573 define <2 x i32> @narrow_cmp_select_reverse(<2 x i64> %x, <2 x i32> %y) nounwind {
1574 ; AVX512-LABEL: narrow_cmp_select_reverse:
1576 ; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0xef,0xd2]
1577 ; AVX512-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x29,0xc2]
1578 ; AVX512-NEXT: vpshufd $232, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x70,0xc0,0xe8]
1579 ; AVX512-NEXT: ## xmm0 = xmm0[0,2,2,3]
1580 ; AVX512-NEXT: vpandn %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdf,0xc1]
1581 ; AVX512-NEXT: retq ## encoding: [0xc3]
1583 ; SKX-LABEL: narrow_cmp_select_reverse:
1585 ; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
1586 ; SKX-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xc1]
1587 ; SKX-NEXT: retq ## encoding: [0xc3]
1588 %mask = icmp eq <2 x i64> %x, zeroinitializer
1589 %res = select <2 x i1> %mask, <2 x i32> zeroinitializer, <2 x i32> %y