1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -mattr=+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; 256-bit AND of <8 x i32>: %a first has the splat constant 2 added, then the sum is ANDed with %b.
; Both llc invocations share the common check prefix; the expected output is a {1to8}
; broadcast vpaddd followed by an unsuffixed vpand on ymm registers.
7 define <8 x i32> @vpandd256(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
8 ; CHECK-LABEL: vpandd256:
9 ; CHECK: ## %bb.0: ## %entry
10 ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0
11 ; CHECK-NEXT: vpand %ymm1, %ymm0, %ymm0
14 ; Force the execution domain with an add.
15 %a2 = add <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
16 %x = and <8 x i32> %a2, %b
; 256-bit ANDN of <8 x i32>: %a has the splat constant 3 added, %b is inverted (xor with
; all-ones) and the two are ANDed; the expected output is a {1to8} broadcast vpaddd followed
; by a single unsuffixed vpandn.
; NOTE(review): the inverted operand previously came from %a, which left %b unused and made
; this the only ANDN test that disagreed with vpandnq256 / vpandnd128 / vpandnq128. The check
; lines below were adjusted by hand to mirror those siblings; regenerate them with
; utils/update_llc_test_checks.py to confirm.
20 define <8 x i32> @vpandnd256(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
21 ; CHECK-LABEL: vpandnd256:
22 ; CHECK: ## %bb.0: ## %entry
23 ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0
24 ; CHECK-NEXT: vpandn %ymm0, %ymm1, %ymm0
27 ; Force the execution domain with an add.
28 %a2 = add <8 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
29 %b2 = xor <8 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
30 %x = and <8 x i32> %a2, %b2
; 256-bit OR of <8 x i32>: %a first has the splat constant 4 added, then the sum is ORed with %b.
; Expected output: a {1to8} broadcast vpaddd followed by an unsuffixed vpor on ymm registers.
34 define <8 x i32> @vpord256(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
35 ; CHECK-LABEL: vpord256:
36 ; CHECK: ## %bb.0: ## %entry
37 ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0
38 ; CHECK-NEXT: vpor %ymm1, %ymm0, %ymm0
41 ; Force the execution domain with an add.
42 %a2 = add <8 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
43 %x = or <8 x i32> %a2, %b
; 256-bit XOR of <8 x i32>: %a first has the splat constant 5 added, then the sum is XORed with %b.
; Expected output: a {1to8} broadcast vpaddd followed by an unsuffixed vpxor on ymm registers.
47 define <8 x i32> @vpxord256(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
48 ; CHECK-LABEL: vpxord256:
49 ; CHECK: ## %bb.0: ## %entry
50 ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0
51 ; CHECK-NEXT: vpxor %ymm1, %ymm0, %ymm0
54 ; Force the execution domain with an add.
55 %a2 = add <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
56 %x = xor <8 x i32> %a2, %b
; 256-bit AND of <4 x i64>: %a first has the splat constant 6 added, then the sum is ANDed with %b.
; Expected output: a {1to4} broadcast vpaddq followed by an unsuffixed vpand on ymm registers.
60 define <4 x i64> @vpandq256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
61 ; CHECK-LABEL: vpandq256:
62 ; CHECK: ## %bb.0: ## %entry
63 ; CHECK-NEXT: vpaddq {{.*}}(%rip){1to4}, %ymm0, %ymm0
64 ; CHECK-NEXT: vpand %ymm1, %ymm0, %ymm0
67 ; Force the execution domain with an add.
68 %a2 = add <4 x i64> %a, <i64 6, i64 6, i64 6, i64 6>
69 %x = and <4 x i64> %a2, %b
; 256-bit ANDN of <4 x i64>: %a has the splat constant 7 added, %b is inverted (xor with
; all-ones) and the two are ANDed.
; Expected output: a {1to4} broadcast vpaddq followed by a single unsuffixed vpandn.
73 define <4 x i64> @vpandnq256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
74 ; CHECK-LABEL: vpandnq256:
75 ; CHECK: ## %bb.0: ## %entry
76 ; CHECK-NEXT: vpaddq {{.*}}(%rip){1to4}, %ymm0, %ymm0
77 ; CHECK-NEXT: vpandn %ymm0, %ymm1, %ymm0
80 ; Force the execution domain with an add.
81 %a2 = add <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
82 %b2 = xor <4 x i64> %b, <i64 -1, i64 -1, i64 -1, i64 -1>
83 %x = and <4 x i64> %a2, %b2
; 256-bit OR of <4 x i64>: %a first has the splat constant 21 added, then the sum is ORed with %b.
; Expected output: a {1to4} broadcast vpaddq followed by an unsuffixed vpor on ymm registers.
87 define <4 x i64> @vporq256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
88 ; CHECK-LABEL: vporq256:
89 ; CHECK: ## %bb.0: ## %entry
90 ; CHECK-NEXT: vpaddq {{.*}}(%rip){1to4}, %ymm0, %ymm0
91 ; CHECK-NEXT: vpor %ymm1, %ymm0, %ymm0
94 ; Force the execution domain with an add.
95 %a2 = add <4 x i64> %a, <i64 21, i64 21, i64 21, i64 21>
96 %x = or <4 x i64> %a2, %b
; 256-bit XOR of <4 x i64>: %a first has the splat constant 22 added, then the sum is XORed with %b.
; Expected output: a {1to4} broadcast vpaddq followed by an unsuffixed vpxor on ymm registers.
100 define <4 x i64> @vpxorq256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
101 ; CHECK-LABEL: vpxorq256:
102 ; CHECK: ## %bb.0: ## %entry
103 ; CHECK-NEXT: vpaddq {{.*}}(%rip){1to4}, %ymm0, %ymm0
104 ; CHECK-NEXT: vpxor %ymm1, %ymm0, %ymm0
107 ; Force the execution domain with an add.
108 %a2 = add <4 x i64> %a, <i64 22, i64 22, i64 22, i64 22>
109 %x = xor <4 x i64> %a2, %b
; 128-bit AND of <4 x i32>: %a first has the splat constant 8 added, then the sum is ANDed with %b.
; Expected output: a {1to4} broadcast vpaddd followed by an unsuffixed vpand on xmm registers.
115 define <4 x i32> @vpandd128(<4 x i32> %a, <4 x i32> %b) nounwind uwtable readnone ssp {
116 ; CHECK-LABEL: vpandd128:
117 ; CHECK: ## %bb.0: ## %entry
118 ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
119 ; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0
122 ; Force the execution domain with an add.
123 %a2 = add <4 x i32> %a, <i32 8, i32 8, i32 8, i32 8>
124 %x = and <4 x i32> %a2, %b
; 128-bit ANDN of <4 x i32>: %a has the splat constant 9 added, %b is inverted (xor with
; all-ones) and the two are ANDed.
; Expected output: a {1to4} broadcast vpaddd followed by a single unsuffixed vpandn.
128 define <4 x i32> @vpandnd128(<4 x i32> %a, <4 x i32> %b) nounwind uwtable readnone ssp {
129 ; CHECK-LABEL: vpandnd128:
130 ; CHECK: ## %bb.0: ## %entry
131 ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
132 ; CHECK-NEXT: vpandn %xmm0, %xmm1, %xmm0
135 ; Force the execution domain with an add.
136 %a2 = add <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
137 %b2 = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
138 %x = and <4 x i32> %a2, %b2
; 128-bit OR of <4 x i32>: %a first has the splat constant 10 added, then the sum is ORed with %b.
; Expected output: a {1to4} broadcast vpaddd followed by an unsuffixed vpor on xmm registers.
142 define <4 x i32> @vpord128(<4 x i32> %a, <4 x i32> %b) nounwind uwtable readnone ssp {
143 ; CHECK-LABEL: vpord128:
144 ; CHECK: ## %bb.0: ## %entry
145 ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
146 ; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0
149 ; Force the execution domain with an add.
150 %a2 = add <4 x i32> %a, <i32 10, i32 10, i32 10, i32 10>
151 %x = or <4 x i32> %a2, %b
; 128-bit XOR of <4 x i32>: %a first has the splat constant 11 added, then the sum is XORed with %b.
; Expected output: a {1to4} broadcast vpaddd followed by an unsuffixed vpxor on xmm registers.
155 define <4 x i32> @vpxord128(<4 x i32> %a, <4 x i32> %b) nounwind uwtable readnone ssp {
156 ; CHECK-LABEL: vpxord128:
157 ; CHECK: ## %bb.0: ## %entry
158 ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
159 ; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
162 ; Force the execution domain with an add.
163 %a2 = add <4 x i32> %a, <i32 11, i32 11, i32 11, i32 11>
164 %x = xor <4 x i32> %a2, %b
; 128-bit AND of <2 x i64>: %a first has the splat constant 12 added, then the sum is ANDed with %b.
; Expected output: vpaddq with a plain RIP-relative memory operand (no {1toN} broadcast
; suffix, unlike the wider/i32 variants) followed by an unsuffixed vpand on xmm registers.
168 define <2 x i64> @vpandq128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
169 ; CHECK-LABEL: vpandq128:
170 ; CHECK: ## %bb.0: ## %entry
171 ; CHECK-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm0
172 ; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0
175 ; Force the execution domain with an add.
176 %a2 = add <2 x i64> %a, <i64 12, i64 12>
177 %x = and <2 x i64> %a2, %b
; 128-bit ANDN of <2 x i64>: %a has the splat constant 13 added, %b is inverted (xor with
; all-ones) and the two are ANDed.
; Expected output: vpaddq with a plain RIP-relative memory operand (no broadcast suffix)
; followed by a single unsuffixed vpandn.
181 define <2 x i64> @vpandnq128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
182 ; CHECK-LABEL: vpandnq128:
183 ; CHECK: ## %bb.0: ## %entry
184 ; CHECK-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm0
185 ; CHECK-NEXT: vpandn %xmm0, %xmm1, %xmm0
188 ; Force the execution domain with an add.
189 %a2 = add <2 x i64> %a, <i64 13, i64 13>
190 %b2 = xor <2 x i64> %b, <i64 -1, i64 -1>
191 %x = and <2 x i64> %a2, %b2
; 128-bit OR of <2 x i64>: %a first has the splat constant 14 added, then the sum is ORed with %b.
; Expected output: vpaddq with a plain RIP-relative memory operand (no broadcast suffix)
; followed by an unsuffixed vpor on xmm registers.
195 define <2 x i64> @vporq128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
196 ; CHECK-LABEL: vporq128:
197 ; CHECK: ## %bb.0: ## %entry
198 ; CHECK-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm0
199 ; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0
202 ; Force the execution domain with an add.
203 %a2 = add <2 x i64> %a, <i64 14, i64 14>
204 %x = or <2 x i64> %a2, %b
; 128-bit XOR of <2 x i64>: %a first has the splat constant 15 added, then the sum is XORed with %b.
; Expected output: vpaddq with a plain RIP-relative memory operand (no broadcast suffix)
; followed by an unsuffixed vpxor on xmm registers.
208 define <2 x i64> @vpxorq128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
209 ; CHECK-LABEL: vpxorq128:
210 ; CHECK: ## %bb.0: ## %entry
211 ; CHECK-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm0
212 ; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
215 ; Force the execution domain with an add.
216 %a2 = add <2 x i64> %a, <i64 15, i64 15>
217 %x = xor <2 x i64> %a2, %b
; Merge-masked ANDNOT on <4 x double>: computes (~%__A & %__B) on the <4 x i64> bitcasts and
; selects it over %__W per lane, using the low 4 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpandnq, the SKX run emits kmovd + vandnpd, both {%k1}-masked.
222 define <4 x double> @test_mm256_mask_andnot_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
223 ; KNL-LABEL: test_mm256_mask_andnot_pd:
224 ; KNL: ## %bb.0: ## %entry
225 ; KNL-NEXT: kmovw %edi, %k1
226 ; KNL-NEXT: vpandnq %ymm2, %ymm1, %ymm0 {%k1}
229 ; SKX-LABEL: test_mm256_mask_andnot_pd:
230 ; SKX: ## %bb.0: ## %entry
231 ; SKX-NEXT: kmovd %edi, %k1
232 ; SKX-NEXT: vandnpd %ymm2, %ymm1, %ymm0 {%k1}
235 %0 = bitcast <4 x double> %__A to <4 x i64>
236 %neg.i.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
237 %1 = bitcast <4 x double> %__B to <4 x i64>
238 %and.i.i = and <4 x i64> %1, %neg.i.i
239 %2 = bitcast <4 x i64> %and.i.i to <4 x double>
240 %3 = bitcast i8 %__U to <8 x i1>
241 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
242 %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> %__W
; Zero-masked ANDNOT on <4 x double>: computes (~%__A & %__B) on the <4 x i64> bitcasts and
; selects it against zeroinitializer per lane, using the low 4 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpandnq, the SKX run emits kmovd + vandnpd, both with {%k1} {z}.
246 define <4 x double> @test_mm256_maskz_andnot_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
247 ; KNL-LABEL: test_mm256_maskz_andnot_pd:
248 ; KNL: ## %bb.0: ## %entry
249 ; KNL-NEXT: kmovw %edi, %k1
250 ; KNL-NEXT: vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z}
253 ; SKX-LABEL: test_mm256_maskz_andnot_pd:
254 ; SKX: ## %bb.0: ## %entry
255 ; SKX-NEXT: kmovd %edi, %k1
256 ; SKX-NEXT: vandnpd %ymm1, %ymm0, %ymm0 {%k1} {z}
259 %0 = bitcast <4 x double> %__A to <4 x i64>
260 %neg.i.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
261 %1 = bitcast <4 x double> %__B to <4 x i64>
262 %and.i.i = and <4 x i64> %1, %neg.i.i
263 %2 = bitcast <4 x i64> %and.i.i to <4 x double>
264 %3 = bitcast i8 %__U to <8 x i1>
265 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
266 %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> zeroinitializer
; Merge-masked ANDNOT on <2 x double>: computes (~%__A & %__B) on the <2 x i64> bitcasts and
; selects it over %__W per lane, using the low 2 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpandnq, the SKX run emits kmovd + vandnpd, both {%k1}-masked.
270 define <2 x double> @test_mm_mask_andnot_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
271 ; KNL-LABEL: test_mm_mask_andnot_pd:
272 ; KNL: ## %bb.0: ## %entry
273 ; KNL-NEXT: kmovw %edi, %k1
274 ; KNL-NEXT: vpandnq %xmm2, %xmm1, %xmm0 {%k1}
277 ; SKX-LABEL: test_mm_mask_andnot_pd:
278 ; SKX: ## %bb.0: ## %entry
279 ; SKX-NEXT: kmovd %edi, %k1
280 ; SKX-NEXT: vandnpd %xmm2, %xmm1, %xmm0 {%k1}
283 %0 = bitcast <2 x double> %__A to <2 x i64>
284 %neg.i.i = xor <2 x i64> %0, <i64 -1, i64 -1>
285 %1 = bitcast <2 x double> %__B to <2 x i64>
286 %and.i.i = and <2 x i64> %1, %neg.i.i
287 %2 = bitcast <2 x i64> %and.i.i to <2 x double>
288 %3 = bitcast i8 %__U to <8 x i1>
289 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
290 %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> %__W
; Zero-masked ANDNOT on <2 x double>: computes (~%__A & %__B) on the <2 x i64> bitcasts and
; selects it against zeroinitializer per lane, using the low 2 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpandnq, the SKX run emits kmovd + vandnpd, both with {%k1} {z}.
294 define <2 x double> @test_mm_maskz_andnot_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
295 ; KNL-LABEL: test_mm_maskz_andnot_pd:
296 ; KNL: ## %bb.0: ## %entry
297 ; KNL-NEXT: kmovw %edi, %k1
298 ; KNL-NEXT: vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z}
301 ; SKX-LABEL: test_mm_maskz_andnot_pd:
302 ; SKX: ## %bb.0: ## %entry
303 ; SKX-NEXT: kmovd %edi, %k1
304 ; SKX-NEXT: vandnpd %xmm1, %xmm0, %xmm0 {%k1} {z}
307 %0 = bitcast <2 x double> %__A to <2 x i64>
308 %neg.i.i = xor <2 x i64> %0, <i64 -1, i64 -1>
309 %1 = bitcast <2 x double> %__B to <2 x i64>
310 %and.i.i = and <2 x i64> %1, %neg.i.i
311 %2 = bitcast <2 x i64> %and.i.i to <2 x double>
312 %3 = bitcast i8 %__U to <8 x i1>
313 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
314 %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> zeroinitializer
; Merge-masked ANDNOT on <8 x float>: computes (~%__A & %__B) on the <8 x i32> bitcasts and
; selects it over %__W per lane, using all 8 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpandnd, the SKX run emits kmovd + vandnps, both {%k1}-masked.
318 define <8 x float> @test_mm256_mask_andnot_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
319 ; KNL-LABEL: test_mm256_mask_andnot_ps:
320 ; KNL: ## %bb.0: ## %entry
321 ; KNL-NEXT: kmovw %edi, %k1
322 ; KNL-NEXT: vpandnd %ymm2, %ymm1, %ymm0 {%k1}
325 ; SKX-LABEL: test_mm256_mask_andnot_ps:
326 ; SKX: ## %bb.0: ## %entry
327 ; SKX-NEXT: kmovd %edi, %k1
328 ; SKX-NEXT: vandnps %ymm2, %ymm1, %ymm0 {%k1}
331 %0 = bitcast <8 x float> %__A to <8 x i32>
332 %neg.i.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
333 %1 = bitcast <8 x float> %__B to <8 x i32>
334 %and.i.i = and <8 x i32> %1, %neg.i.i
335 %2 = bitcast <8 x i32> %and.i.i to <8 x float>
336 %3 = bitcast i8 %__U to <8 x i1>
337 %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> %__W
; Zero-masked ANDNOT on <8 x float>: computes (~%__A & %__B) on the <8 x i32> bitcasts and
; selects it against zeroinitializer per lane, using all 8 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpandnd, the SKX run emits kmovd + vandnps, both with {%k1} {z}.
341 define <8 x float> @test_mm256_maskz_andnot_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
342 ; KNL-LABEL: test_mm256_maskz_andnot_ps:
343 ; KNL: ## %bb.0: ## %entry
344 ; KNL-NEXT: kmovw %edi, %k1
345 ; KNL-NEXT: vpandnd %ymm1, %ymm0, %ymm0 {%k1} {z}
348 ; SKX-LABEL: test_mm256_maskz_andnot_ps:
349 ; SKX: ## %bb.0: ## %entry
350 ; SKX-NEXT: kmovd %edi, %k1
351 ; SKX-NEXT: vandnps %ymm1, %ymm0, %ymm0 {%k1} {z}
354 %0 = bitcast <8 x float> %__A to <8 x i32>
355 %neg.i.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
356 %1 = bitcast <8 x float> %__B to <8 x i32>
357 %and.i.i = and <8 x i32> %1, %neg.i.i
358 %2 = bitcast <8 x i32> %and.i.i to <8 x float>
359 %3 = bitcast i8 %__U to <8 x i1>
360 %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> zeroinitializer
; Merge-masked ANDNOT on <4 x float>: computes (~%__A & %__B) on the <4 x i32> bitcasts and
; selects it over %__W per lane, using the low 4 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpandnd, the SKX run emits kmovd + vandnps, both {%k1}-masked.
364 define <4 x float> @test_mm_mask_andnot_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
365 ; KNL-LABEL: test_mm_mask_andnot_ps:
366 ; KNL: ## %bb.0: ## %entry
367 ; KNL-NEXT: kmovw %edi, %k1
368 ; KNL-NEXT: vpandnd %xmm2, %xmm1, %xmm0 {%k1}
371 ; SKX-LABEL: test_mm_mask_andnot_ps:
372 ; SKX: ## %bb.0: ## %entry
373 ; SKX-NEXT: kmovd %edi, %k1
374 ; SKX-NEXT: vandnps %xmm2, %xmm1, %xmm0 {%k1}
377 %0 = bitcast <4 x float> %__A to <4 x i32>
378 %neg.i.i = xor <4 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1>
379 %1 = bitcast <4 x float> %__B to <4 x i32>
380 %and.i.i = and <4 x i32> %1, %neg.i.i
381 %2 = bitcast <4 x i32> %and.i.i to <4 x float>
382 %3 = bitcast i8 %__U to <8 x i1>
383 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
384 %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> %__W
; Zero-masked ANDNOT on <4 x float>: computes (~%__A & %__B) on the <4 x i32> bitcasts and
; selects it against zeroinitializer per lane, using the low 4 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpandnd, the SKX run emits kmovd + vandnps, both with {%k1} {z}.
388 define <4 x float> @test_mm_maskz_andnot_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
389 ; KNL-LABEL: test_mm_maskz_andnot_ps:
390 ; KNL: ## %bb.0: ## %entry
391 ; KNL-NEXT: kmovw %edi, %k1
392 ; KNL-NEXT: vpandnd %xmm1, %xmm0, %xmm0 {%k1} {z}
395 ; SKX-LABEL: test_mm_maskz_andnot_ps:
396 ; SKX: ## %bb.0: ## %entry
397 ; SKX-NEXT: kmovd %edi, %k1
398 ; SKX-NEXT: vandnps %xmm1, %xmm0, %xmm0 {%k1} {z}
401 %0 = bitcast <4 x float> %__A to <4 x i32>
402 %neg.i.i = xor <4 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1>
403 %1 = bitcast <4 x float> %__B to <4 x i32>
404 %and.i.i = and <4 x i32> %1, %neg.i.i
405 %2 = bitcast <4 x i32> %and.i.i to <4 x float>
406 %3 = bitcast i8 %__U to <8 x i1>
407 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
408 %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> zeroinitializer
; Merge-masked AND on <4 x double>: computes (%__B & %__A) on the <4 x i64> bitcasts and
; selects it over %__W per lane, using the low 4 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpandq, the SKX run emits kmovd + vandpd, both {%k1}-masked.
412 define <4 x double> @test_mm256_mask_and_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
413 ; KNL-LABEL: test_mm256_mask_and_pd:
414 ; KNL: ## %bb.0: ## %entry
415 ; KNL-NEXT: kmovw %edi, %k1
416 ; KNL-NEXT: vpandq %ymm1, %ymm2, %ymm0 {%k1}
419 ; SKX-LABEL: test_mm256_mask_and_pd:
420 ; SKX: ## %bb.0: ## %entry
421 ; SKX-NEXT: kmovd %edi, %k1
422 ; SKX-NEXT: vandpd %ymm1, %ymm2, %ymm0 {%k1}
425 %0 = bitcast <4 x double> %__A to <4 x i64>
426 %1 = bitcast <4 x double> %__B to <4 x i64>
427 %and.i.i = and <4 x i64> %1, %0
428 %2 = bitcast <4 x i64> %and.i.i to <4 x double>
429 %3 = bitcast i8 %__U to <8 x i1>
430 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
431 %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> %__W
; Zero-masked AND on <4 x double>: computes (%__B & %__A) on the <4 x i64> bitcasts and
; selects it against zeroinitializer per lane, using the low 4 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpandq, the SKX run emits kmovd + vandpd, both with {%k1} {z}.
435 define <4 x double> @test_mm256_maskz_and_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
436 ; KNL-LABEL: test_mm256_maskz_and_pd:
437 ; KNL: ## %bb.0: ## %entry
438 ; KNL-NEXT: kmovw %edi, %k1
439 ; KNL-NEXT: vpandq %ymm0, %ymm1, %ymm0 {%k1} {z}
442 ; SKX-LABEL: test_mm256_maskz_and_pd:
443 ; SKX: ## %bb.0: ## %entry
444 ; SKX-NEXT: kmovd %edi, %k1
445 ; SKX-NEXT: vandpd %ymm0, %ymm1, %ymm0 {%k1} {z}
448 %0 = bitcast <4 x double> %__A to <4 x i64>
449 %1 = bitcast <4 x double> %__B to <4 x i64>
450 %and.i.i = and <4 x i64> %1, %0
451 %2 = bitcast <4 x i64> %and.i.i to <4 x double>
452 %3 = bitcast i8 %__U to <8 x i1>
453 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
454 %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> zeroinitializer
; Merge-masked AND on <2 x double>: computes (%__B & %__A) on the <2 x i64> bitcasts and
; selects it over %__W per lane, using the low 2 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpandq, the SKX run emits kmovd + vandpd, both {%k1}-masked.
458 define <2 x double> @test_mm_mask_and_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
459 ; KNL-LABEL: test_mm_mask_and_pd:
460 ; KNL: ## %bb.0: ## %entry
461 ; KNL-NEXT: kmovw %edi, %k1
462 ; KNL-NEXT: vpandq %xmm1, %xmm2, %xmm0 {%k1}
465 ; SKX-LABEL: test_mm_mask_and_pd:
466 ; SKX: ## %bb.0: ## %entry
467 ; SKX-NEXT: kmovd %edi, %k1
468 ; SKX-NEXT: vandpd %xmm1, %xmm2, %xmm0 {%k1}
471 %0 = bitcast <2 x double> %__A to <2 x i64>
472 %1 = bitcast <2 x double> %__B to <2 x i64>
473 %and.i.i = and <2 x i64> %1, %0
474 %2 = bitcast <2 x i64> %and.i.i to <2 x double>
475 %3 = bitcast i8 %__U to <8 x i1>
476 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
477 %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> %__W
; Zero-masked AND on <2 x double>: computes (%__B & %__A) on the <2 x i64> bitcasts and
; selects it against zeroinitializer per lane, using the low 2 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpandq, the SKX run emits kmovd + vandpd, both with {%k1} {z}.
481 define <2 x double> @test_mm_maskz_and_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
482 ; KNL-LABEL: test_mm_maskz_and_pd:
483 ; KNL: ## %bb.0: ## %entry
484 ; KNL-NEXT: kmovw %edi, %k1
485 ; KNL-NEXT: vpandq %xmm0, %xmm1, %xmm0 {%k1} {z}
488 ; SKX-LABEL: test_mm_maskz_and_pd:
489 ; SKX: ## %bb.0: ## %entry
490 ; SKX-NEXT: kmovd %edi, %k1
491 ; SKX-NEXT: vandpd %xmm0, %xmm1, %xmm0 {%k1} {z}
494 %0 = bitcast <2 x double> %__A to <2 x i64>
495 %1 = bitcast <2 x double> %__B to <2 x i64>
496 %and.i.i = and <2 x i64> %1, %0
497 %2 = bitcast <2 x i64> %and.i.i to <2 x double>
498 %3 = bitcast i8 %__U to <8 x i1>
499 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
500 %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> zeroinitializer
; Merge-masked AND on <8 x float>: computes (%__B & %__A) on the <8 x i32> bitcasts and
; selects it over %__W per lane, using all 8 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpandd, the SKX run emits kmovd + vandps, both {%k1}-masked.
504 define <8 x float> @test_mm256_mask_and_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
505 ; KNL-LABEL: test_mm256_mask_and_ps:
506 ; KNL: ## %bb.0: ## %entry
507 ; KNL-NEXT: kmovw %edi, %k1
508 ; KNL-NEXT: vpandd %ymm1, %ymm2, %ymm0 {%k1}
511 ; SKX-LABEL: test_mm256_mask_and_ps:
512 ; SKX: ## %bb.0: ## %entry
513 ; SKX-NEXT: kmovd %edi, %k1
514 ; SKX-NEXT: vandps %ymm1, %ymm2, %ymm0 {%k1}
517 %0 = bitcast <8 x float> %__A to <8 x i32>
518 %1 = bitcast <8 x float> %__B to <8 x i32>
519 %and.i.i = and <8 x i32> %1, %0
520 %2 = bitcast <8 x i32> %and.i.i to <8 x float>
521 %3 = bitcast i8 %__U to <8 x i1>
522 %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> %__W
; Zero-masked AND on <8 x float>: computes (%__B & %__A) on the <8 x i32> bitcasts and
; selects it against zeroinitializer per lane, using all 8 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpandd, the SKX run emits kmovd + vandps, both with {%k1} {z}.
526 define <8 x float> @test_mm256_maskz_and_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
527 ; KNL-LABEL: test_mm256_maskz_and_ps:
528 ; KNL: ## %bb.0: ## %entry
529 ; KNL-NEXT: kmovw %edi, %k1
530 ; KNL-NEXT: vpandd %ymm0, %ymm1, %ymm0 {%k1} {z}
533 ; SKX-LABEL: test_mm256_maskz_and_ps:
534 ; SKX: ## %bb.0: ## %entry
535 ; SKX-NEXT: kmovd %edi, %k1
536 ; SKX-NEXT: vandps %ymm0, %ymm1, %ymm0 {%k1} {z}
539 %0 = bitcast <8 x float> %__A to <8 x i32>
540 %1 = bitcast <8 x float> %__B to <8 x i32>
541 %and.i.i = and <8 x i32> %1, %0
542 %2 = bitcast <8 x i32> %and.i.i to <8 x float>
543 %3 = bitcast i8 %__U to <8 x i1>
544 %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> zeroinitializer
; Merge-masked AND on <4 x float>: computes (%__B & %__A) on the <4 x i32> bitcasts and
; selects it over %__W per lane, using the low 4 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpandd, the SKX run emits kmovd + vandps, both {%k1}-masked.
548 define <4 x float> @test_mm_mask_and_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
549 ; KNL-LABEL: test_mm_mask_and_ps:
550 ; KNL: ## %bb.0: ## %entry
551 ; KNL-NEXT: kmovw %edi, %k1
552 ; KNL-NEXT: vpandd %xmm1, %xmm2, %xmm0 {%k1}
555 ; SKX-LABEL: test_mm_mask_and_ps:
556 ; SKX: ## %bb.0: ## %entry
557 ; SKX-NEXT: kmovd %edi, %k1
558 ; SKX-NEXT: vandps %xmm1, %xmm2, %xmm0 {%k1}
561 %0 = bitcast <4 x float> %__A to <4 x i32>
562 %1 = bitcast <4 x float> %__B to <4 x i32>
563 %and.i.i = and <4 x i32> %1, %0
564 %2 = bitcast <4 x i32> %and.i.i to <4 x float>
565 %3 = bitcast i8 %__U to <8 x i1>
566 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
567 %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> %__W
; Zero-masked AND on <4 x float>: computes (%__B & %__A) on the <4 x i32> bitcasts and
; selects it against zeroinitializer per lane, using the low 4 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpandd, the SKX run emits kmovd + vandps, both with {%k1} {z}.
571 define <4 x float> @test_mm_maskz_and_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
572 ; KNL-LABEL: test_mm_maskz_and_ps:
573 ; KNL: ## %bb.0: ## %entry
574 ; KNL-NEXT: kmovw %edi, %k1
575 ; KNL-NEXT: vpandd %xmm0, %xmm1, %xmm0 {%k1} {z}
578 ; SKX-LABEL: test_mm_maskz_and_ps:
579 ; SKX: ## %bb.0: ## %entry
580 ; SKX-NEXT: kmovd %edi, %k1
581 ; SKX-NEXT: vandps %xmm0, %xmm1, %xmm0 {%k1} {z}
584 %0 = bitcast <4 x float> %__A to <4 x i32>
585 %1 = bitcast <4 x float> %__B to <4 x i32>
586 %and.i.i = and <4 x i32> %1, %0
587 %2 = bitcast <4 x i32> %and.i.i to <4 x float>
588 %3 = bitcast i8 %__U to <8 x i1>
589 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
590 %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> zeroinitializer
; Merge-masked XOR on <4 x double>: computes (%__A ^ %__B) on the <4 x i64> bitcasts and
; selects it over %__W per lane, using the low 4 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpxorq, the SKX run emits kmovd + vxorpd, both {%k1}-masked.
594 define <4 x double> @test_mm256_mask_xor_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
595 ; KNL-LABEL: test_mm256_mask_xor_pd:
596 ; KNL: ## %bb.0: ## %entry
597 ; KNL-NEXT: kmovw %edi, %k1
598 ; KNL-NEXT: vpxorq %ymm2, %ymm1, %ymm0 {%k1}
601 ; SKX-LABEL: test_mm256_mask_xor_pd:
602 ; SKX: ## %bb.0: ## %entry
603 ; SKX-NEXT: kmovd %edi, %k1
604 ; SKX-NEXT: vxorpd %ymm2, %ymm1, %ymm0 {%k1}
607 %0 = bitcast <4 x double> %__A to <4 x i64>
608 %1 = bitcast <4 x double> %__B to <4 x i64>
609 %xor.i.i = xor <4 x i64> %0, %1
610 %2 = bitcast <4 x i64> %xor.i.i to <4 x double>
611 %3 = bitcast i8 %__U to <8 x i1>
612 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
613 %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> %__W
; Zero-masked XOR on <4 x double>: computes (%__A ^ %__B) on the <4 x i64> bitcasts and
; selects it against zeroinitializer per lane, using the low 4 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpxorq, the SKX run emits kmovd + vxorpd, both with {%k1} {z}.
617 define <4 x double> @test_mm256_maskz_xor_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
618 ; KNL-LABEL: test_mm256_maskz_xor_pd:
619 ; KNL: ## %bb.0: ## %entry
620 ; KNL-NEXT: kmovw %edi, %k1
621 ; KNL-NEXT: vpxorq %ymm1, %ymm0, %ymm0 {%k1} {z}
624 ; SKX-LABEL: test_mm256_maskz_xor_pd:
625 ; SKX: ## %bb.0: ## %entry
626 ; SKX-NEXT: kmovd %edi, %k1
627 ; SKX-NEXT: vxorpd %ymm1, %ymm0, %ymm0 {%k1} {z}
630 %0 = bitcast <4 x double> %__A to <4 x i64>
631 %1 = bitcast <4 x double> %__B to <4 x i64>
632 %xor.i.i = xor <4 x i64> %0, %1
633 %2 = bitcast <4 x i64> %xor.i.i to <4 x double>
634 %3 = bitcast i8 %__U to <8 x i1>
635 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
636 %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> zeroinitializer
; Merge-masked XOR on <2 x double>: computes (%__A ^ %__B) on the <2 x i64> bitcasts and
; selects it over %__W per lane, using the low 2 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpxorq, the SKX run emits kmovd + vxorpd, both {%k1}-masked.
640 define <2 x double> @test_mm_mask_xor_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
641 ; KNL-LABEL: test_mm_mask_xor_pd:
642 ; KNL: ## %bb.0: ## %entry
643 ; KNL-NEXT: kmovw %edi, %k1
644 ; KNL-NEXT: vpxorq %xmm2, %xmm1, %xmm0 {%k1}
647 ; SKX-LABEL: test_mm_mask_xor_pd:
648 ; SKX: ## %bb.0: ## %entry
649 ; SKX-NEXT: kmovd %edi, %k1
650 ; SKX-NEXT: vxorpd %xmm2, %xmm1, %xmm0 {%k1}
653 %0 = bitcast <2 x double> %__A to <2 x i64>
654 %1 = bitcast <2 x double> %__B to <2 x i64>
655 %xor.i.i = xor <2 x i64> %0, %1
656 %2 = bitcast <2 x i64> %xor.i.i to <2 x double>
657 %3 = bitcast i8 %__U to <8 x i1>
658 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
659 %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> %__W
; Zero-masked XOR on <2 x double>: computes (%__A ^ %__B) on the <2 x i64> bitcasts and
; selects it against zeroinitializer per lane, using the low 2 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpxorq, the SKX run emits kmovd + vxorpd, both with {%k1} {z}.
663 define <2 x double> @test_mm_maskz_xor_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
664 ; KNL-LABEL: test_mm_maskz_xor_pd:
665 ; KNL: ## %bb.0: ## %entry
666 ; KNL-NEXT: kmovw %edi, %k1
667 ; KNL-NEXT: vpxorq %xmm1, %xmm0, %xmm0 {%k1} {z}
670 ; SKX-LABEL: test_mm_maskz_xor_pd:
671 ; SKX: ## %bb.0: ## %entry
672 ; SKX-NEXT: kmovd %edi, %k1
673 ; SKX-NEXT: vxorpd %xmm1, %xmm0, %xmm0 {%k1} {z}
676 %0 = bitcast <2 x double> %__A to <2 x i64>
677 %1 = bitcast <2 x double> %__B to <2 x i64>
678 %xor.i.i = xor <2 x i64> %0, %1
679 %2 = bitcast <2 x i64> %xor.i.i to <2 x double>
680 %3 = bitcast i8 %__U to <8 x i1>
681 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
682 %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> zeroinitializer
; Merge-masked XOR on <8 x float>: computes (%__A ^ %__B) on the <8 x i32> bitcasts and
; selects it over %__W per lane, using all 8 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpxord, the SKX run emits kmovd + vxorps, both {%k1}-masked.
686 define <8 x float> @test_mm256_mask_xor_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
687 ; KNL-LABEL: test_mm256_mask_xor_ps:
688 ; KNL: ## %bb.0: ## %entry
689 ; KNL-NEXT: kmovw %edi, %k1
690 ; KNL-NEXT: vpxord %ymm2, %ymm1, %ymm0 {%k1}
693 ; SKX-LABEL: test_mm256_mask_xor_ps:
694 ; SKX: ## %bb.0: ## %entry
695 ; SKX-NEXT: kmovd %edi, %k1
696 ; SKX-NEXT: vxorps %ymm2, %ymm1, %ymm0 {%k1}
699 %0 = bitcast <8 x float> %__A to <8 x i32>
700 %1 = bitcast <8 x float> %__B to <8 x i32>
701 %xor.i.i = xor <8 x i32> %0, %1
702 %2 = bitcast <8 x i32> %xor.i.i to <8 x float>
703 %3 = bitcast i8 %__U to <8 x i1>
704 %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> %__W
; Zero-masked XOR on <8 x float>: computes (%__A ^ %__B) on the <8 x i32> bitcasts and
; selects it against zeroinitializer per lane, using all 8 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpxord, the SKX run emits kmovd + vxorps, both with {%k1} {z}.
708 define <8 x float> @test_mm256_maskz_xor_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
709 ; KNL-LABEL: test_mm256_maskz_xor_ps:
710 ; KNL: ## %bb.0: ## %entry
711 ; KNL-NEXT: kmovw %edi, %k1
712 ; KNL-NEXT: vpxord %ymm1, %ymm0, %ymm0 {%k1} {z}
715 ; SKX-LABEL: test_mm256_maskz_xor_ps:
716 ; SKX: ## %bb.0: ## %entry
717 ; SKX-NEXT: kmovd %edi, %k1
718 ; SKX-NEXT: vxorps %ymm1, %ymm0, %ymm0 {%k1} {z}
721 %0 = bitcast <8 x float> %__A to <8 x i32>
722 %1 = bitcast <8 x float> %__B to <8 x i32>
723 %xor.i.i = xor <8 x i32> %0, %1
724 %2 = bitcast <8 x i32> %xor.i.i to <8 x float>
725 %3 = bitcast i8 %__U to <8 x i1>
726 %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> zeroinitializer
; Merge-masked XOR on <4 x float>: computes (%__A ^ %__B) on the <4 x i32> bitcasts and
; selects it over %__W per lane, using the low 4 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpxord, the SKX run emits kmovd + vxorps, both {%k1}-masked.
730 define <4 x float> @test_mm_mask_xor_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
731 ; KNL-LABEL: test_mm_mask_xor_ps:
732 ; KNL: ## %bb.0: ## %entry
733 ; KNL-NEXT: kmovw %edi, %k1
734 ; KNL-NEXT: vpxord %xmm2, %xmm1, %xmm0 {%k1}
737 ; SKX-LABEL: test_mm_mask_xor_ps:
738 ; SKX: ## %bb.0: ## %entry
739 ; SKX-NEXT: kmovd %edi, %k1
740 ; SKX-NEXT: vxorps %xmm2, %xmm1, %xmm0 {%k1}
743 %0 = bitcast <4 x float> %__A to <4 x i32>
744 %1 = bitcast <4 x float> %__B to <4 x i32>
745 %xor.i.i = xor <4 x i32> %0, %1
746 %2 = bitcast <4 x i32> %xor.i.i to <4 x float>
747 %3 = bitcast i8 %__U to <8 x i1>
748 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
749 %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> %__W
; Zero-masked XOR on <4 x float>: computes (%__A ^ %__B) on the <4 x i32> bitcasts and
; selects it against zeroinitializer per lane, using the low 4 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpxord, the SKX run emits kmovd + vxorps, both with {%k1} {z}.
753 define <4 x float> @test_mm_maskz_xor_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
754 ; KNL-LABEL: test_mm_maskz_xor_ps:
755 ; KNL: ## %bb.0: ## %entry
756 ; KNL-NEXT: kmovw %edi, %k1
757 ; KNL-NEXT: vpxord %xmm1, %xmm0, %xmm0 {%k1} {z}
760 ; SKX-LABEL: test_mm_maskz_xor_ps:
761 ; SKX: ## %bb.0: ## %entry
762 ; SKX-NEXT: kmovd %edi, %k1
763 ; SKX-NEXT: vxorps %xmm1, %xmm0, %xmm0 {%k1} {z}
766 %0 = bitcast <4 x float> %__A to <4 x i32>
767 %1 = bitcast <4 x float> %__B to <4 x i32>
768 %xor.i.i = xor <4 x i32> %0, %1
769 %2 = bitcast <4 x i32> %xor.i.i to <4 x float>
770 %3 = bitcast i8 %__U to <8 x i1>
771 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
772 %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> zeroinitializer
; Merge-masked OR on <4 x double>: computes (%__B | %__A) on the <4 x i64> bitcasts and
; selects it over %__W per lane, using the low 4 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vporq, the SKX run emits kmovd + vorpd, both {%k1}-masked.
776 define <4 x double> @test_mm256_mask_or_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
777 ; KNL-LABEL: test_mm256_mask_or_pd:
778 ; KNL: ## %bb.0: ## %entry
779 ; KNL-NEXT: kmovw %edi, %k1
780 ; KNL-NEXT: vporq %ymm1, %ymm2, %ymm0 {%k1}
783 ; SKX-LABEL: test_mm256_mask_or_pd:
784 ; SKX: ## %bb.0: ## %entry
785 ; SKX-NEXT: kmovd %edi, %k1
786 ; SKX-NEXT: vorpd %ymm1, %ymm2, %ymm0 {%k1}
789 %0 = bitcast <4 x double> %__A to <4 x i64>
790 %1 = bitcast <4 x double> %__B to <4 x i64>
791 %or.i.i = or <4 x i64> %1, %0
792 %2 = bitcast <4 x i64> %or.i.i to <4 x double>
793 %3 = bitcast i8 %__U to <8 x i1>
794 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
795 %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> %__W
; Zero-masked OR on <4 x double>: computes (%__B | %__A) on the <4 x i64> bitcasts and
; selects it against zeroinitializer per lane, using the low 4 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vporq, the SKX run emits kmovd + vorpd, both with {%k1} {z}.
799 define <4 x double> @test_mm256_maskz_or_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
800 ; KNL-LABEL: test_mm256_maskz_or_pd:
801 ; KNL: ## %bb.0: ## %entry
802 ; KNL-NEXT: kmovw %edi, %k1
803 ; KNL-NEXT: vporq %ymm0, %ymm1, %ymm0 {%k1} {z}
806 ; SKX-LABEL: test_mm256_maskz_or_pd:
807 ; SKX: ## %bb.0: ## %entry
808 ; SKX-NEXT: kmovd %edi, %k1
809 ; SKX-NEXT: vorpd %ymm0, %ymm1, %ymm0 {%k1} {z}
812 %0 = bitcast <4 x double> %__A to <4 x i64>
813 %1 = bitcast <4 x double> %__B to <4 x i64>
814 %or.i.i = or <4 x i64> %1, %0
815 %2 = bitcast <4 x i64> %or.i.i to <4 x double>
816 %3 = bitcast i8 %__U to <8 x i1>
817 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
818 %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> zeroinitializer
; Merge-masked OR on <2 x double>: computes (%__B | %__A) on the <2 x i64> bitcasts and
; selects it over %__W per lane, using the low 2 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vporq, the SKX run emits kmovd + vorpd, both {%k1}-masked.
822 define <2 x double> @test_mm_mask_or_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
823 ; KNL-LABEL: test_mm_mask_or_pd:
824 ; KNL: ## %bb.0: ## %entry
825 ; KNL-NEXT: kmovw %edi, %k1
826 ; KNL-NEXT: vporq %xmm1, %xmm2, %xmm0 {%k1}
829 ; SKX-LABEL: test_mm_mask_or_pd:
830 ; SKX: ## %bb.0: ## %entry
831 ; SKX-NEXT: kmovd %edi, %k1
832 ; SKX-NEXT: vorpd %xmm1, %xmm2, %xmm0 {%k1}
835 %0 = bitcast <2 x double> %__A to <2 x i64>
836 %1 = bitcast <2 x double> %__B to <2 x i64>
837 %or.i.i = or <2 x i64> %1, %0
838 %2 = bitcast <2 x i64> %or.i.i to <2 x double>
839 %3 = bitcast i8 %__U to <8 x i1>
840 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
841 %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> %__W
; Zero-masked OR on <2 x double>: computes (%__B | %__A) on the <2 x i64> bitcasts and
; selects it against zeroinitializer per lane, using the low 2 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vporq, the SKX run emits kmovd + vorpd, both with {%k1} {z}.
845 define <2 x double> @test_mm_maskz_or_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
846 ; KNL-LABEL: test_mm_maskz_or_pd:
847 ; KNL: ## %bb.0: ## %entry
848 ; KNL-NEXT: kmovw %edi, %k1
849 ; KNL-NEXT: vporq %xmm0, %xmm1, %xmm0 {%k1} {z}
852 ; SKX-LABEL: test_mm_maskz_or_pd:
853 ; SKX: ## %bb.0: ## %entry
854 ; SKX-NEXT: kmovd %edi, %k1
855 ; SKX-NEXT: vorpd %xmm0, %xmm1, %xmm0 {%k1} {z}
858 %0 = bitcast <2 x double> %__A to <2 x i64>
859 %1 = bitcast <2 x double> %__B to <2 x i64>
860 %or.i.i = or <2 x i64> %1, %0
861 %2 = bitcast <2 x i64> %or.i.i to <2 x double>
862 %3 = bitcast i8 %__U to <8 x i1>
863 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
864 %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> zeroinitializer
; Merge-masked OR on <8 x float>: computes (%__B | %__A) on the <8 x i32> bitcasts and
; selects it over %__W per lane, using all 8 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpord, the SKX run emits kmovd + vorps, both {%k1}-masked.
868 define <8 x float> @test_mm256_mask_or_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
869 ; KNL-LABEL: test_mm256_mask_or_ps:
870 ; KNL: ## %bb.0: ## %entry
871 ; KNL-NEXT: kmovw %edi, %k1
872 ; KNL-NEXT: vpord %ymm1, %ymm2, %ymm0 {%k1}
875 ; SKX-LABEL: test_mm256_mask_or_ps:
876 ; SKX: ## %bb.0: ## %entry
877 ; SKX-NEXT: kmovd %edi, %k1
878 ; SKX-NEXT: vorps %ymm1, %ymm2, %ymm0 {%k1}
881 %0 = bitcast <8 x float> %__A to <8 x i32>
882 %1 = bitcast <8 x float> %__B to <8 x i32>
883 %or.i.i = or <8 x i32> %1, %0
884 %2 = bitcast <8 x i32> %or.i.i to <8 x float>
885 %3 = bitcast i8 %__U to <8 x i1>
886 %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> %__W
; Zero-masked OR on <8 x float>: computes (%__B | %__A) on the <8 x i32> bitcasts and
; selects it against zeroinitializer per lane, using all 8 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpord, the SKX run emits kmovd + vorps, both with {%k1} {z}.
890 define <8 x float> @test_mm256_maskz_or_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
891 ; KNL-LABEL: test_mm256_maskz_or_ps:
892 ; KNL: ## %bb.0: ## %entry
893 ; KNL-NEXT: kmovw %edi, %k1
894 ; KNL-NEXT: vpord %ymm0, %ymm1, %ymm0 {%k1} {z}
897 ; SKX-LABEL: test_mm256_maskz_or_ps:
898 ; SKX: ## %bb.0: ## %entry
899 ; SKX-NEXT: kmovd %edi, %k1
900 ; SKX-NEXT: vorps %ymm0, %ymm1, %ymm0 {%k1} {z}
903 %0 = bitcast <8 x float> %__A to <8 x i32>
904 %1 = bitcast <8 x float> %__B to <8 x i32>
905 %or.i.i = or <8 x i32> %1, %0
906 %2 = bitcast <8 x i32> %or.i.i to <8 x float>
907 %3 = bitcast i8 %__U to <8 x i1>
908 %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> zeroinitializer
; Merge-masked OR on <4 x float>: computes (%__B | %__A) on the <4 x i32> bitcasts and
; selects it over %__W per lane, using the low 4 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpord, the SKX run emits kmovd + vorps, both {%k1}-masked.
912 define <4 x float> @test_mm_mask_or_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
913 ; KNL-LABEL: test_mm_mask_or_ps:
914 ; KNL: ## %bb.0: ## %entry
915 ; KNL-NEXT: kmovw %edi, %k1
916 ; KNL-NEXT: vpord %xmm1, %xmm2, %xmm0 {%k1}
919 ; SKX-LABEL: test_mm_mask_or_ps:
920 ; SKX: ## %bb.0: ## %entry
921 ; SKX-NEXT: kmovd %edi, %k1
922 ; SKX-NEXT: vorps %xmm1, %xmm2, %xmm0 {%k1}
925 %0 = bitcast <4 x float> %__A to <4 x i32>
926 %1 = bitcast <4 x float> %__B to <4 x i32>
927 %or.i.i = or <4 x i32> %1, %0
928 %2 = bitcast <4 x i32> %or.i.i to <4 x float>
929 %3 = bitcast i8 %__U to <8 x i1>
930 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
931 %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> %__W
; Zero-masked OR on <4 x float>: computes (%__B | %__A) on the <4 x i32> bitcasts and
; selects it against zeroinitializer per lane, using the low 4 bits of %__U as the lane mask.
; Expected: the KNL run emits kmovw + vpord, the SKX run emits kmovd + vorps, both with {%k1} {z}.
935 define <4 x float> @test_mm_maskz_or_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
936 ; KNL-LABEL: test_mm_maskz_or_ps:
937 ; KNL: ## %bb.0: ## %entry
938 ; KNL-NEXT: kmovw %edi, %k1
939 ; KNL-NEXT: vpord %xmm0, %xmm1, %xmm0 {%k1} {z}
942 ; SKX-LABEL: test_mm_maskz_or_ps:
943 ; SKX: ## %bb.0: ## %entry
944 ; SKX-NEXT: kmovd %edi, %k1
945 ; SKX-NEXT: vorps %xmm0, %xmm1, %xmm0 {%k1} {z}
948 %0 = bitcast <4 x float> %__A to <4 x i32>
949 %1 = bitcast <4 x float> %__B to <4 x i32>
950 %or.i.i = or <4 x i32> %1, %0
951 %2 = bitcast <4 x i32> %or.i.i to <4 x float>
952 %3 = bitcast i8 %__U to <8 x i1>
953 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
954 %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> zeroinitializer