1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=ALL --check-prefix=KNL
3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=SKX
; Integer AND of <16 x i32>: the leading add pins the integer domain, so both
; KNL and SKX select vpandd (not the FP-domain form).
6 define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
8 ; ALL: ## %bb.0: ## %entry
9 ; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
10 ; ALL-NEXT: vpandd %zmm1, %zmm0, %zmm0
13 ; Force the execution domain with an add.
14 %a2 = add <16 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2,
15 i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
16 %x = and <16 x i32> %a2, %b
; (a & ~b) on <16 x i32>: the xor-with-all-ones plus and folds into a single
; vpandnd; the add again forces the integer domain.
20 define <16 x i32> @vpandnd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
22 ; ALL: ## %bb.0: ## %entry
23 ; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
24 ; ALL-NEXT: vpandnd %zmm0, %zmm1, %zmm0
27 ; Force the execution domain with an add.
28 %a2 = add <16 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3,
29 i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
30 %b2 = xor <16 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1,
31 i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
32 %x = and <16 x i32> %a2, %b2
; Integer OR of <16 x i32>; add forces the integer domain so vpord is chosen.
36 define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
38 ; ALL: ## %bb.0: ## %entry
39 ; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
40 ; ALL-NEXT: vpord %zmm1, %zmm0, %zmm0
43 ; Force the execution domain with an add.
44 %a2 = add <16 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4,
45 i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
46 %x = or <16 x i32> %a2, %b
; Integer XOR of <16 x i32>; add forces the integer domain so vpxord is chosen.
50 define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
52 ; ALL: ## %bb.0: ## %entry
53 ; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
54 ; ALL-NEXT: vpxord %zmm1, %zmm0, %zmm0
57 ; Force the execution domain with an add.
58 %a2 = add <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5,
59 i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
60 %x = xor <16 x i32> %a2, %b
; 64-bit-element AND: same pattern as @vpandd but on <8 x i64>, selecting the
; q-suffixed instruction with a {1to8} broadcast of the add constant.
64 define <8 x i64> @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
66 ; ALL: ## %bb.0: ## %entry
67 ; ALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
68 ; ALL-NEXT: vpandq %zmm1, %zmm0, %zmm0
71 ; Force the execution domain with an add.
72 %a2 = add <8 x i64> %a, <i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6>
73 %x = and <8 x i64> %a2, %b
; (a & ~b) on <8 x i64>: xor-with-all-ones plus and folds into vpandnq.
77 define <8 x i64> @vpandnq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
79 ; ALL: ## %bb.0: ## %entry
80 ; ALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
81 ; ALL-NEXT: vpandnq %zmm0, %zmm1, %zmm0
84 ; Force the execution domain with an add.
85 %a2 = add <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
86 %b2 = xor <8 x i64> %b, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
87 %x = and <8 x i64> %a2, %b2
; 64-bit-element OR; integer domain forced by the add, selecting vporq.
91 define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
93 ; ALL: ## %bb.0: ## %entry
94 ; ALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
95 ; ALL-NEXT: vporq %zmm1, %zmm0, %zmm0
98 ; Force the execution domain with an add.
99 %a2 = add <8 x i64> %a, <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
100 %x = or <8 x i64> %a2, %b
; 64-bit-element XOR; integer domain forced by the add, selecting vpxorq.
104 define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
106 ; ALL: ## %bb.0: ## %entry
107 ; ALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
108 ; ALL-NEXT: vpxorq %zmm1, %zmm0, %zmm0
111 ; Force the execution domain with an add.
112 %a2 = add <8 x i64> %a, <i64 9, i64 9, i64 9, i64 9, i64 9, i64 9, i64 9, i64 9>
113 %x = xor <8 x i64> %a2, %b
; OR with a splat-of-2 constant folds to a {1to8} embedded-broadcast memory
; operand. SKX emits the FP-domain vorpd; KNL stays integer with vporq.
118 define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
119 ; KNL-LABEL: orq_broadcast:
121 ; KNL-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
124 ; SKX-LABEL: orq_broadcast:
126 ; SKX-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
128 %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
; AND with a loaded operand: the load folds into the instruction's memory
; operand. SKX prefers the FP-domain vandps; KNL uses vpandd.
132 define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
133 ; KNL-LABEL: andd512fold:
134 ; KNL: ## %bb.0: ## %entry
135 ; KNL-NEXT: vpandd (%rdi), %zmm0, %zmm0
138 ; SKX-LABEL: andd512fold:
139 ; SKX: ## %bb.0: ## %entry
140 ; SKX-NEXT: vandps (%rdi), %zmm0, %zmm0
143 %a = load <16 x i32>, <16 x i32>* %x, align 4
144 %b = and <16 x i32> %y, %a
; Scalar load + splat (insertelement/shufflevector of lane 0) feeding an AND
; folds into a {1to8} broadcast memory operand on both CPUs.
148 define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
149 ; KNL-LABEL: andqbrst:
150 ; KNL: ## %bb.0: ## %entry
151 ; KNL-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
154 ; SKX-LABEL: andqbrst:
155 ; SKX: ## %bb.0: ## %entry
156 ; SKX-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0
159 %a = load i64, i64* %ap, align 8
160 %b = insertelement <8 x i64> undef, i64 %a, i32 0
161 %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
162 %d = and <8 x i64> %p1, %c
; Byte-element AND is bitwise, so it lowers to a full-width 512-bit op even
; without AVX512BW: vpandd on KNL, vandps on SKX.
166 define <64 x i8> @and_v64i8(<64 x i8> %a, <64 x i8> %b) {
167 ; KNL-LABEL: and_v64i8:
169 ; KNL-NEXT: vpandd %zmm1, %zmm0, %zmm0
172 ; SKX-LABEL: and_v64i8:
174 ; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0
176 %res = and <64 x i8> %a, %b
; Byte-element ANDNOT: SKX folds it to one 512-bit vandnps, while KNL's
; lowering shown here splits into two 256-bit vandnps halves and reassembles.
180 define <64 x i8> @andn_v64i8(<64 x i8> %a, <64 x i8> %b) {
181 ; KNL-LABEL: andn_v64i8:
183 ; KNL-NEXT: vextractf64x4 $1, %zmm0, %ymm2
184 ; KNL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
185 ; KNL-NEXT: vandnps %ymm2, %ymm3, %ymm2
186 ; KNL-NEXT: vandnps %ymm0, %ymm1, %ymm0
187 ; KNL-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
190 ; SKX-LABEL: andn_v64i8:
192 ; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0
194 %b2 = xor <64 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
195 i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
196 i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
197 i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
198 %res = and <64 x i8> %a, %b2
; Byte-element OR lowers to a full-width 512-bit op: vpord (KNL) / vorps (SKX).
202 define <64 x i8> @or_v64i8(<64 x i8> %a, <64 x i8> %b) {
203 ; KNL-LABEL: or_v64i8:
205 ; KNL-NEXT: vpord %zmm1, %zmm0, %zmm0
208 ; SKX-LABEL: or_v64i8:
210 ; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0
212 %res = or <64 x i8> %a, %b
; Byte-element XOR lowers to a full-width 512-bit op: vpxord (KNL) / vxorps (SKX).
216 define <64 x i8> @xor_v64i8(<64 x i8> %a, <64 x i8> %b) {
217 ; KNL-LABEL: xor_v64i8:
219 ; KNL-NEXT: vpxord %zmm1, %zmm0, %zmm0
222 ; SKX-LABEL: xor_v64i8:
224 ; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0
226 %res = xor <64 x i8> %a, %b
; Word-element AND lowers to a full-width 512-bit op: vpandd (KNL) / vandps (SKX).
230 define <32 x i16> @and_v32i16(<32 x i16> %a, <32 x i16> %b) {
231 ; KNL-LABEL: and_v32i16:
233 ; KNL-NEXT: vpandd %zmm1, %zmm0, %zmm0
236 ; SKX-LABEL: and_v32i16:
238 ; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0
240 %res = and <32 x i16> %a, %b
; Word-element ANDNOT: SKX uses a single 512-bit vandnps; the KNL lowering
; shown splits into two 256-bit vandnps halves and reassembles, as in @andn_v64i8.
244 define <32 x i16> @andn_v32i16(<32 x i16> %a, <32 x i16> %b) {
245 ; KNL-LABEL: andn_v32i16:
247 ; KNL-NEXT: vextractf64x4 $1, %zmm0, %ymm2
248 ; KNL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
249 ; KNL-NEXT: vandnps %ymm2, %ymm3, %ymm2
250 ; KNL-NEXT: vandnps %ymm0, %ymm1, %ymm0
251 ; KNL-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
254 ; SKX-LABEL: andn_v32i16:
256 ; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0
258 %b2 = xor <32 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1,
259 i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
260 %res = and <32 x i16> %a, %b2
; Word-element OR lowers to a full-width 512-bit op: vpord (KNL) / vorps (SKX).
264 define <32 x i16> @or_v32i16(<32 x i16> %a, <32 x i16> %b) {
265 ; KNL-LABEL: or_v32i16:
267 ; KNL-NEXT: vpord %zmm1, %zmm0, %zmm0
270 ; SKX-LABEL: or_v32i16:
272 ; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0
274 %res = or <32 x i16> %a, %b
; Word-element XOR lowers to a full-width 512-bit op: vpxord (KNL) / vxorps (SKX).
278 define <32 x i16> @xor_v32i16(<32 x i16> %a, <32 x i16> %b) {
279 ; KNL-LABEL: xor_v32i16:
281 ; KNL-NEXT: vpxord %zmm1, %zmm0, %zmm0
284 ; SKX-LABEL: xor_v32i16:
286 ; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0
288 %res = xor <32 x i16> %a, %b
; Masked AND through float bitcasts: the select-on-mask folds into the logic
; op's {%k1} predicate with %passThru (zmm2) as the merge source; the fadd
; keeps the select from being optimized away.
292 define <16 x float> @masked_and_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) {
293 ; KNL-LABEL: masked_and_v16f32:
295 ; KNL-NEXT: kmovw %edi, %k1
296 ; KNL-NEXT: vpandd %zmm1, %zmm0, %zmm2 {%k1}
297 ; KNL-NEXT: vaddps %zmm2, %zmm3, %zmm0
300 ; SKX-LABEL: masked_and_v16f32:
302 ; SKX-NEXT: kmovd %edi, %k1
303 ; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1}
304 ; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0
306 %a1 = bitcast <16 x float> %a to <16 x i32>
307 %b1 = bitcast <16 x float> %b to <16 x i32>
308 %passThru1 = bitcast <16 x float> %passThru to <16 x i32>
309 %mask1 = bitcast i16 %mask to <16 x i1>
310 %op = and <16 x i32> %a1, %b1
311 %select = select <16 x i1> %mask1, <16 x i32> %op, <16 x i32> %passThru1
312 %cast = bitcast <16 x i32> %select to <16 x float>
313 %add = fadd <16 x float> %c, %cast
314 ret <16 x float> %add
; Masked OR through float bitcasts (select folds into the op's {%k1} merge).
; Fixed a copy-paste bug: this test performed 'and' (and checked
; vpandd/vandps) despite its name; it now exercises OR as intended.
317 define <16 x float> @masked_or_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) {
318 ; KNL-LABEL: masked_or_v16f32:
320 ; KNL-NEXT: kmovw %edi, %k1
321 ; KNL-NEXT: vpord %zmm1, %zmm0, %zmm2 {%k1}
322 ; KNL-NEXT: vaddps %zmm2, %zmm3, %zmm0
325 ; SKX-LABEL: masked_or_v16f32:
327 ; SKX-NEXT: kmovd %edi, %k1
328 ; SKX-NEXT: vorps %zmm1, %zmm0, %zmm2 {%k1}
329 ; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0
331 %a1 = bitcast <16 x float> %a to <16 x i32>
332 %b1 = bitcast <16 x float> %b to <16 x i32>
333 %passThru1 = bitcast <16 x float> %passThru to <16 x i32>
334 %mask1 = bitcast i16 %mask to <16 x i1>
335 %op = or <16 x i32> %a1, %b1
336 %select = select <16 x i1> %mask1, <16 x i32> %op, <16 x i32> %passThru1
337 %cast = bitcast <16 x i32> %select to <16 x float>
338 %add = fadd <16 x float> %c, %cast
339 ret <16 x float> %add
; Masked XOR through float bitcasts (select folds into the op's {%k1} merge).
; Fixed a copy-paste bug: this test performed 'and' (and checked
; vpandd/vandps) despite its name; it now exercises XOR as intended.
342 define <16 x float> @masked_xor_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) {
343 ; KNL-LABEL: masked_xor_v16f32:
345 ; KNL-NEXT: kmovw %edi, %k1
346 ; KNL-NEXT: vpxord %zmm1, %zmm0, %zmm2 {%k1}
347 ; KNL-NEXT: vaddps %zmm2, %zmm3, %zmm0
350 ; SKX-LABEL: masked_xor_v16f32:
352 ; SKX-NEXT: kmovd %edi, %k1
353 ; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm2 {%k1}
354 ; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0
356 %a1 = bitcast <16 x float> %a to <16 x i32>
357 %b1 = bitcast <16 x float> %b to <16 x i32>
358 %passThru1 = bitcast <16 x float> %passThru to <16 x i32>
359 %mask1 = bitcast i16 %mask to <16 x i1>
360 %op = xor <16 x i32> %a1, %b1
361 %select = select <16 x i1> %mask1, <16 x i32> %op, <16 x i32> %passThru1
362 %cast = bitcast <16 x i32> %select to <16 x float>
363 %add = fadd <16 x float> %c, %cast
364 ret <16 x float> %add
; Masked AND through double bitcasts: select-on-mask folds into the 64-bit
; logic op's {%k1} predicate with %passThru (zmm2) as the merge source.
367 define <8 x double> @masked_and_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) {
368 ; KNL-LABEL: masked_and_v8f64:
370 ; KNL-NEXT: kmovw %edi, %k1
371 ; KNL-NEXT: vpandq %zmm1, %zmm0, %zmm2 {%k1}
372 ; KNL-NEXT: vaddpd %zmm2, %zmm3, %zmm0
375 ; SKX-LABEL: masked_and_v8f64:
377 ; SKX-NEXT: kmovd %edi, %k1
378 ; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1}
379 ; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0
381 %a1 = bitcast <8 x double> %a to <8 x i64>
382 %b1 = bitcast <8 x double> %b to <8 x i64>
383 %passThru1 = bitcast <8 x double> %passThru to <8 x i64>
384 %mask1 = bitcast i8 %mask to <8 x i1>
385 %op = and <8 x i64> %a1, %b1
386 %select = select <8 x i1> %mask1, <8 x i64> %op, <8 x i64> %passThru1
387 %cast = bitcast <8 x i64> %select to <8 x double>
388 %add = fadd <8 x double> %c, %cast
389 ret <8 x double> %add
; Masked OR through double bitcasts (select folds into the op's {%k1} merge).
; Fixed a copy-paste bug: this test performed 'and' (and checked
; vpandq/vandpd) despite its name; it now exercises OR as intended.
392 define <8 x double> @masked_or_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) {
393 ; KNL-LABEL: masked_or_v8f64:
395 ; KNL-NEXT: kmovw %edi, %k1
396 ; KNL-NEXT: vporq %zmm1, %zmm0, %zmm2 {%k1}
397 ; KNL-NEXT: vaddpd %zmm2, %zmm3, %zmm0
400 ; SKX-LABEL: masked_or_v8f64:
402 ; SKX-NEXT: kmovd %edi, %k1
403 ; SKX-NEXT: vorpd %zmm1, %zmm0, %zmm2 {%k1}
404 ; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0
406 %a1 = bitcast <8 x double> %a to <8 x i64>
407 %b1 = bitcast <8 x double> %b to <8 x i64>
408 %passThru1 = bitcast <8 x double> %passThru to <8 x i64>
409 %mask1 = bitcast i8 %mask to <8 x i1>
410 %op = or <8 x i64> %a1, %b1
411 %select = select <8 x i1> %mask1, <8 x i64> %op, <8 x i64> %passThru1
412 %cast = bitcast <8 x i64> %select to <8 x double>
413 %add = fadd <8 x double> %c, %cast
414 ret <8 x double> %add
; Masked XOR through double bitcasts (select folds into the op's {%k1} merge).
; Fixed a copy-paste bug: this test performed 'and' (and checked
; vpandq/vandpd) despite its name; it now exercises XOR as intended.
417 define <8 x double> @masked_xor_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) {
418 ; KNL-LABEL: masked_xor_v8f64:
420 ; KNL-NEXT: kmovw %edi, %k1
421 ; KNL-NEXT: vpxorq %zmm1, %zmm0, %zmm2 {%k1}
422 ; KNL-NEXT: vaddpd %zmm2, %zmm3, %zmm0
425 ; SKX-LABEL: masked_xor_v8f64:
427 ; SKX-NEXT: kmovd %edi, %k1
428 ; SKX-NEXT: vxorpd %zmm1, %zmm0, %zmm2 {%k1}
429 ; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0
431 %a1 = bitcast <8 x double> %a to <8 x i64>
432 %b1 = bitcast <8 x double> %b to <8 x i64>
433 %passThru1 = bitcast <8 x double> %passThru to <8 x i64>
434 %mask1 = bitcast i8 %mask to <8 x i1>
435 %op = xor <8 x i64> %a1, %b1
436 %select = select <8 x i1> %mask1, <8 x i64> %op, <8 x i64> %passThru1
437 %cast = bitcast <8 x i64> %select to <8 x double>
438 %add = fadd <8 x double> %c, %cast
439 ret <8 x double> %add
; _mm512_mask_and_epi32 pattern: the i64 AND plus i32-granular select narrows
; to a single masked 32-bit op merging into %__src (zmm0).
442 define <8 x i64> @test_mm512_mask_and_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
443 ; KNL-LABEL: test_mm512_mask_and_epi32:
444 ; KNL: ## %bb.0: ## %entry
445 ; KNL-NEXT: kmovw %edi, %k1
446 ; KNL-NEXT: vpandd %zmm2, %zmm1, %zmm0 {%k1}
449 ; SKX-LABEL: test_mm512_mask_and_epi32:
450 ; SKX: ## %bb.0: ## %entry
451 ; SKX-NEXT: kmovd %edi, %k1
452 ; SKX-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1}
455 %and1.i.i = and <8 x i64> %__a, %__b
456 %0 = bitcast <8 x i64> %and1.i.i to <16 x i32>
457 %1 = bitcast <8 x i64> %__src to <16 x i32>
458 %2 = bitcast i16 %__k to <16 x i1>
459 %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1
460 %4 = bitcast <16 x i32> %3 to <8 x i64>
; _mm512_mask_or_epi32 pattern: i64 OR plus i32-granular select narrows to a
; single masked 32-bit op merging into %__src (zmm0).
464 define <8 x i64> @test_mm512_mask_or_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
465 ; KNL-LABEL: test_mm512_mask_or_epi32:
466 ; KNL: ## %bb.0: ## %entry
467 ; KNL-NEXT: kmovw %edi, %k1
468 ; KNL-NEXT: vpord %zmm2, %zmm1, %zmm0 {%k1}
471 ; SKX-LABEL: test_mm512_mask_or_epi32:
472 ; SKX: ## %bb.0: ## %entry
473 ; SKX-NEXT: kmovd %edi, %k1
474 ; SKX-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1}
477 %or1.i.i = or <8 x i64> %__a, %__b
478 %0 = bitcast <8 x i64> %or1.i.i to <16 x i32>
479 %1 = bitcast <8 x i64> %__src to <16 x i32>
480 %2 = bitcast i16 %__k to <16 x i1>
481 %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1
482 %4 = bitcast <16 x i32> %3 to <8 x i64>
; _mm512_mask_xor_epi32 pattern: i64 XOR plus i32-granular select narrows to a
; single masked 32-bit op merging into %__src (zmm0).
486 define <8 x i64> @test_mm512_mask_xor_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
487 ; KNL-LABEL: test_mm512_mask_xor_epi32:
488 ; KNL: ## %bb.0: ## %entry
489 ; KNL-NEXT: kmovw %edi, %k1
490 ; KNL-NEXT: vpxord %zmm2, %zmm1, %zmm0 {%k1}
493 ; SKX-LABEL: test_mm512_mask_xor_epi32:
494 ; SKX: ## %bb.0: ## %entry
495 ; SKX-NEXT: kmovd %edi, %k1
496 ; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1}
499 %xor1.i.i = xor <8 x i64> %__a, %__b
500 %0 = bitcast <8 x i64> %xor1.i.i to <16 x i32>
501 %1 = bitcast <8 x i64> %__src to <16 x i32>
502 %2 = bitcast i16 %__k to <16 x i1>
503 %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1
504 %4 = bitcast <16 x i32> %3 to <8 x i64>
; _mm512_mask_xor_pd pattern: the select merges into %__W (zmm0) under %k1;
; SKX uses the FP-domain vxorpd, KNL the integer vpxorq.
508 define <8 x double> @test_mm512_mask_xor_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
509 ; KNL-LABEL: test_mm512_mask_xor_pd:
510 ; KNL: ## %bb.0: ## %entry
511 ; KNL-NEXT: kmovw %edi, %k1
512 ; KNL-NEXT: vpxorq %zmm2, %zmm1, %zmm0 {%k1}
515 ; SKX-LABEL: test_mm512_mask_xor_pd:
516 ; SKX: ## %bb.0: ## %entry
517 ; SKX-NEXT: kmovd %edi, %k1
518 ; SKX-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1}
521 %0 = bitcast <8 x double> %__A to <8 x i64>
522 %1 = bitcast <8 x double> %__B to <8 x i64>
523 %xor.i.i = xor <8 x i64> %0, %1
524 %2 = bitcast <8 x i64> %xor.i.i to <8 x double>
525 %3 = bitcast i8 %__U to <8 x i1>
526 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W
; _mm512_maskz_xor_pd pattern: select against zeroinitializer becomes the
; zeroing form {%k1} {z}.
530 define <8 x double> @test_mm512_maskz_xor_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
531 ; KNL-LABEL: test_mm512_maskz_xor_pd:
532 ; KNL: ## %bb.0: ## %entry
533 ; KNL-NEXT: kmovw %edi, %k1
534 ; KNL-NEXT: vpxorq %zmm1, %zmm0, %zmm0 {%k1} {z}
537 ; SKX-LABEL: test_mm512_maskz_xor_pd:
538 ; SKX: ## %bb.0: ## %entry
539 ; SKX-NEXT: kmovd %edi, %k1
540 ; SKX-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z}
543 %0 = bitcast <8 x double> %__A to <8 x i64>
544 %1 = bitcast <8 x double> %__B to <8 x i64>
545 %xor.i.i = xor <8 x i64> %0, %1
546 %2 = bitcast <8 x i64> %xor.i.i to <8 x double>
547 %3 = bitcast i8 %__U to <8 x i1>
548 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
; _mm512_mask_xor_ps pattern: merge-masked XOR into %__W (zmm0) under %k1;
; SKX uses vxorps, KNL the integer vpxord.
552 define <16 x float> @test_mm512_mask_xor_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
553 ; KNL-LABEL: test_mm512_mask_xor_ps:
554 ; KNL: ## %bb.0: ## %entry
555 ; KNL-NEXT: kmovw %edi, %k1
556 ; KNL-NEXT: vpxord %zmm2, %zmm1, %zmm0 {%k1}
559 ; SKX-LABEL: test_mm512_mask_xor_ps:
560 ; SKX: ## %bb.0: ## %entry
561 ; SKX-NEXT: kmovd %edi, %k1
562 ; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1}
565 %0 = bitcast <16 x float> %__A to <16 x i32>
566 %1 = bitcast <16 x float> %__B to <16 x i32>
567 %xor.i.i = xor <16 x i32> %0, %1
568 %2 = bitcast <16 x i32> %xor.i.i to <16 x float>
569 %3 = bitcast i16 %__U to <16 x i1>
570 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W
; _mm512_maskz_xor_ps pattern: select against zeroinitializer becomes the
; zeroing form {%k1} {z}.
574 define <16 x float> @test_mm512_maskz_xor_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
575 ; KNL-LABEL: test_mm512_maskz_xor_ps:
576 ; KNL: ## %bb.0: ## %entry
577 ; KNL-NEXT: kmovw %edi, %k1
578 ; KNL-NEXT: vpxord %zmm1, %zmm0, %zmm0 {%k1} {z}
581 ; SKX-LABEL: test_mm512_maskz_xor_ps:
582 ; SKX: ## %bb.0: ## %entry
583 ; SKX-NEXT: kmovd %edi, %k1
584 ; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z}
587 %0 = bitcast <16 x float> %__A to <16 x i32>
588 %1 = bitcast <16 x float> %__B to <16 x i32>
589 %xor.i.i = xor <16 x i32> %0, %1
590 %2 = bitcast <16 x i32> %xor.i.i to <16 x float>
591 %3 = bitcast i16 %__U to <16 x i1>
592 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
; _mm512_mask_or_pd pattern: note the IR commutes the operands (or %1, %0),
; which is reflected in the printed operand order of vporq/vorpd.
596 define <8 x double> @test_mm512_mask_or_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
597 ; KNL-LABEL: test_mm512_mask_or_pd:
598 ; KNL: ## %bb.0: ## %entry
599 ; KNL-NEXT: kmovw %edi, %k1
600 ; KNL-NEXT: vporq %zmm1, %zmm2, %zmm0 {%k1}
603 ; SKX-LABEL: test_mm512_mask_or_pd:
604 ; SKX: ## %bb.0: ## %entry
605 ; SKX-NEXT: kmovd %edi, %k1
606 ; SKX-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1}
609 %0 = bitcast <8 x double> %__A to <8 x i64>
610 %1 = bitcast <8 x double> %__B to <8 x i64>
611 %or.i.i = or <8 x i64> %1, %0
612 %2 = bitcast <8 x i64> %or.i.i to <8 x double>
613 %3 = bitcast i8 %__U to <8 x i1>
614 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W
; _mm512_maskz_or_pd pattern: zeroing-masked OR, commuted operands as in the
; merge-masked variant.
618 define <8 x double> @test_mm512_maskz_or_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
619 ; KNL-LABEL: test_mm512_maskz_or_pd:
620 ; KNL: ## %bb.0: ## %entry
621 ; KNL-NEXT: kmovw %edi, %k1
622 ; KNL-NEXT: vporq %zmm0, %zmm1, %zmm0 {%k1} {z}
625 ; SKX-LABEL: test_mm512_maskz_or_pd:
626 ; SKX: ## %bb.0: ## %entry
627 ; SKX-NEXT: kmovd %edi, %k1
628 ; SKX-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z}
631 %0 = bitcast <8 x double> %__A to <8 x i64>
632 %1 = bitcast <8 x double> %__B to <8 x i64>
633 %or.i.i = or <8 x i64> %1, %0
634 %2 = bitcast <8 x i64> %or.i.i to <8 x double>
635 %3 = bitcast i8 %__U to <8 x i1>
636 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
; _mm512_mask_or_ps pattern: merge-masked OR into %__W (zmm0), commuted
; operands in the IR mirrored in the asm operand order.
640 define <16 x float> @test_mm512_mask_or_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
641 ; KNL-LABEL: test_mm512_mask_or_ps:
642 ; KNL: ## %bb.0: ## %entry
643 ; KNL-NEXT: kmovw %edi, %k1
644 ; KNL-NEXT: vpord %zmm1, %zmm2, %zmm0 {%k1}
647 ; SKX-LABEL: test_mm512_mask_or_ps:
648 ; SKX: ## %bb.0: ## %entry
649 ; SKX-NEXT: kmovd %edi, %k1
650 ; SKX-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1}
653 %0 = bitcast <16 x float> %__A to <16 x i32>
654 %1 = bitcast <16 x float> %__B to <16 x i32>
655 %or.i.i = or <16 x i32> %1, %0
656 %2 = bitcast <16 x i32> %or.i.i to <16 x float>
657 %3 = bitcast i16 %__U to <16 x i1>
658 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W
; _mm512_maskz_or_ps pattern: zeroing-masked OR ({%k1} {z}).
662 define <16 x float> @test_mm512_maskz_or_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
663 ; KNL-LABEL: test_mm512_maskz_or_ps:
664 ; KNL: ## %bb.0: ## %entry
665 ; KNL-NEXT: kmovw %edi, %k1
666 ; KNL-NEXT: vpord %zmm0, %zmm1, %zmm0 {%k1} {z}
669 ; SKX-LABEL: test_mm512_maskz_or_ps:
670 ; SKX: ## %bb.0: ## %entry
671 ; SKX-NEXT: kmovd %edi, %k1
672 ; SKX-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z}
675 %0 = bitcast <16 x float> %__A to <16 x i32>
676 %1 = bitcast <16 x float> %__B to <16 x i32>
677 %or.i.i = or <16 x i32> %1, %0
678 %2 = bitcast <16 x i32> %or.i.i to <16 x float>
679 %3 = bitcast i16 %__U to <16 x i1>
680 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
; _mm512_mask_and_pd pattern: merge-masked AND into %__W (zmm0); commuted
; operands in the IR mirrored in the asm operand order.
684 define <8 x double> @test_mm512_mask_and_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
685 ; KNL-LABEL: test_mm512_mask_and_pd:
686 ; KNL: ## %bb.0: ## %entry
687 ; KNL-NEXT: kmovw %edi, %k1
688 ; KNL-NEXT: vpandq %zmm1, %zmm2, %zmm0 {%k1}
691 ; SKX-LABEL: test_mm512_mask_and_pd:
692 ; SKX: ## %bb.0: ## %entry
693 ; SKX-NEXT: kmovd %edi, %k1
694 ; SKX-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1}
697 %0 = bitcast <8 x double> %__A to <8 x i64>
698 %1 = bitcast <8 x double> %__B to <8 x i64>
699 %and.i.i = and <8 x i64> %1, %0
700 %2 = bitcast <8 x i64> %and.i.i to <8 x double>
701 %3 = bitcast i8 %__U to <8 x i1>
702 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W
; _mm512_maskz_and_pd pattern: zeroing-masked AND ({%k1} {z}).
706 define <8 x double> @test_mm512_maskz_and_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
707 ; KNL-LABEL: test_mm512_maskz_and_pd:
708 ; KNL: ## %bb.0: ## %entry
709 ; KNL-NEXT: kmovw %edi, %k1
710 ; KNL-NEXT: vpandq %zmm0, %zmm1, %zmm0 {%k1} {z}
713 ; SKX-LABEL: test_mm512_maskz_and_pd:
714 ; SKX: ## %bb.0: ## %entry
715 ; SKX-NEXT: kmovd %edi, %k1
716 ; SKX-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z}
719 %0 = bitcast <8 x double> %__A to <8 x i64>
720 %1 = bitcast <8 x double> %__B to <8 x i64>
721 %and.i.i = and <8 x i64> %1, %0
722 %2 = bitcast <8 x i64> %and.i.i to <8 x double>
723 %3 = bitcast i8 %__U to <8 x i1>
724 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
; _mm512_mask_and_ps pattern: merge-masked AND into %__W (zmm0).
728 define <16 x float> @test_mm512_mask_and_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
729 ; KNL-LABEL: test_mm512_mask_and_ps:
730 ; KNL: ## %bb.0: ## %entry
731 ; KNL-NEXT: kmovw %edi, %k1
732 ; KNL-NEXT: vpandd %zmm1, %zmm2, %zmm0 {%k1}
735 ; SKX-LABEL: test_mm512_mask_and_ps:
736 ; SKX: ## %bb.0: ## %entry
737 ; SKX-NEXT: kmovd %edi, %k1
738 ; SKX-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1}
741 %0 = bitcast <16 x float> %__A to <16 x i32>
742 %1 = bitcast <16 x float> %__B to <16 x i32>
743 %and.i.i = and <16 x i32> %1, %0
744 %2 = bitcast <16 x i32> %and.i.i to <16 x float>
745 %3 = bitcast i16 %__U to <16 x i1>
746 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W
; _mm512_maskz_and_ps pattern: zeroing-masked AND ({%k1} {z}).
750 define <16 x float> @test_mm512_maskz_and_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
751 ; KNL-LABEL: test_mm512_maskz_and_ps:
752 ; KNL: ## %bb.0: ## %entry
753 ; KNL-NEXT: kmovw %edi, %k1
754 ; KNL-NEXT: vpandd %zmm0, %zmm1, %zmm0 {%k1} {z}
757 ; SKX-LABEL: test_mm512_maskz_and_ps:
758 ; SKX: ## %bb.0: ## %entry
759 ; SKX-NEXT: kmovd %edi, %k1
760 ; SKX-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z}
763 %0 = bitcast <16 x float> %__A to <16 x i32>
764 %1 = bitcast <16 x float> %__B to <16 x i32>
765 %and.i.i = and <16 x i32> %1, %0
766 %2 = bitcast <16 x i32> %and.i.i to <16 x float>
767 %3 = bitcast i16 %__U to <16 x i1>
768 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W
; _mm512_mask_andnot_pd pattern: xor-with-all-ones of %__A feeding the and
; folds into a single merge-masked andnot (vpandnq / vandnpd).
772 define <8 x double> @test_mm512_mask_andnot_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
773 ; KNL-LABEL: test_mm512_mask_andnot_pd:
774 ; KNL: ## %bb.0: ## %entry
775 ; KNL-NEXT: kmovw %edi, %k1
776 ; KNL-NEXT: vpandnq %zmm2, %zmm1, %zmm0 {%k1}
779 ; SKX-LABEL: test_mm512_mask_andnot_pd:
780 ; SKX: ## %bb.0: ## %entry
781 ; SKX-NEXT: kmovd %edi, %k1
782 ; SKX-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1}
785 %0 = bitcast <8 x double> %__A to <8 x i64>
786 %neg.i.i = xor <8 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
787 %1 = bitcast <8 x double> %__B to <8 x i64>
788 %and.i.i = and <8 x i64> %1, %neg.i.i
789 %2 = bitcast <8 x i64> %and.i.i to <8 x double>
790 %3 = bitcast i8 %__U to <8 x i1>
791 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W
; _mm512_maskz_andnot_pd pattern: zeroing-masked andnot ({%k1} {z}).
795 define <8 x double> @test_mm512_maskz_andnot_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
796 ; KNL-LABEL: test_mm512_maskz_andnot_pd:
797 ; KNL: ## %bb.0: ## %entry
798 ; KNL-NEXT: kmovw %edi, %k1
799 ; KNL-NEXT: vpandnq %zmm1, %zmm0, %zmm0 {%k1} {z}
802 ; SKX-LABEL: test_mm512_maskz_andnot_pd:
803 ; SKX: ## %bb.0: ## %entry
804 ; SKX-NEXT: kmovd %edi, %k1
805 ; SKX-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z}
808 %0 = bitcast <8 x double> %__A to <8 x i64>
809 %neg.i.i = xor <8 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
810 %1 = bitcast <8 x double> %__B to <8 x i64>
811 %and.i.i = and <8 x i64> %1, %neg.i.i
812 %2 = bitcast <8 x i64> %and.i.i to <8 x double>
813 %3 = bitcast i8 %__U to <8 x i1>
814 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
; _mm512_mask_andnot_ps pattern: merge-masked andnot (vpandnd / vandnps).
818 define <16 x float> @test_mm512_mask_andnot_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
819 ; KNL-LABEL: test_mm512_mask_andnot_ps:
820 ; KNL: ## %bb.0: ## %entry
821 ; KNL-NEXT: kmovw %edi, %k1
822 ; KNL-NEXT: vpandnd %zmm2, %zmm1, %zmm0 {%k1}
825 ; SKX-LABEL: test_mm512_mask_andnot_ps:
826 ; SKX: ## %bb.0: ## %entry
827 ; SKX-NEXT: kmovd %edi, %k1
828 ; SKX-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1}
831 %0 = bitcast <16 x float> %__A to <16 x i32>
832 %neg.i.i = xor <16 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
833 %1 = bitcast <16 x float> %__B to <16 x i32>
834 %and.i.i = and <16 x i32> %1, %neg.i.i
835 %2 = bitcast <16 x i32> %and.i.i to <16 x float>
836 %3 = bitcast i16 %__U to <16 x i1>
837 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W
; _mm512_maskz_andnot_ps pattern: zeroing-masked andnot ({%k1} {z}).
841 define <16 x float> @test_mm512_maskz_andnot_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
842 ; KNL-LABEL: test_mm512_maskz_andnot_ps:
843 ; KNL: ## %bb.0: ## %entry
844 ; KNL-NEXT: kmovw %edi, %k1
845 ; KNL-NEXT: vpandnd %zmm1, %zmm0, %zmm0 {%k1} {z}
848 ; SKX-LABEL: test_mm512_maskz_andnot_ps:
849 ; SKX: ## %bb.0: ## %entry
850 ; SKX-NEXT: kmovd %edi, %k1
851 ; SKX-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z}
854 %0 = bitcast <16 x float> %__A to <16 x i32>
855 %neg.i.i = xor <16 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
856 %1 = bitcast <16 x float> %__B to <16 x i32>
857 %and.i.i = and <16 x i32> %1, %neg.i.i
858 %2 = bitcast <16 x i32> %and.i.i to <16 x float>
859 %3 = bitcast i16 %__U to <16 x i1>
860 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer