1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512F
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VL
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512BW
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512DQ
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512bw,+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; Register-register <8 x double> fadd. Every RUN configuration shares the same
; expectation, a 512-bit vaddpd on zmm registers.
8 define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
9 ; CHECK-LABEL: addpd512:
10 ; CHECK: # %bb.0: # %entry
11 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
14 %add.i = fadd <8 x double> %x, %y
15 ret <8 x double> %add.i
; fadd of %y with a non-splat constant vector (lane 5 is 3.8 while lane 1 is 3.4).
; The constant is expected to be folded into vaddpd as a RIP-relative memory
; operand in every RUN configuration.
18 define <8 x double> @addpd512fold(<8 x double> %y) {
19 ; CHECK-LABEL: addpd512fold:
20 ; CHECK: # %bb.0: # %entry
21 ; CHECK-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0
24 %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
25 ret <8 x double> %add.i
; Register-register <16 x float> fadd. Every RUN configuration expects a
; 512-bit vaddps on zmm registers.
28 define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
29 ; CHECK-LABEL: addps512:
30 ; CHECK: # %bb.0: # %entry
31 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
34 %add.i = fadd <16 x float> %x, %y
35 ret <16 x float> %add.i
; fadd of %y with a non-splat <16 x float> constant (lane 11 breaks the
; otherwise repeating 4-element pattern). The constant is expected to be folded
; into vaddps as a RIP-relative memory operand.
38 define <16 x float> @addps512fold(<16 x float> %y) {
39 ; CHECK-LABEL: addps512fold:
40 ; CHECK: # %bb.0: # %entry
41 ; CHECK-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0
44 %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
45 ret <16 x float> %add.i
; Register-register <8 x double> fsub computing %x - %y. Every RUN
; configuration expects a 512-bit vsubpd on zmm registers.
48 define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
49 ; CHECK-LABEL: subpd512:
50 ; CHECK: # %bb.0: # %entry
51 ; CHECK-NEXT: vsubpd %zmm0, %zmm1, %zmm0
54 %sub.i = fsub <8 x double> %x, %y
55 ret <8 x double> %sub.i
; fsub whose subtrahend is loaded from the pointer argument %x (align 8). The
; load is expected to be folded into vsubpd as the (%rdi) memory operand.
58 define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
59 ; CHECK-LABEL: subpd512fold:
60 ; CHECK: # %bb.0: # %entry
61 ; CHECK-NEXT: vsubpd (%rdi), %zmm0, %zmm0
64 %tmp2 = load <8 x double>, <8 x double>* %x, align 8
65 %sub.i = fsub <8 x double> %y, %tmp2
66 ret <8 x double> %sub.i
; Register-register <16 x float> fsub computing %x - %y. Every RUN
; configuration expects a 512-bit vsubps on zmm registers.
69 define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
70 ; CHECK-LABEL: subps512:
71 ; CHECK: # %bb.0: # %entry
72 ; CHECK-NEXT: vsubps %zmm0, %zmm1, %zmm0
75 %sub.i = fsub <16 x float> %x, %y
76 ret <16 x float> %sub.i
; fsub whose subtrahend is loaded from the pointer argument %x (align 4). The
; load is expected to be folded into vsubps as the (%rdi) memory operand.
79 define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
80 ; CHECK-LABEL: subps512fold:
81 ; CHECK: # %bb.0: # %entry
82 ; CHECK-NEXT: vsubps (%rdi), %zmm0, %zmm0
85 %tmp2 = load <16 x float>, <16 x float>* %x, align 4
86 %sub.i = fsub <16 x float> %y, %tmp2
87 ret <16 x float> %sub.i
; <8 x i64> multiply. The configurations without +avx512dq expect the product
; to be expanded into 32-bit partial products on zmm registers
; (vpsrlq / vpmuludq / vpaddq / vpsllq). The +avx512dq configuration and the
; combined dq+bw+vl configuration expect a single vpmullq on zmm.
90 define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
91 ; AVX512F-LABEL: imulq512:
93 ; AVX512F-NEXT: vpsrlq $32, %zmm1, %zmm2
94 ; AVX512F-NEXT: vpmuludq %zmm0, %zmm2, %zmm2
95 ; AVX512F-NEXT: vpsrlq $32, %zmm0, %zmm3
96 ; AVX512F-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
97 ; AVX512F-NEXT: vpaddq %zmm2, %zmm3, %zmm2
98 ; AVX512F-NEXT: vpsllq $32, %zmm2, %zmm2
99 ; AVX512F-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
100 ; AVX512F-NEXT: vpaddq %zmm2, %zmm0, %zmm0
103 ; AVX512VL-LABEL: imulq512:
105 ; AVX512VL-NEXT: vpsrlq $32, %zmm1, %zmm2
106 ; AVX512VL-NEXT: vpmuludq %zmm0, %zmm2, %zmm2
107 ; AVX512VL-NEXT: vpsrlq $32, %zmm0, %zmm3
108 ; AVX512VL-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
109 ; AVX512VL-NEXT: vpaddq %zmm2, %zmm3, %zmm2
110 ; AVX512VL-NEXT: vpsllq $32, %zmm2, %zmm2
111 ; AVX512VL-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
112 ; AVX512VL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
113 ; AVX512VL-NEXT: retq
115 ; AVX512BW-LABEL: imulq512:
117 ; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm2
118 ; AVX512BW-NEXT: vpmuludq %zmm0, %zmm2, %zmm2
119 ; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm3
120 ; AVX512BW-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
121 ; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2
122 ; AVX512BW-NEXT: vpsllq $32, %zmm2, %zmm2
123 ; AVX512BW-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
124 ; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
125 ; AVX512BW-NEXT: retq
127 ; AVX512DQ-LABEL: imulq512:
129 ; AVX512DQ-NEXT: vpmullq %zmm0, %zmm1, %zmm0
130 ; AVX512DQ-NEXT: retq
132 ; SKX-LABEL: imulq512:
134 ; SKX-NEXT: vpmullq %zmm0, %zmm1, %zmm0
136 %z = mul <8 x i64>%x, %y
; <4 x i64> multiply. Without +avx512dq the product is expected to be expanded
; into 32-bit partial products on ymm registers. With +avx512dq alone the
; operands are expected to be treated as zmm (see the kill annotations) so the
; 512-bit vpmullq can be used. The combined dq+bw+vl configuration expects a
; native ymm vpmullq.
140 define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) {
141 ; AVX512F-LABEL: imulq256:
143 ; AVX512F-NEXT: vpsrlq $32, %ymm1, %ymm2
144 ; AVX512F-NEXT: vpmuludq %ymm0, %ymm2, %ymm2
145 ; AVX512F-NEXT: vpsrlq $32, %ymm0, %ymm3
146 ; AVX512F-NEXT: vpmuludq %ymm3, %ymm1, %ymm3
147 ; AVX512F-NEXT: vpaddq %ymm2, %ymm3, %ymm2
148 ; AVX512F-NEXT: vpsllq $32, %ymm2, %ymm2
149 ; AVX512F-NEXT: vpmuludq %ymm0, %ymm1, %ymm0
150 ; AVX512F-NEXT: vpaddq %ymm2, %ymm0, %ymm0
153 ; AVX512VL-LABEL: imulq256:
155 ; AVX512VL-NEXT: vpsrlq $32, %ymm1, %ymm2
156 ; AVX512VL-NEXT: vpmuludq %ymm0, %ymm2, %ymm2
157 ; AVX512VL-NEXT: vpsrlq $32, %ymm0, %ymm3
158 ; AVX512VL-NEXT: vpmuludq %ymm3, %ymm1, %ymm3
159 ; AVX512VL-NEXT: vpaddq %ymm2, %ymm3, %ymm2
160 ; AVX512VL-NEXT: vpsllq $32, %ymm2, %ymm2
161 ; AVX512VL-NEXT: vpmuludq %ymm0, %ymm1, %ymm0
162 ; AVX512VL-NEXT: vpaddq %ymm2, %ymm0, %ymm0
163 ; AVX512VL-NEXT: retq
165 ; AVX512BW-LABEL: imulq256:
167 ; AVX512BW-NEXT: vpsrlq $32, %ymm1, %ymm2
168 ; AVX512BW-NEXT: vpmuludq %ymm0, %ymm2, %ymm2
169 ; AVX512BW-NEXT: vpsrlq $32, %ymm0, %ymm3
170 ; AVX512BW-NEXT: vpmuludq %ymm3, %ymm1, %ymm3
171 ; AVX512BW-NEXT: vpaddq %ymm2, %ymm3, %ymm2
172 ; AVX512BW-NEXT: vpsllq $32, %ymm2, %ymm2
173 ; AVX512BW-NEXT: vpmuludq %ymm0, %ymm1, %ymm0
174 ; AVX512BW-NEXT: vpaddq %ymm2, %ymm0, %ymm0
175 ; AVX512BW-NEXT: retq
177 ; AVX512DQ-LABEL: imulq256:
179 ; AVX512DQ-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
180 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
181 ; AVX512DQ-NEXT: vpmullq %zmm0, %zmm1, %zmm0
182 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
183 ; AVX512DQ-NEXT: retq
185 ; SKX-LABEL: imulq256:
187 ; SKX-NEXT: vpmullq %ymm0, %ymm1, %ymm0
189 %z = mul <4 x i64>%x, %y
; <2 x i64> multiply. Without +avx512dq the product is expected to be expanded
; into 32-bit partial products on xmm registers. With +avx512dq alone the
; operands are expected to be treated as zmm (see the kill annotations) for a
; 512-bit vpmullq, followed by vzeroupper. The combined dq+bw+vl configuration
; expects a native xmm vpmullq.
193 define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) {
194 ; AVX512F-LABEL: imulq128:
196 ; AVX512F-NEXT: vpsrlq $32, %xmm1, %xmm2
197 ; AVX512F-NEXT: vpmuludq %xmm0, %xmm2, %xmm2
198 ; AVX512F-NEXT: vpsrlq $32, %xmm0, %xmm3
199 ; AVX512F-NEXT: vpmuludq %xmm3, %xmm1, %xmm3
200 ; AVX512F-NEXT: vpaddq %xmm2, %xmm3, %xmm2
201 ; AVX512F-NEXT: vpsllq $32, %xmm2, %xmm2
202 ; AVX512F-NEXT: vpmuludq %xmm0, %xmm1, %xmm0
203 ; AVX512F-NEXT: vpaddq %xmm2, %xmm0, %xmm0
206 ; AVX512VL-LABEL: imulq128:
208 ; AVX512VL-NEXT: vpsrlq $32, %xmm1, %xmm2
209 ; AVX512VL-NEXT: vpmuludq %xmm0, %xmm2, %xmm2
210 ; AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm3
211 ; AVX512VL-NEXT: vpmuludq %xmm3, %xmm1, %xmm3
212 ; AVX512VL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
213 ; AVX512VL-NEXT: vpsllq $32, %xmm2, %xmm2
214 ; AVX512VL-NEXT: vpmuludq %xmm0, %xmm1, %xmm0
215 ; AVX512VL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
216 ; AVX512VL-NEXT: retq
218 ; AVX512BW-LABEL: imulq128:
220 ; AVX512BW-NEXT: vpsrlq $32, %xmm1, %xmm2
221 ; AVX512BW-NEXT: vpmuludq %xmm0, %xmm2, %xmm2
222 ; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm3
223 ; AVX512BW-NEXT: vpmuludq %xmm3, %xmm1, %xmm3
224 ; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
225 ; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
226 ; AVX512BW-NEXT: vpmuludq %xmm0, %xmm1, %xmm0
227 ; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
228 ; AVX512BW-NEXT: retq
230 ; AVX512DQ-LABEL: imulq128:
232 ; AVX512DQ-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
233 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
234 ; AVX512DQ-NEXT: vpmullq %zmm0, %zmm1, %zmm0
235 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
236 ; AVX512DQ-NEXT: vzeroupper
237 ; AVX512DQ-NEXT: retq
239 ; SKX-LABEL: imulq128:
241 ; SKX-NEXT: vpmullq %xmm0, %xmm1, %xmm0
243 %z = mul <2 x i64>%x, %y
; Register-register <8 x double> fmul. Every RUN configuration expects a
; 512-bit vmulpd on zmm registers.
247 define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
248 ; CHECK-LABEL: mulpd512:
249 ; CHECK: # %bb.0: # %entry
250 ; CHECK-NEXT: vmulpd %zmm0, %zmm1, %zmm0
253 %mul.i = fmul <8 x double> %x, %y
254 ret <8 x double> %mul.i
; fmul of %y with a constant vector (a 4-element pattern repeated twice). The
; constant is expected to be folded into vmulpd as a RIP-relative memory operand.
257 define <8 x double> @mulpd512fold(<8 x double> %y) {
258 ; CHECK-LABEL: mulpd512fold:
259 ; CHECK: # %bb.0: # %entry
260 ; CHECK-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0
263 %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
264 ret <8 x double> %mul.i
; Register-register <16 x float> fmul. Every RUN configuration expects a
; 512-bit vmulps on zmm registers.
267 define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
268 ; CHECK-LABEL: mulps512:
269 ; CHECK: # %bb.0: # %entry
270 ; CHECK-NEXT: vmulps %zmm0, %zmm1, %zmm0
273 %mul.i = fmul <16 x float> %x, %y
274 ret <16 x float> %mul.i
; fmul of %y with a constant vector (a 4-element pattern repeated four times).
; The constant is expected to be folded into vmulps as a RIP-relative memory
; operand.
277 define <16 x float> @mulps512fold(<16 x float> %y) {
278 ; CHECK-LABEL: mulps512fold:
279 ; CHECK: # %bb.0: # %entry
280 ; CHECK-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0
283 %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
284 ret <16 x float> %mul.i
; Register-register <8 x double> fdiv computing %x / %y. Every RUN
; configuration expects a 512-bit vdivpd on zmm registers.
287 define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
288 ; CHECK-LABEL: divpd512:
289 ; CHECK: # %bb.0: # %entry
290 ; CHECK-NEXT: vdivpd %zmm0, %zmm1, %zmm0
293 %div.i = fdiv <8 x double> %x, %y
294 ret <8 x double> %div.i
; fdiv of %y by a constant vector (a 4-element pattern repeated twice). The
; divisor is expected to be folded into vdivpd as a RIP-relative memory operand.
297 define <8 x double> @divpd512fold(<8 x double> %y) {
298 ; CHECK-LABEL: divpd512fold:
299 ; CHECK: # %bb.0: # %entry
300 ; CHECK-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0
303 %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
304 ret <8 x double> %div.i
; Register-register <16 x float> fdiv computing %x / %y. Every RUN
; configuration expects a 512-bit vdivps on zmm registers.
307 define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
308 ; CHECK-LABEL: divps512:
309 ; CHECK: # %bb.0: # %entry
310 ; CHECK-NEXT: vdivps %zmm0, %zmm1, %zmm0
313 %div.i = fdiv <16 x float> %x, %y
314 ret <16 x float> %div.i
; fdiv of %y by a <16 x float> constant whose lanes 5 and 13 are 4.5, so the
; two 256-bit halves are identical but the four 128-bit quarters are not. The
; divisor is expected to be folded into vdivps as a RIP-relative memory operand.
317 define <16 x float> @divps512fold(<16 x float> %y) {
318 ; CHECK-LABEL: divps512fold:
319 ; CHECK: # %bb.0: # %entry
320 ; CHECK-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0
323 %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
324 ret <16 x float> %div.i
; Register-register <8 x i64> add. Every RUN configuration expects a 512-bit
; vpaddq on zmm registers.
327 define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
328 ; CHECK-LABEL: vpaddq_test:
330 ; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
332 %x = add <8 x i64> %i, %j
; <8 x i64> add whose second operand is loaded from %j (align 4). The load is
; expected to be folded into vpaddq as the (%rdi) memory operand.
336 define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
337 ; CHECK-LABEL: vpaddq_fold_test:
339 ; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0
341 %tmp = load <8 x i64>, <8 x i64>* %j, align 4
342 %x = add <8 x i64> %i, %tmp
; <8 x i64> add with a splat constant (all lanes 2). The constant is expected
; to be used as a RIP-relative {1to8} embedded-broadcast operand of vpaddq.
346 define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
347 ; CHECK-LABEL: vpaddq_broadcast_test:
349 ; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
351 %x = add <8 x i64> %i, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
; <8 x i64> add with a splat built by inserting one loaded i64 into each of the
; eight lanes. The insertelement chain is expected to collapse into a {1to8}
; embedded broadcast from (%rdi) on vpaddq.
355 define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
356 ; CHECK-LABEL: vpaddq_broadcast2_test:
358 ; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0
360 %tmp = load i64, i64* %j
361 %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
362 %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
363 %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
364 %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
365 %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
366 %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
367 %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
368 %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
369 %x = add <8 x i64> %i, %j.7
; Register-register <16 x i32> add. Every RUN configuration expects a 512-bit
; vpaddd on zmm registers.
373 define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
374 ; CHECK-LABEL: vpaddd_test:
376 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
378 %x = add <16 x i32> %i, %j
; <16 x i32> add whose second operand is loaded from %j (align 4). The load is
; expected to be folded into vpaddd as the (%rdi) memory operand.
382 define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
383 ; CHECK-LABEL: vpaddd_fold_test:
385 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0
387 %tmp = load <16 x i32>, <16 x i32>* %j, align 4
388 %x = add <16 x i32> %i, %tmp
; <16 x i32> add with a splat constant (all lanes 3). The constant is expected
; to be used as a RIP-relative {1to16} embedded-broadcast operand of vpaddd.
392 define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
393 ; CHECK-LABEL: vpaddd_broadcast_test:
395 ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
397 %x = add <16 x i32> %i, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; Merge-masked <16 x i32> add. The lane mask is %mask1 != 0, expected to be
; materialised with vptestmd into k1. Lanes where the mask is false keep %i,
; so the add is expected to be emitted as vpaddd with a {%k1} merge mask.
401 define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
402 ; CHECK-LABEL: vpaddd_mask_test:
404 ; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1
405 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1}
407 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
408 %x = add <16 x i32> %i, %j
409 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
; Zero-masked <16 x i32> add. The lane mask is %mask1 != 0 (vptestmd into k1).
; Lanes where the mask is false are zero, so the add is expected to be emitted
; as vpaddd with {%k1} {z}.
413 define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
414 ; CHECK-LABEL: vpaddd_maskz_test:
416 ; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1
417 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
419 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
420 %x = add <16 x i32> %i, %j
421 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Merge-masked <16 x i32> add whose second operand is loaded from %j.ptr. The
; load is expected to be folded as (%rdi) into a vpaddd that merges into %i
; under k1, where k1 is %mask1 != 0.
; NOTE(review): the function is marked readnone although it loads through
; %j.ptr — confirm whether that attribute is intentional.
425 define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
426 ; CHECK-LABEL: vpaddd_mask_fold_test:
428 ; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1
429 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1}
431 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
432 %j = load <16 x i32>, <16 x i32>* %j.ptr
433 %x = add <16 x i32> %i, %j
434 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
; Merge-masked <16 x i32> add with a splat constant (all lanes 4). Expected to
; combine a RIP-relative {1to16} embedded broadcast with a {%k1} merge mask on
; a single vpaddd; k1 is %mask1 != 0.
438 define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
439 ; CHECK-LABEL: vpaddd_mask_broadcast_test:
441 ; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1
442 ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1}
444 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
445 %x = add <16 x i32> %i, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
446 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
; Zero-masked <16 x i32> add whose second operand is loaded from %j.ptr. The
; load is expected to be folded as (%rdi) into a vpaddd with {%k1} {z}, where
; k1 is %mask1 != 0.
; NOTE(review): the function is marked readnone although it loads through
; %j.ptr — confirm whether that attribute is intentional.
450 define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
451 ; CHECK-LABEL: vpaddd_maskz_fold_test:
453 ; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1
454 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
456 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
457 %j = load <16 x i32>, <16 x i32>* %j.ptr
458 %x = add <16 x i32> %i, %j
459 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Zero-masked <16 x i32> add with a splat constant (all lanes 5). Expected to
; combine a RIP-relative {1to16} embedded broadcast with {%k1} {z} on a single
; vpaddd; k1 is %mask1 != 0.
463 define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
464 ; CHECK-LABEL: vpaddd_maskz_broadcast_test:
466 ; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1
467 ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z}
469 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
470 %x = add <16 x i32> %i, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
471 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Register-register <8 x i64> sub computing %i - %j. Every RUN configuration
; expects a 512-bit vpsubq on zmm registers.
475 define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
476 ; CHECK-LABEL: vpsubq_test:
478 ; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0
480 %x = sub <8 x i64> %i, %j
; Register-register <16 x i32> sub computing %i - %j. Every RUN configuration
; expects a 512-bit vpsubd on zmm registers.
484 define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
485 ; CHECK-LABEL: vpsubd_test:
487 ; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0
489 %x = sub <16 x i32> %i, %j
; Register-register <16 x i32> mul. Every RUN configuration expects a 512-bit
; vpmulld on zmm registers.
493 define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
494 ; CHECK-LABEL: vpmulld_test:
496 ; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0
498 %x = mul <16 x i32> %i, %j
; Tail call to the external function sqrtf, declared readnone. Every RUN
; configuration expects the call to be lowered to an inline vsqrtss.
502 declare float @sqrtf(float) readnone
503 define float @sqrtA(float %a) nounwind uwtable readnone ssp {
504 ; CHECK-LABEL: sqrtA:
505 ; CHECK: # %bb.0: # %entry
506 ; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
509 %conv1 = tail call float @sqrtf(float %a) nounwind readnone
; Tail call to the external function sqrt, declared readnone. Every RUN
; configuration expects the call to be lowered to an inline vsqrtsd.
513 declare double @sqrt(double) readnone
514 define double @sqrtB(double %a) nounwind uwtable readnone ssp {
515 ; CHECK-LABEL: sqrtB:
516 ; CHECK: # %bb.0: # %entry
517 ; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
520 %call = tail call double @sqrt(double %a) nounwind readnone
; Scalar float square root through the llvm.sqrt.f32 intrinsic. Every RUN
; configuration expects vsqrtss.
524 declare float @llvm.sqrt.f32(float)
525 define float @sqrtC(float %a) nounwind {
526 ; CHECK-LABEL: sqrtC:
528 ; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
530 %b = call float @llvm.sqrt.f32(float %a)
; <16 x float> square root through the llvm.sqrt.v16f32 intrinsic. Every RUN
; configuration expects a 512-bit vsqrtps.
534 declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
535 define <16 x float> @sqrtD(<16 x float> %a) nounwind {
536 ; CHECK-LABEL: sqrtD:
538 ; CHECK-NEXT: vsqrtps %zmm0, %zmm0
540 %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
; <8 x double> square root through the llvm.sqrt.v8f64 intrinsic. Every RUN
; configuration expects a 512-bit vsqrtpd.
544 declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
545 define <8 x double> @sqrtE(<8 x double> %a) nounwind {
546 ; CHECK-LABEL: sqrtE:
548 ; CHECK-NEXT: vsqrtpd %zmm0, %zmm0
550 %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
; <16 x float> fadd with a splat constant (the same value in all sixteen
; lanes). The constant is expected to be used as a RIP-relative {1to16}
; embedded-broadcast operand of vaddps.
554 define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
555 ; CHECK-LABEL: fadd_broadcast:
557 ; CHECK-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
559 %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
; <8 x i64> add with a splat constant (all lanes 2). The constant is expected
; to be used as a RIP-relative {1to8} embedded-broadcast operand of vpaddq.
563 define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
564 ; CHECK-LABEL: addq_broadcast:
566 ; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
568 %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
; <8 x i64> or with a splat constant (all lanes 2), expected as a RIP-relative
; {1to8} embedded broadcast. The opcode depends on the feature set: the
; configurations without +avx512dq expect the integer form vporq, while the
; +avx512dq and combined dq+bw+vl configurations expect vorpd.
572 define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
573 ; AVX512F-LABEL: orq_broadcast:
575 ; AVX512F-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
578 ; AVX512VL-LABEL: orq_broadcast:
580 ; AVX512VL-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
581 ; AVX512VL-NEXT: retq
583 ; AVX512BW-LABEL: orq_broadcast:
585 ; AVX512BW-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
586 ; AVX512BW-NEXT: retq
588 ; AVX512DQ-LABEL: orq_broadcast:
590 ; AVX512DQ-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
591 ; AVX512DQ-NEXT: retq
593 ; SKX-LABEL: orq_broadcast:
595 ; SKX-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
597 %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
; <16 x i32> and whose second operand is loaded from %x (align 4); the load is
; expected to be folded as the (%rdi) memory operand. The configurations
; without +avx512dq expect vpandd, while the +avx512dq and combined dq+bw+vl
; configurations expect vandps.
601 define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
602 ; AVX512F-LABEL: andd512fold:
603 ; AVX512F: # %bb.0: # %entry
604 ; AVX512F-NEXT: vpandd (%rdi), %zmm0, %zmm0
607 ; AVX512VL-LABEL: andd512fold:
608 ; AVX512VL: # %bb.0: # %entry
609 ; AVX512VL-NEXT: vpandd (%rdi), %zmm0, %zmm0
610 ; AVX512VL-NEXT: retq
612 ; AVX512BW-LABEL: andd512fold:
613 ; AVX512BW: # %bb.0: # %entry
614 ; AVX512BW-NEXT: vpandd (%rdi), %zmm0, %zmm0
615 ; AVX512BW-NEXT: retq
617 ; AVX512DQ-LABEL: andd512fold:
618 ; AVX512DQ: # %bb.0: # %entry
619 ; AVX512DQ-NEXT: vandps (%rdi), %zmm0, %zmm0
620 ; AVX512DQ-NEXT: retq
622 ; SKX-LABEL: andd512fold:
623 ; SKX: # %bb.0: # %entry
624 ; SKX-NEXT: vandps (%rdi), %zmm0, %zmm0
627 %a = load <16 x i32>, <16 x i32>* %x, align 4
628 %b = and <16 x i32> %y, %a
; <8 x i64> and with a splat of one i64 loaded from %ap (insertelement into
; lane 0 followed by a zero-index shufflevector). The splat is expected to
; become a {1to8} embedded broadcast from (%rdi). The configurations without
; +avx512dq expect vpandq; those with +avx512dq expect vandpd.
632 define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
633 ; AVX512F-LABEL: andqbrst:
634 ; AVX512F: # %bb.0: # %entry
635 ; AVX512F-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
638 ; AVX512VL-LABEL: andqbrst:
639 ; AVX512VL: # %bb.0: # %entry
640 ; AVX512VL-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
641 ; AVX512VL-NEXT: retq
643 ; AVX512BW-LABEL: andqbrst:
644 ; AVX512BW: # %bb.0: # %entry
645 ; AVX512BW-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
646 ; AVX512BW-NEXT: retq
648 ; AVX512DQ-LABEL: andqbrst:
649 ; AVX512DQ: # %bb.0: # %entry
650 ; AVX512DQ-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0
651 ; AVX512DQ-NEXT: retq
653 ; SKX-LABEL: andqbrst:
654 ; SKX: # %bb.0: # %entry
655 ; SKX-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0
658 %a = load i64, i64* %ap, align 8
659 %b = insertelement <8 x i64> undef, i64 %a, i32 0
660 %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
661 %d = and <8 x i64> %p1, %c
; Merge-masked <16 x float> fadd of %i and %j into %dst. The parameter list
; continues after the assertion lines. k1 is %mask1 != 0 (vptestmd on zmm3);
; lanes where the mask is false keep %dst, so vaddps writes zmm0 under {%k1}.
665 define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
666 ; CHECK-LABEL: test_mask_vaddps:
668 ; CHECK-NEXT: vptestmd %zmm3, %zmm3, %k1
669 ; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1}
671 <16 x float> %j, <16 x i32> %mask1)
673 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
674 %x = fadd <16 x float> %i, %j
675 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
; Merge-masked <16 x float> fmul of %i and %j into %dst. The parameter list
; continues after the assertion lines. k1 is %mask1 != 0 (vptestmd on zmm3);
; lanes where the mask is false keep %dst, so vmulps writes zmm0 under {%k1}.
679 define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
680 ; CHECK-LABEL: test_mask_vmulps:
682 ; CHECK-NEXT: vptestmd %zmm3, %zmm3, %k1
683 ; CHECK-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1}
685 <16 x float> %j, <16 x i32> %mask1)
687 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
688 %x = fmul <16 x float> %i, %j
689 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
; Merge-masked <16 x float> minimum. The fcmp olt + select pair on %i and %j is
; expected to be recognised as vminps, and the outer select on %mask1 != 0
; turns it into a {%k1} merge into %dst (zmm0).
693 define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
694 ; CHECK-LABEL: test_mask_vminps:
696 ; CHECK-NEXT: vptestmd %zmm3, %zmm3, %k1
697 ; CHECK-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1}
699 <16 x float> %j, <16 x i32> %mask1)
701 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
702 %cmp_res = fcmp olt <16 x float> %i, %j
703 %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
704 %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst
; Merge-masked <8 x double> minimum (fcmp olt + select) into %dst. The mask
; source %mask1 is <8 x i32>, i.e. a 256-bit value in ymm3. Configurations
; without +avx512vl expect it to be treated as zmm3 (kill annotation) for a
; 512-bit vptestmd; the +avx512vl and combined dq+bw+vl configurations expect
; vptestmd directly on ymm3. All of them expect vminpd under {%k1}.
708 define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
709 ; AVX512F-LABEL: test_mask_vminpd:
711 ; AVX512F-NEXT: # kill: def $ymm3 killed $ymm3 def $zmm3
712 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k1
713 ; AVX512F-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
716 ; AVX512VL-LABEL: test_mask_vminpd:
718 ; AVX512VL-NEXT: vptestmd %ymm3, %ymm3, %k1
719 ; AVX512VL-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
720 ; AVX512VL-NEXT: retq
722 ; AVX512BW-LABEL: test_mask_vminpd:
724 ; AVX512BW-NEXT: # kill: def $ymm3 killed $ymm3 def $zmm3
725 ; AVX512BW-NEXT: vptestmd %zmm3, %zmm3, %k1
726 ; AVX512BW-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
727 ; AVX512BW-NEXT: retq
729 ; AVX512DQ-LABEL: test_mask_vminpd:
731 ; AVX512DQ-NEXT: # kill: def $ymm3 killed $ymm3 def $zmm3
732 ; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k1
733 ; AVX512DQ-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
734 ; AVX512DQ-NEXT: retq
736 ; SKX-LABEL: test_mask_vminpd:
738 ; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1
739 ; SKX-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
741 <8 x double> %j, <8 x i32> %mask1)
743 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
744 %cmp_res = fcmp olt <8 x double> %i, %j
745 %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
746 %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst
; Merge-masked <16 x float> maximum. The fcmp ogt + select pair on %i and %j is
; expected to be recognised as vmaxps, and the outer select on %mask1 != 0
; turns it into a {%k1} merge into %dst (zmm0).
750 define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
751 ; CHECK-LABEL: test_mask_vmaxps:
753 ; CHECK-NEXT: vptestmd %zmm3, %zmm3, %k1
754 ; CHECK-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1}
756 <16 x float> %j, <16 x i32> %mask1)
758 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
759 %cmp_res = fcmp ogt <16 x float> %i, %j
760 %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
761 %r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst
; Merge-masked <8 x double> maximum (fcmp ogt + select) into %dst. The mask
; source %mask1 is <8 x i32>, i.e. a 256-bit value in ymm3. Configurations
; without +avx512vl expect it to be treated as zmm3 (kill annotation) for a
; 512-bit vptestmd; the +avx512vl and combined dq+bw+vl configurations expect
; vptestmd directly on ymm3. All of them expect vmaxpd under {%k1}.
765 define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
766 ; AVX512F-LABEL: test_mask_vmaxpd:
768 ; AVX512F-NEXT: # kill: def $ymm3 killed $ymm3 def $zmm3
769 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k1
770 ; AVX512F-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
773 ; AVX512VL-LABEL: test_mask_vmaxpd:
775 ; AVX512VL-NEXT: vptestmd %ymm3, %ymm3, %k1
776 ; AVX512VL-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
777 ; AVX512VL-NEXT: retq
779 ; AVX512BW-LABEL: test_mask_vmaxpd:
781 ; AVX512BW-NEXT: # kill: def $ymm3 killed $ymm3 def $zmm3
782 ; AVX512BW-NEXT: vptestmd %zmm3, %zmm3, %k1
783 ; AVX512BW-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
784 ; AVX512BW-NEXT: retq
786 ; AVX512DQ-LABEL: test_mask_vmaxpd:
788 ; AVX512DQ-NEXT: # kill: def $ymm3 killed $ymm3 def $zmm3
789 ; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k1
790 ; AVX512DQ-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
791 ; AVX512DQ-NEXT: retq
793 ; SKX-LABEL: test_mask_vmaxpd:
795 ; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1
796 ; SKX-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
798 <8 x double> %j, <8 x i32> %mask1)
800 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
801 %cmp_res = fcmp ogt <8 x double> %i, %j
802 %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
803 %r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst
; Merge-masked <16 x float> fsub computing %i - %j into %dst. k1 is
; %mask1 != 0 (vptestmd on zmm3); lanes where the mask is false keep %dst, so
; vsubps writes zmm0 under {%k1}.
807 define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
808 ; CHECK-LABEL: test_mask_vsubps:
810 ; CHECK-NEXT: vptestmd %zmm3, %zmm3, %k1
811 ; CHECK-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1}
813 <16 x float> %j, <16 x i32> %mask1)
815 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
816 %x = fsub <16 x float> %i, %j
817 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
; Merge-masked <16 x float> fdiv computing %i / %j into %dst. k1 is
; %mask1 != 0 (vptestmd on zmm3); lanes where the mask is false keep %dst, so
; vdivps writes zmm0 under {%k1}.
821 define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
822 ; CHECK-LABEL: test_mask_vdivps:
824 ; CHECK-NEXT: vptestmd %zmm3, %zmm3, %k1
825 ; CHECK-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1}
827 <16 x float> %j, <16 x i32> %mask1)
829 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
830 %x = fdiv <16 x float> %i, %j
831 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
; Merge-masked <8 x double> fadd of %i and %j into %dst. The mask source is
; <8 x i64>, so k1 is expected to come from vptestmq on zmm3; vaddpd then
; writes zmm0 under {%k1}.
835 define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
836 ; CHECK-LABEL: test_mask_vaddpd:
838 ; CHECK-NEXT: vptestmq %zmm3, %zmm3, %k1
839 ; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1}
841 <8 x double> %j, <8 x i64> %mask1)
843 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
844 %x = fadd <8 x double> %i, %j
845 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
; Zero-masked <8 x double> fadd of %i and %j. k1 is %mask1 != 0 (vptestmq on
; zmm2); lanes where the mask is false are zero, so vaddpd is expected with
; {%k1} {z}.
849 define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
850 ; CHECK-LABEL: test_maskz_vaddpd:
852 ; CHECK-NEXT: vptestmq %zmm2, %zmm2, %k1
853 ; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z}
855 <8 x i64> %mask1) nounwind readnone {
856 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
857 %x = fadd <8 x double> %i, %j
858 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
; Merge-masked <8 x double> fadd of %i with a vector loaded from %j (align 8),
; merged into %dst. The load is expected to be folded as (%rdi) into a vaddpd
; that writes zmm0 under {%k1}; k1 is %mask1 != 0 (vptestmq on zmm2).
862 define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
863 ; CHECK-LABEL: test_mask_fold_vaddpd:
865 ; CHECK-NEXT: vptestmq %zmm2, %zmm2, %k1
866 ; CHECK-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1}
868 <8 x double>* %j, <8 x i64> %mask1)
870 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
871 %tmp = load <8 x double>, <8 x double>* %j, align 8
872 %x = fadd <8 x double> %i, %tmp
873 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
; Zero-masked <8 x double> fadd of %i with a vector loaded from %j (align 8).
; The load is expected to be folded as (%rdi) into a vaddpd with {%k1} {z};
; k1 is %mask1 != 0 (vptestmq on zmm1).
877 define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
878 ; CHECK-LABEL: test_maskz_fold_vaddpd:
880 ; CHECK-NEXT: vptestmq %zmm1, %zmm1, %k1
881 ; CHECK-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z}
883 <8 x i64> %mask1) nounwind {
884 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
885 %tmp = load <8 x double>, <8 x double>* %j, align 8
886 %x = fadd <8 x double> %i, %tmp
887 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
; <8 x double> fadd with a splat of one double loaded from %j (insertelement
; into lane 0 followed by a zero-index shufflevector). The splat is expected
; to become a {1to8} embedded broadcast from (%rdi) on vaddpd.
891 define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
892 ; CHECK-LABEL: test_broadcast_vaddpd:
894 ; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0
896 %tmp = load double, double* %j
897 %b = insertelement <8 x double> undef, double %tmp, i32 0
898 %c = shufflevector <8 x double> %b, <8 x double> undef,
899 <8 x i32> zeroinitializer
900 %x = fadd <8 x double> %c, %i
; Merge-masked <8 x double> fadd of %i with a splat of one double loaded from
; %j. The final select uses %i, not %dst, for lanes where the mask is false.
; The expected code therefore copies zmm1 (%i) into zmm0 with vmovapd first,
; then applies a {1to8} broadcast vaddpd under {%k1}; k1 is %mask1 != 0.
904 define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
905 ; CHECK-LABEL: test_mask_broadcast_vaddpd:
907 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
908 ; CHECK-NEXT: vptestmq %zmm2, %zmm2, %k1
909 ; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm0 {%k1}
911 double* %j, <8 x i64> %mask1) nounwind {
912 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
913 %tmp = load double, double* %j
914 %b = insertelement <8 x double> undef, double %tmp, i32 0
915 %c = shufflevector <8 x double> %b, <8 x double> undef,
916 <8 x i32> zeroinitializer
917 %x = fadd <8 x double> %c, %i
918 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i
; Zero-masked <8 x double> fadd of %i with a splat of one double loaded from
; %j. Expected to combine a {1to8} embedded broadcast from (%rdi) with
; {%k1} {z} on a single vaddpd; k1 is %mask1 != 0 (vptestmq on zmm1).
922 define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
923 ; CHECK-LABEL: test_maskz_broadcast_vaddpd:
925 ; CHECK-NEXT: vptestmq %zmm1, %zmm1, %k1
926 ; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
928 <8 x i64> %mask1) nounwind {
929 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
930 %tmp = load double, double* %j
931 %b = insertelement <8 x double> undef, double %tmp, i32 0
932 %c = shufflevector <8 x double> %b, <8 x double> undef,
933 <8 x i32> zeroinitializer
934 %x = fadd <8 x double> %c, %i
935 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
; <16 x float> negation written as fsub of %a from a splat of -0.0. It is
; expected to be lowered to an xor with a RIP-relative {1to16} broadcast
; constant. The configurations without +avx512dq expect vpxord, while the
; +avx512dq and combined dq+bw+vl configurations expect vxorps.
939 define <16 x float> @test_fxor(<16 x float> %a) {
940 ; AVX512F-LABEL: test_fxor:
942 ; AVX512F-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
945 ; AVX512VL-LABEL: test_fxor:
947 ; AVX512VL-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
948 ; AVX512VL-NEXT: retq
950 ; AVX512BW-LABEL: test_fxor:
952 ; AVX512BW-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
953 ; AVX512BW-NEXT: retq
955 ; AVX512DQ-LABEL: test_fxor:
957 ; AVX512DQ-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
958 ; AVX512DQ-NEXT: retq
960 ; SKX-LABEL: test_fxor:
962 ; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
965 %res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
; 256-bit variant of the fsub-from-(-0.0) negation. The three configurations
; without +avx512vl expect the -0.0 splat to be materialised into ymm1 with
; vbroadcastss, followed by a register-register vxorps. The +avx512vl
; configuration expects vpxord with a {1to8} embedded broadcast on ymm, and
; the combined dq+bw+vl configuration expects vxorps with the same broadcast.
969 define <8 x float> @test_fxor_8f32(<8 x float> %a) {
970 ; AVX512F-LABEL: test_fxor_8f32:
972 ; AVX512F-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
973 ; AVX512F-NEXT: vxorps %ymm1, %ymm0, %ymm0
976 ; AVX512VL-LABEL: test_fxor_8f32:
978 ; AVX512VL-NEXT: vpxord {{.*}}(%rip){1to8}, %ymm0, %ymm0
979 ; AVX512VL-NEXT: retq
981 ; AVX512BW-LABEL: test_fxor_8f32:
983 ; AVX512BW-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
984 ; AVX512BW-NEXT: vxorps %ymm1, %ymm0, %ymm0
985 ; AVX512BW-NEXT: retq
987 ; AVX512DQ-LABEL: test_fxor_8f32:
989 ; AVX512DQ-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
990 ; AVX512DQ-NEXT: vxorps %ymm1, %ymm0, %ymm0
991 ; AVX512DQ-NEXT: retq
993 ; SKX-LABEL: test_fxor_8f32:
995 ; SKX-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0
997 %res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
; <8 x double> absolute value through the llvm.fabs.v8f64 intrinsic (declared
; after the function). It is expected to be lowered to an and with a
; RIP-relative {1to8} broadcast constant. The configurations without
; +avx512dq expect vpandq; those with +avx512dq expect vandpd.
1001 define <8 x double> @fabs_v8f64(<8 x double> %p)
1002 ; AVX512F-LABEL: fabs_v8f64:
1004 ; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
1005 ; AVX512F-NEXT: retq
1007 ; AVX512VL-LABEL: fabs_v8f64:
1008 ; AVX512VL: # %bb.0:
1009 ; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
1010 ; AVX512VL-NEXT: retq
1012 ; AVX512BW-LABEL: fabs_v8f64:
1013 ; AVX512BW: # %bb.0:
1014 ; AVX512BW-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
1015 ; AVX512BW-NEXT: retq
1017 ; AVX512DQ-LABEL: fabs_v8f64:
1018 ; AVX512DQ: # %bb.0:
1019 ; AVX512DQ-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
1020 ; AVX512DQ-NEXT: retq
1022 ; SKX-LABEL: fabs_v8f64:
1024 ; SKX-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
1027 %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
1030 declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
; <16 x float> absolute value through the llvm.fabs.v16f32 intrinsic (declared
; after the function). It is expected to be lowered to an and with a
; RIP-relative {1to16} broadcast constant. The configurations without
; +avx512dq expect vpandd; those with +avx512dq expect vandps.
1032 define <16 x float> @fabs_v16f32(<16 x float> %p)
1033 ; AVX512F-LABEL: fabs_v16f32:
1035 ; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
1036 ; AVX512F-NEXT: retq
1038 ; AVX512VL-LABEL: fabs_v16f32:
1039 ; AVX512VL: # %bb.0:
1040 ; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
1041 ; AVX512VL-NEXT: retq
1043 ; AVX512BW-LABEL: fabs_v16f32:
1044 ; AVX512BW: # %bb.0:
1045 ; AVX512BW-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
1046 ; AVX512BW-NEXT: retq
1048 ; AVX512DQ-LABEL: fabs_v16f32:
1049 ; AVX512DQ: # %bb.0:
1050 ; AVX512DQ-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0
1051 ; AVX512DQ-NEXT: retq
1053 ; SKX-LABEL: fabs_v16f32:
1055 ; SKX-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0
1058 %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
1061 declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)