1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512F
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VL
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512BW
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512DQ
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512bw,+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; Register-register fadd of two <8 x double> arguments. The shared CHECK
; prefix (used by every RUN line) expects one vaddpd on zmm registers.
8 define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
9 ; CHECK-LABEL: addpd512:
10 ; CHECK: # %bb.0: # %entry
11 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
14 %add.i = fadd <8 x double> %x, %y
15 ret <8 x double> %add.i
; fadd of an <8 x double> argument with a constant vector whose elements are
; not all equal. The check expects the constant to be folded into vaddpd as a
; RIP-relative constant-pool operand with no {1toN} broadcast decoration.
18 define <8 x double> @addpd512fold(<8 x double> %y) {
19 ; CHECK-LABEL: addpd512fold:
20 ; CHECK: # %bb.0: # %entry
21 ; CHECK-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
24 %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
25 ret <8 x double> %add.i
; Register-register fadd of two <16 x float> arguments. All RUN
; configurations expect one vaddps on zmm registers.
28 define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
29 ; CHECK-LABEL: addps512:
30 ; CHECK: # %bb.0: # %entry
31 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
34 %add.i = fadd <16 x float> %x, %y
35 ret <16 x float> %add.i
; fadd of a <16 x float> argument with a non-uniform constant vector. The
; check expects the constant-pool load to be folded into vaddps as a
; RIP-relative memory operand without a broadcast decoration.
38 define <16 x float> @addps512fold(<16 x float> %y) {
39 ; CHECK-LABEL: addps512fold:
40 ; CHECK: # %bb.0: # %entry
41 ; CHECK-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
44 %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
45 ret <16 x float> %add.i
; Register-register fsub (%x - %y) on <8 x double>. All RUN configurations
; expect one vsubpd on zmm registers.
48 define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
49 ; CHECK-LABEL: subpd512:
50 ; CHECK: # %bb.0: # %entry
51 ; CHECK-NEXT: vsubpd %zmm0, %zmm1, %zmm0
54 %sub.i = fsub <8 x double> %x, %y
55 ret <8 x double> %sub.i
; fsub whose subtrahend is loaded from the pointer argument %x (align 8).
; The check expects the load to be folded into vsubpd as the (%rdi) operand.
58 define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
59 ; CHECK-LABEL: subpd512fold:
60 ; CHECK: # %bb.0: # %entry
61 ; CHECK-NEXT: vsubpd (%rdi), %zmm0, %zmm0
64 %tmp2 = load <8 x double>, <8 x double>* %x, align 8
65 %sub.i = fsub <8 x double> %y, %tmp2
66 ret <8 x double> %sub.i
; Register-register fsub (%x - %y) on <16 x float>. All RUN configurations
; expect one vsubps on zmm registers.
69 define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
70 ; CHECK-LABEL: subps512:
71 ; CHECK: # %bb.0: # %entry
72 ; CHECK-NEXT: vsubps %zmm0, %zmm1, %zmm0
75 %sub.i = fsub <16 x float> %x, %y
76 ret <16 x float> %sub.i
; fsub whose subtrahend is loaded from the pointer argument %x (align 4).
; The check expects the load to be folded into vsubps as the (%rdi) operand.
79 define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
80 ; CHECK-LABEL: subps512fold:
81 ; CHECK: # %bb.0: # %entry
82 ; CHECK-NEXT: vsubps (%rdi), %zmm0, %zmm0
85 %tmp2 = load <16 x float>, <16 x float>* %x, align 4
86 %sub.i = fsub <16 x float> %y, %tmp2
87 ret <16 x float> %sub.i
; Integer multiply of two <8 x i64> vectors. The expected code depends on
; the RUN configuration:
;   - AVX512F / AVX512VL / AVX512BW prefixes: the multiply is expanded into
;     a vpsrlq / vpmuludq / vpaddq / vpsllq sequence on zmm registers.
;   - AVX512DQ / SKX prefixes: a single vpmullq on zmm registers.
90 define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
91 ; AVX512F-LABEL: imulq512:
93 ; AVX512F-NEXT: vpsrlq $32, %zmm1, %zmm2
94 ; AVX512F-NEXT: vpmuludq %zmm0, %zmm2, %zmm2
95 ; AVX512F-NEXT: vpsrlq $32, %zmm0, %zmm3
96 ; AVX512F-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
97 ; AVX512F-NEXT: vpaddq %zmm2, %zmm3, %zmm2
98 ; AVX512F-NEXT: vpsllq $32, %zmm2, %zmm2
99 ; AVX512F-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
100 ; AVX512F-NEXT: vpaddq %zmm2, %zmm0, %zmm0
103 ; AVX512VL-LABEL: imulq512:
105 ; AVX512VL-NEXT: vpsrlq $32, %zmm1, %zmm2
106 ; AVX512VL-NEXT: vpmuludq %zmm0, %zmm2, %zmm2
107 ; AVX512VL-NEXT: vpsrlq $32, %zmm0, %zmm3
108 ; AVX512VL-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
109 ; AVX512VL-NEXT: vpaddq %zmm2, %zmm3, %zmm2
110 ; AVX512VL-NEXT: vpsllq $32, %zmm2, %zmm2
111 ; AVX512VL-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
112 ; AVX512VL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
113 ; AVX512VL-NEXT: retq
115 ; AVX512BW-LABEL: imulq512:
117 ; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm2
118 ; AVX512BW-NEXT: vpmuludq %zmm0, %zmm2, %zmm2
119 ; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm3
120 ; AVX512BW-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
121 ; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2
122 ; AVX512BW-NEXT: vpsllq $32, %zmm2, %zmm2
123 ; AVX512BW-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
124 ; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
125 ; AVX512BW-NEXT: retq
127 ; AVX512DQ-LABEL: imulq512:
129 ; AVX512DQ-NEXT: vpmullq %zmm0, %zmm1, %zmm0
130 ; AVX512DQ-NEXT: retq
132 ; SKX-LABEL: imulq512:
134 ; SKX-NEXT: vpmullq %zmm0, %zmm1, %zmm0
136 %z = mul <8 x i64>%x, %y
; Integer multiply of two <4 x i64> vectors. The expected code depends on
; the RUN configuration:
;   - AVX512F / AVX512VL / AVX512BW prefixes: expansion into the
;     vpsrlq / vpmuludq / vpaddq / vpsllq sequence on ymm registers.
;   - AVX512DQ prefix: the operands are treated as zmm (see the "kill"
;     annotations) and multiplied with a 512-bit vpmullq.
;   - SKX prefix: a single vpmullq directly on ymm registers.
140 define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) {
141 ; AVX512F-LABEL: imulq256:
143 ; AVX512F-NEXT: vpsrlq $32, %ymm1, %ymm2
144 ; AVX512F-NEXT: vpmuludq %ymm0, %ymm2, %ymm2
145 ; AVX512F-NEXT: vpsrlq $32, %ymm0, %ymm3
146 ; AVX512F-NEXT: vpmuludq %ymm3, %ymm1, %ymm3
147 ; AVX512F-NEXT: vpaddq %ymm2, %ymm3, %ymm2
148 ; AVX512F-NEXT: vpsllq $32, %ymm2, %ymm2
149 ; AVX512F-NEXT: vpmuludq %ymm0, %ymm1, %ymm0
150 ; AVX512F-NEXT: vpaddq %ymm2, %ymm0, %ymm0
153 ; AVX512VL-LABEL: imulq256:
155 ; AVX512VL-NEXT: vpsrlq $32, %ymm1, %ymm2
156 ; AVX512VL-NEXT: vpmuludq %ymm0, %ymm2, %ymm2
157 ; AVX512VL-NEXT: vpsrlq $32, %ymm0, %ymm3
158 ; AVX512VL-NEXT: vpmuludq %ymm3, %ymm1, %ymm3
159 ; AVX512VL-NEXT: vpaddq %ymm2, %ymm3, %ymm2
160 ; AVX512VL-NEXT: vpsllq $32, %ymm2, %ymm2
161 ; AVX512VL-NEXT: vpmuludq %ymm0, %ymm1, %ymm0
162 ; AVX512VL-NEXT: vpaddq %ymm2, %ymm0, %ymm0
163 ; AVX512VL-NEXT: retq
165 ; AVX512BW-LABEL: imulq256:
167 ; AVX512BW-NEXT: vpsrlq $32, %ymm1, %ymm2
168 ; AVX512BW-NEXT: vpmuludq %ymm0, %ymm2, %ymm2
169 ; AVX512BW-NEXT: vpsrlq $32, %ymm0, %ymm3
170 ; AVX512BW-NEXT: vpmuludq %ymm3, %ymm1, %ymm3
171 ; AVX512BW-NEXT: vpaddq %ymm2, %ymm3, %ymm2
172 ; AVX512BW-NEXT: vpsllq $32, %ymm2, %ymm2
173 ; AVX512BW-NEXT: vpmuludq %ymm0, %ymm1, %ymm0
174 ; AVX512BW-NEXT: vpaddq %ymm2, %ymm0, %ymm0
175 ; AVX512BW-NEXT: retq
177 ; AVX512DQ-LABEL: imulq256:
179 ; AVX512DQ-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
180 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
181 ; AVX512DQ-NEXT: vpmullq %zmm0, %zmm1, %zmm0
182 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
183 ; AVX512DQ-NEXT: retq
185 ; SKX-LABEL: imulq256:
187 ; SKX-NEXT: vpmullq %ymm0, %ymm1, %ymm0
189 %z = mul <4 x i64>%x, %y
; Multiply of a <4 x i64> vector by the splat constant 1337. Expected code:
;   - AVX512F / AVX512VL / AVX512BW prefixes: the constant is materialized
;     with vpbroadcastq, then the multiply is expanded with vpmuludq, shifts
;     and vpaddq on ymm registers.
;   - AVX512DQ prefix: 512-bit vpmullq with the constant folded as a {1to8}
;     broadcast memory operand.
;   - SKX prefix: 256-bit vpmullq with a {1to4} broadcast memory operand.
193 define <4 x i64> @imulq256_bcast(<4 x i64> %x) {
194 ; AVX512F-LABEL: imulq256_bcast:
196 ; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1337,1337,1337,1337]
197 ; AVX512F-NEXT: vpmuludq %ymm1, %ymm0, %ymm2
198 ; AVX512F-NEXT: vpsrlq $32, %ymm0, %ymm0
199 ; AVX512F-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
200 ; AVX512F-NEXT: vpsllq $32, %ymm0, %ymm0
201 ; AVX512F-NEXT: vpaddq %ymm0, %ymm2, %ymm0
204 ; AVX512VL-LABEL: imulq256_bcast:
206 ; AVX512VL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1337,1337,1337,1337]
207 ; AVX512VL-NEXT: vpmuludq %ymm1, %ymm0, %ymm2
208 ; AVX512VL-NEXT: vpsrlq $32, %ymm0, %ymm0
209 ; AVX512VL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
210 ; AVX512VL-NEXT: vpsllq $32, %ymm0, %ymm0
211 ; AVX512VL-NEXT: vpaddq %ymm0, %ymm2, %ymm0
212 ; AVX512VL-NEXT: retq
214 ; AVX512BW-LABEL: imulq256_bcast:
216 ; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1337,1337,1337,1337]
217 ; AVX512BW-NEXT: vpmuludq %ymm1, %ymm0, %ymm2
218 ; AVX512BW-NEXT: vpsrlq $32, %ymm0, %ymm0
219 ; AVX512BW-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
220 ; AVX512BW-NEXT: vpsllq $32, %ymm0, %ymm0
221 ; AVX512BW-NEXT: vpaddq %ymm0, %ymm2, %ymm0
222 ; AVX512BW-NEXT: retq
224 ; AVX512DQ-LABEL: imulq256_bcast:
226 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
227 ; AVX512DQ-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
228 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
229 ; AVX512DQ-NEXT: retq
231 ; SKX-LABEL: imulq256_bcast:
233 ; SKX-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
235 %z = mul <4 x i64> %x, <i64 1337, i64 1337, i64 1337, i64 1337>
; Integer multiply of two <2 x i64> vectors. Expected code:
;   - AVX512F / AVX512VL / AVX512BW prefixes: expansion into the
;     vpsrlq / vpmuludq / vpaddq / vpsllq sequence on xmm registers.
;   - AVX512DQ prefix: operands treated as zmm (see the "kill" annotations),
;     multiplied with a 512-bit vpmullq, followed by vzeroupper.
;   - SKX prefix: a single vpmullq directly on xmm registers.
239 define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) {
240 ; AVX512F-LABEL: imulq128:
242 ; AVX512F-NEXT: vpsrlq $32, %xmm1, %xmm2
243 ; AVX512F-NEXT: vpmuludq %xmm0, %xmm2, %xmm2
244 ; AVX512F-NEXT: vpsrlq $32, %xmm0, %xmm3
245 ; AVX512F-NEXT: vpmuludq %xmm3, %xmm1, %xmm3
246 ; AVX512F-NEXT: vpaddq %xmm2, %xmm3, %xmm2
247 ; AVX512F-NEXT: vpsllq $32, %xmm2, %xmm2
248 ; AVX512F-NEXT: vpmuludq %xmm0, %xmm1, %xmm0
249 ; AVX512F-NEXT: vpaddq %xmm2, %xmm0, %xmm0
252 ; AVX512VL-LABEL: imulq128:
254 ; AVX512VL-NEXT: vpsrlq $32, %xmm1, %xmm2
255 ; AVX512VL-NEXT: vpmuludq %xmm0, %xmm2, %xmm2
256 ; AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm3
257 ; AVX512VL-NEXT: vpmuludq %xmm3, %xmm1, %xmm3
258 ; AVX512VL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
259 ; AVX512VL-NEXT: vpsllq $32, %xmm2, %xmm2
260 ; AVX512VL-NEXT: vpmuludq %xmm0, %xmm1, %xmm0
261 ; AVX512VL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
262 ; AVX512VL-NEXT: retq
264 ; AVX512BW-LABEL: imulq128:
266 ; AVX512BW-NEXT: vpsrlq $32, %xmm1, %xmm2
267 ; AVX512BW-NEXT: vpmuludq %xmm0, %xmm2, %xmm2
268 ; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm3
269 ; AVX512BW-NEXT: vpmuludq %xmm3, %xmm1, %xmm3
270 ; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
271 ; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
272 ; AVX512BW-NEXT: vpmuludq %xmm0, %xmm1, %xmm0
273 ; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
274 ; AVX512BW-NEXT: retq
276 ; AVX512DQ-LABEL: imulq128:
278 ; AVX512DQ-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
279 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
280 ; AVX512DQ-NEXT: vpmullq %zmm0, %zmm1, %zmm0
281 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
282 ; AVX512DQ-NEXT: vzeroupper
283 ; AVX512DQ-NEXT: retq
285 ; SKX-LABEL: imulq128:
287 ; SKX-NEXT: vpmullq %xmm0, %xmm1, %xmm0
289 %z = mul <2 x i64>%x, %y
; Multiply of a <2 x i64> vector by the splat constant 8086. Expected code:
;   - AVX512F / AVX512VL / AVX512BW prefixes: the constant is loaded with
;     vmovdqa, then the multiply is expanded with vpmuludq, shifts and vpaddq.
;   - AVX512DQ prefix: the constant is loaded into xmm1 with vmovdqa, the
;     multiply is a 512-bit vpmullq, and vzeroupper follows.
;   - SKX prefix: 128-bit vpmullq with the constant folded as a plain
;     RIP-relative memory operand (no broadcast decoration in the check).
293 define <2 x i64> @imulq128_bcast(<2 x i64> %x) {
294 ; AVX512F-LABEL: imulq128_bcast:
296 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [8086,8086]
297 ; AVX512F-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
298 ; AVX512F-NEXT: vpsrlq $32, %xmm0, %xmm0
299 ; AVX512F-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
300 ; AVX512F-NEXT: vpsllq $32, %xmm0, %xmm0
301 ; AVX512F-NEXT: vpaddq %xmm0, %xmm2, %xmm0
304 ; AVX512VL-LABEL: imulq128_bcast:
306 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [8086,8086]
307 ; AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
308 ; AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm0
309 ; AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
310 ; AVX512VL-NEXT: vpsllq $32, %xmm0, %xmm0
311 ; AVX512VL-NEXT: vpaddq %xmm0, %xmm2, %xmm0
312 ; AVX512VL-NEXT: retq
314 ; AVX512BW-LABEL: imulq128_bcast:
316 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [8086,8086]
317 ; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
318 ; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm0
319 ; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
320 ; AVX512BW-NEXT: vpsllq $32, %xmm0, %xmm0
321 ; AVX512BW-NEXT: vpaddq %xmm0, %xmm2, %xmm0
322 ; AVX512BW-NEXT: retq
324 ; AVX512DQ-LABEL: imulq128_bcast:
326 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
327 ; AVX512DQ-NEXT: vmovdqa {{.*#+}} xmm1 = [8086,8086]
328 ; AVX512DQ-NEXT: vpmullq %zmm1, %zmm0, %zmm0
329 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
330 ; AVX512DQ-NEXT: vzeroupper
331 ; AVX512DQ-NEXT: retq
333 ; SKX-LABEL: imulq128_bcast:
335 ; SKX-NEXT: vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
337 %z = mul <2 x i64> %x, <i64 8086, i64 8086>
; Register-register fmul of two <8 x double> arguments. All RUN
; configurations expect one vmulpd on zmm registers.
341 define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
342 ; CHECK-LABEL: mulpd512:
343 ; CHECK: # %bb.0: # %entry
344 ; CHECK-NEXT: vmulpd %zmm0, %zmm1, %zmm0
347 %mul.i = fmul <8 x double> %x, %y
348 ret <8 x double> %mul.i
; fmul of an <8 x double> argument with a non-uniform constant vector. The
; check expects the constant folded into vmulpd as a RIP-relative
; constant-pool operand without a broadcast decoration.
351 define <8 x double> @mulpd512fold(<8 x double> %y) {
352 ; CHECK-LABEL: mulpd512fold:
353 ; CHECK: # %bb.0: # %entry
354 ; CHECK-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
357 %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
358 ret <8 x double> %mul.i
; Register-register fmul of two <16 x float> arguments. All RUN
; configurations expect one vmulps on zmm registers.
361 define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
362 ; CHECK-LABEL: mulps512:
363 ; CHECK: # %bb.0: # %entry
364 ; CHECK-NEXT: vmulps %zmm0, %zmm1, %zmm0
367 %mul.i = fmul <16 x float> %x, %y
368 ret <16 x float> %mul.i
; fmul of a <16 x float> argument with a non-uniform constant vector. The
; check expects the constant folded into vmulps as a RIP-relative
; constant-pool operand without a broadcast decoration.
371 define <16 x float> @mulps512fold(<16 x float> %y) {
372 ; CHECK-LABEL: mulps512fold:
373 ; CHECK: # %bb.0: # %entry
374 ; CHECK-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
377 %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
378 ret <16 x float> %mul.i
; Register-register fdiv (%x / %y) on <8 x double>. All RUN configurations
; expect one vdivpd on zmm registers.
381 define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
382 ; CHECK-LABEL: divpd512:
383 ; CHECK: # %bb.0: # %entry
384 ; CHECK-NEXT: vdivpd %zmm0, %zmm1, %zmm0
387 %div.i = fdiv <8 x double> %x, %y
388 ret <8 x double> %div.i
; fdiv of an <8 x double> argument by a non-uniform constant vector. The
; check expects the divisor folded into vdivpd as a RIP-relative
; constant-pool operand without a broadcast decoration.
391 define <8 x double> @divpd512fold(<8 x double> %y) {
392 ; CHECK-LABEL: divpd512fold:
393 ; CHECK: # %bb.0: # %entry
394 ; CHECK-NEXT: vdivpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
397 %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
398 ret <8 x double> %div.i
; Register-register fdiv (%x / %y) on <16 x float>. All RUN configurations
; expect one vdivps on zmm registers.
401 define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
402 ; CHECK-LABEL: divps512:
403 ; CHECK: # %bb.0: # %entry
404 ; CHECK-NEXT: vdivps %zmm0, %zmm1, %zmm0
407 %div.i = fdiv <16 x float> %x, %y
408 ret <16 x float> %div.i
; fdiv of a <16 x float> argument by a non-uniform constant vector. The
; check expects the divisor folded into vdivps as a RIP-relative
; constant-pool operand without a broadcast decoration.
411 define <16 x float> @divps512fold(<16 x float> %y) {
412 ; CHECK-LABEL: divps512fold:
413 ; CHECK: # %bb.0: # %entry
414 ; CHECK-NEXT: vdivps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
417 %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
418 ret <16 x float> %div.i
; Register-register integer add of two <8 x i64> arguments. All RUN
; configurations expect one vpaddq on zmm registers.
421 define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
422 ; CHECK-LABEL: vpaddq_test:
424 ; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
426 %x = add <8 x i64> %i, %j
; Integer add of an <8 x i64> argument with a vector loaded from %j
; (align 4). The check expects the load folded into vpaddq as (%rdi).
430 define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
431 ; CHECK-LABEL: vpaddq_fold_test:
433 ; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0
435 %tmp = load <8 x i64>, <8 x i64>* %j, align 4
436 %x = add <8 x i64> %i, %tmp
; Integer add of an <8 x i64> argument with the splat constant 2. The check
; expects the constant folded into vpaddq as a {1to8} broadcast memory
; operand from the constant pool.
440 define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
441 ; CHECK-LABEL: vpaddq_broadcast_test:
443 ; CHECK-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
445 %x = add <8 x i64> %i, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
; Loads one i64 from %j and splats it into all eight lanes with a chain of
; insertelement instructions, then adds it to %i. The check expects the
; scalar load to be folded into vpaddq as a (%rdi){1to8} broadcast operand.
449 define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
450 ; CHECK-LABEL: vpaddq_broadcast2_test:
452 ; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0
454 %tmp = load i64, i64* %j
455 %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
456 %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
457 %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
458 %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
459 %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
460 %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
461 %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
462 %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
463 %x = add <8 x i64> %i, %j.7
; Register-register integer add of two <16 x i32> arguments. All RUN
; configurations expect one vpaddd on zmm registers.
467 define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
468 ; CHECK-LABEL: vpaddd_test:
470 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
472 %x = add <16 x i32> %i, %j
; Integer add of a <16 x i32> argument with a vector loaded from %j
; (align 4). The check expects the load folded into vpaddd as (%rdi).
476 define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
477 ; CHECK-LABEL: vpaddd_fold_test:
479 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0
481 %tmp = load <16 x i32>, <16 x i32>* %j, align 4
482 %x = add <16 x i32> %i, %tmp
; Integer add of a <16 x i32> argument with the splat constant 3. The check
; expects the constant folded into vpaddd as a {1to16} broadcast memory
; operand from the constant pool.
486 define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
487 ; CHECK-LABEL: vpaddd_broadcast_test:
489 ; CHECK-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
491 %x = add <16 x i32> %i, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; Merge-masked integer add: lanes where %mask1 is non-zero take %i + %j, the
; other lanes keep %i. The checks expect the mask to be produced by vptestmd
; into %k1 and the add to be a vpaddd predicated with {%k1}.
495 define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
496 ; CHECK-LABEL: vpaddd_mask_test:
498 ; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1
499 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1}
501 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
502 %x = add <16 x i32> %i, %j
503 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
; Zero-masked integer add: lanes where %mask1 is non-zero take %i + %j, the
; other lanes are zero. The checks expect vptestmd into %k1 followed by a
; vpaddd with {%k1} {z}.
507 define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
508 ; CHECK-LABEL: vpaddd_maskz_test:
510 ; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1
511 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
513 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
514 %x = add <16 x i32> %i, %j
515 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Merge-masked integer add whose second operand is loaded from %j.ptr. The
; checks expect vptestmd into %k1 and the load folded into a {%k1}-predicated
; vpaddd as the (%rdi) operand.
; NOTE(review): the function is marked readnone yet its body loads from
; %j.ptr - confirm the attribute is intentional for this test.
519 define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
520 ; CHECK-LABEL: vpaddd_mask_fold_test:
522 ; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1
523 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1}
525 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
526 %j = load <16 x i32>, <16 x i32>* %j.ptr
527 %x = add <16 x i32> %i, %j
528 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
; Merge-masked integer add of %i with the splat constant 4; unselected lanes
; keep %i. The checks expect vptestmd into %k1 and a {%k1}-predicated vpaddd
; with the constant folded as a {1to16} broadcast memory operand.
532 define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
533 ; CHECK-LABEL: vpaddd_mask_broadcast_test:
535 ; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1
536 ; CHECK-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 {%k1}
538 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
539 %x = add <16 x i32> %i, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
540 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
; Zero-masked integer add whose second operand is loaded from %j.ptr;
; unselected lanes are zero. The checks expect vptestmd into %k1 and the load
; folded into vpaddd with {%k1} {z}.
; NOTE(review): the function is marked readnone yet its body loads from
; %j.ptr - confirm the attribute is intentional for this test.
544 define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
545 ; CHECK-LABEL: vpaddd_maskz_fold_test:
547 ; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1
548 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
550 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
551 %j = load <16 x i32>, <16 x i32>* %j.ptr
552 %x = add <16 x i32> %i, %j
553 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Zero-masked integer add of %i with the splat constant 5; unselected lanes
; are zero. The checks expect vptestmd into %k1 and a vpaddd with {%k1} {z}
; taking the constant as a {1to16} broadcast memory operand.
557 define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
558 ; CHECK-LABEL: vpaddd_maskz_broadcast_test:
560 ; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1
561 ; CHECK-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z}
563 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
564 %x = add <16 x i32> %i, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
565 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Register-register integer subtract (%i - %j) on <8 x i64>. All RUN
; configurations expect one vpsubq on zmm registers.
569 define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
570 ; CHECK-LABEL: vpsubq_test:
572 ; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0
574 %x = sub <8 x i64> %i, %j
; Register-register integer subtract (%i - %j) on <16 x i32>. All RUN
; configurations expect one vpsubd on zmm registers.
578 define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
579 ; CHECK-LABEL: vpsubd_test:
581 ; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0
583 %x = sub <16 x i32> %i, %j
; Register-register integer multiply of two <16 x i32> arguments. All RUN
; configurations expect one vpmulld on zmm registers.
587 define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
588 ; CHECK-LABEL: vpmulld_test:
590 ; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0
592 %x = mul <16 x i32> %i, %j
; Tail call to the external function @sqrtf (declared readnone) on a scalar
; float. The check expects the call to become an inline vsqrtss on xmm0.
596 declare float @sqrtf(float) readnone
597 define float @sqrtA(float %a) nounwind uwtable readnone ssp {
598 ; CHECK-LABEL: sqrtA:
599 ; CHECK: # %bb.0: # %entry
600 ; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
603 %conv1 = tail call float @sqrtf(float %a) nounwind readnone
; Tail call to the external function @sqrt (declared readnone) on a scalar
; double. The check expects the call to become an inline vsqrtsd on xmm0.
607 declare double @sqrt(double) readnone
608 define double @sqrtB(double %a) nounwind uwtable readnone ssp {
609 ; CHECK-LABEL: sqrtB:
610 ; CHECK: # %bb.0: # %entry
611 ; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
614 %call = tail call double @sqrt(double %a) nounwind readnone
; Scalar float square root through the @llvm.sqrt.f32 intrinsic. The check
; expects a vsqrtss on xmm0.
618 declare float @llvm.sqrt.f32(float)
619 define float @sqrtC(float %a) nounwind {
620 ; CHECK-LABEL: sqrtC:
622 ; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
624 %b = call float @llvm.sqrt.f32(float %a)
; <16 x float> square root through the @llvm.sqrt.v16f32 intrinsic. The check
; expects one vsqrtps on zmm0.
628 declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
629 define <16 x float> @sqrtD(<16 x float> %a) nounwind {
630 ; CHECK-LABEL: sqrtD:
632 ; CHECK-NEXT: vsqrtps %zmm0, %zmm0
634 %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
; <8 x double> square root through the @llvm.sqrt.v8f64 intrinsic. The check
; expects one vsqrtpd on zmm0.
638 declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
639 define <8 x double> @sqrtE(<8 x double> %a) nounwind {
640 ; CHECK-LABEL: sqrtE:
642 ; CHECK-NEXT: vsqrtpd %zmm0, %zmm0
644 %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
; fadd of a <16 x float> argument with a constant vector whose sixteen
; elements are all 0x3FB99999A0000000. The check expects the constant folded
; into vaddps as a {1to16} broadcast memory operand.
648 define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
649 ; CHECK-LABEL: fadd_broadcast:
651 ; CHECK-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
653 %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
; Integer add of an <8 x i64> argument with the splat constant 2. The check
; expects the constant folded into vpaddq as a {1to8} broadcast memory
; operand.
657 define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
658 ; CHECK-LABEL: addq_broadcast:
660 ; CHECK-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
662 %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
; Bitwise OR of an <8 x i64> argument with the splat constant 2, folded as a
; {1to8} broadcast memory operand. The expected mnemonic depends on the RUN
; configuration: vporq for the AVX512F / AVX512VL / AVX512BW prefixes, vorpd
; for the AVX512DQ / SKX prefixes.
666 define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
667 ; AVX512F-LABEL: orq_broadcast:
669 ; AVX512F-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
672 ; AVX512VL-LABEL: orq_broadcast:
674 ; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
675 ; AVX512VL-NEXT: retq
677 ; AVX512BW-LABEL: orq_broadcast:
679 ; AVX512BW-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
680 ; AVX512BW-NEXT: retq
682 ; AVX512DQ-LABEL: orq_broadcast:
684 ; AVX512DQ-NEXT: vorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
685 ; AVX512DQ-NEXT: retq
687 ; SKX-LABEL: orq_broadcast:
689 ; SKX-NEXT: vorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
691 %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
; Bitwise AND of a <16 x i32> argument with a vector loaded from %x
; (align 4); the load is expected to be folded as the (%rdi) operand. The
; expected mnemonic depends on the RUN configuration: vpandd for the
; AVX512F / AVX512VL / AVX512BW prefixes, vandps for AVX512DQ / SKX.
695 define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
696 ; AVX512F-LABEL: andd512fold:
697 ; AVX512F: # %bb.0: # %entry
698 ; AVX512F-NEXT: vpandd (%rdi), %zmm0, %zmm0
701 ; AVX512VL-LABEL: andd512fold:
702 ; AVX512VL: # %bb.0: # %entry
703 ; AVX512VL-NEXT: vpandd (%rdi), %zmm0, %zmm0
704 ; AVX512VL-NEXT: retq
706 ; AVX512BW-LABEL: andd512fold:
707 ; AVX512BW: # %bb.0: # %entry
708 ; AVX512BW-NEXT: vpandd (%rdi), %zmm0, %zmm0
709 ; AVX512BW-NEXT: retq
711 ; AVX512DQ-LABEL: andd512fold:
712 ; AVX512DQ: # %bb.0: # %entry
713 ; AVX512DQ-NEXT: vandps (%rdi), %zmm0, %zmm0
714 ; AVX512DQ-NEXT: retq
716 ; SKX-LABEL: andd512fold:
717 ; SKX: # %bb.0: # %entry
718 ; SKX-NEXT: vandps (%rdi), %zmm0, %zmm0
721 %a = load <16 x i32>, <16 x i32>* %x, align 4
722 %b = and <16 x i32> %y, %a
; Loads one i64 from %ap, splats it with insertelement + shufflevector
; (zeroinitializer shuffle mask) and ANDs it with %p1. The scalar load is
; expected to be folded as a (%rdi){1to8} broadcast operand. The expected
; mnemonic depends on the RUN configuration: vpandq for the AVX512F /
; AVX512VL / AVX512BW prefixes, vandpd for AVX512DQ / SKX.
726 define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
727 ; AVX512F-LABEL: andqbrst:
728 ; AVX512F: # %bb.0: # %entry
729 ; AVX512F-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
732 ; AVX512VL-LABEL: andqbrst:
733 ; AVX512VL: # %bb.0: # %entry
734 ; AVX512VL-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
735 ; AVX512VL-NEXT: retq
737 ; AVX512BW-LABEL: andqbrst:
738 ; AVX512BW: # %bb.0: # %entry
739 ; AVX512BW-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
740 ; AVX512BW-NEXT: retq
742 ; AVX512DQ-LABEL: andqbrst:
743 ; AVX512DQ: # %bb.0: # %entry
744 ; AVX512DQ-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0
745 ; AVX512DQ-NEXT: retq
747 ; SKX-LABEL: andqbrst:
748 ; SKX: # %bb.0: # %entry
749 ; SKX-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0
752 %a = load i64, i64* %ap, align 8
753 %b = insertelement <8 x i64> undef, i64 %a, i32 0
754 %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
755 %d = and <8 x i64> %p1, %c
; Merge-masked fadd on <16 x float>: lanes where %mask1 is non-zero take
; %i + %j, the other lanes keep %dst. The checks expect vptestmd into %k1 and
; a {%k1}-predicated vaddps writing zmm0.
759 define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
760 ; CHECK-LABEL: test_mask_vaddps:
762 ; CHECK-NEXT: vptestmd %zmm3, %zmm3, %k1
763 ; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1}
765 <16 x float> %j, <16 x i32> %mask1)
767 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
768 %x = fadd <16 x float> %i, %j
769 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
; Merge-masked fmul on <16 x float>: lanes where %mask1 is non-zero take
; %i * %j, the other lanes keep %dst. The checks expect vptestmd into %k1 and
; a {%k1}-predicated vmulps writing zmm0.
773 define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
774 ; CHECK-LABEL: test_mask_vmulps:
776 ; CHECK-NEXT: vptestmd %zmm3, %zmm3, %k1
777 ; CHECK-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1}
779 <16 x float> %j, <16 x i32> %mask1)
781 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
782 %x = fmul <16 x float> %i, %j
783 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
; Merge-masked minimum on <16 x float>. The minimum is written as
; fcmp olt + select; the outer select keeps %dst where %mask1 is zero. The
; checks expect vptestmd into %k1 and a {%k1}-predicated vminps.
787 define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
788 ; CHECK-LABEL: test_mask_vminps:
790 ; CHECK-NEXT: vptestmd %zmm3, %zmm3, %k1
791 ; CHECK-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1}
793 <16 x float> %j, <16 x i32> %mask1)
795 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
796 %cmp_res = fcmp olt <16 x float> %i, %j
797 %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
798 %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst
; Merge-masked minimum on <8 x double> (fcmp olt + select), with the mask
; derived from an <8 x i32> argument. All prefixes expect a {%k1}-predicated
; vminpd; they differ in how the mask is tested:
;   - AVX512F / AVX512BW / AVX512DQ prefixes: the ymm mask register is
;     treated as zmm (see the "kill" annotation) and tested with a zmm
;     vptestmd.
;   - AVX512VL / SKX prefixes: vptestmd operates directly on ymm3.
802 define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
803 ; AVX512F-LABEL: test_mask_vminpd:
805 ; AVX512F-NEXT: # kill: def $ymm3 killed $ymm3 def $zmm3
806 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k1
807 ; AVX512F-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
810 ; AVX512VL-LABEL: test_mask_vminpd:
812 ; AVX512VL-NEXT: vptestmd %ymm3, %ymm3, %k1
813 ; AVX512VL-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
814 ; AVX512VL-NEXT: retq
816 ; AVX512BW-LABEL: test_mask_vminpd:
818 ; AVX512BW-NEXT: # kill: def $ymm3 killed $ymm3 def $zmm3
819 ; AVX512BW-NEXT: vptestmd %zmm3, %zmm3, %k1
820 ; AVX512BW-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
821 ; AVX512BW-NEXT: retq
823 ; AVX512DQ-LABEL: test_mask_vminpd:
825 ; AVX512DQ-NEXT: # kill: def $ymm3 killed $ymm3 def $zmm3
826 ; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k1
827 ; AVX512DQ-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
828 ; AVX512DQ-NEXT: retq
830 ; SKX-LABEL: test_mask_vminpd:
832 ; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1
833 ; SKX-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
835 <8 x double> %j, <8 x i32> %mask1)
837 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
838 %cmp_res = fcmp olt <8 x double> %i, %j
839 %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
840 %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst
; Merge-masked maximum on <16 x float>. The maximum is written as
; fcmp ogt + select; the outer select keeps %dst where %mask1 is zero. The
; checks expect vptestmd into %k1 and a {%k1}-predicated vmaxps.
844 define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
845 ; CHECK-LABEL: test_mask_vmaxps:
847 ; CHECK-NEXT: vptestmd %zmm3, %zmm3, %k1
848 ; CHECK-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1}
850 <16 x float> %j, <16 x i32> %mask1)
852 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
853 %cmp_res = fcmp ogt <16 x float> %i, %j
854 %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
855 %r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst
; Merge-masked maximum on <8 x double> (fcmp ogt + select), with the mask
; derived from an <8 x i32> argument. All prefixes expect a {%k1}-predicated
; vmaxpd; they differ in how the mask is tested:
;   - AVX512F / AVX512BW / AVX512DQ prefixes: the ymm mask register is
;     treated as zmm (see the "kill" annotation) and tested with a zmm
;     vptestmd.
;   - AVX512VL / SKX prefixes: vptestmd operates directly on ymm3.
859 define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
860 ; AVX512F-LABEL: test_mask_vmaxpd:
862 ; AVX512F-NEXT: # kill: def $ymm3 killed $ymm3 def $zmm3
863 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k1
864 ; AVX512F-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
867 ; AVX512VL-LABEL: test_mask_vmaxpd:
869 ; AVX512VL-NEXT: vptestmd %ymm3, %ymm3, %k1
870 ; AVX512VL-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
871 ; AVX512VL-NEXT: retq
873 ; AVX512BW-LABEL: test_mask_vmaxpd:
875 ; AVX512BW-NEXT: # kill: def $ymm3 killed $ymm3 def $zmm3
876 ; AVX512BW-NEXT: vptestmd %zmm3, %zmm3, %k1
877 ; AVX512BW-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
878 ; AVX512BW-NEXT: retq
880 ; AVX512DQ-LABEL: test_mask_vmaxpd:
882 ; AVX512DQ-NEXT: # kill: def $ymm3 killed $ymm3 def $zmm3
883 ; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k1
884 ; AVX512DQ-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
885 ; AVX512DQ-NEXT: retq
887 ; SKX-LABEL: test_mask_vmaxpd:
889 ; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1
890 ; SKX-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
892 <8 x double> %j, <8 x i32> %mask1)
894 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
895 %cmp_res = fcmp ogt <8 x double> %i, %j
896 %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
897 %r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst
; Merge-masked fsub on <16 x float>: lanes where %mask1 is non-zero take
; %i - %j, the other lanes keep %dst. The checks expect vptestmd into %k1 and
; a {%k1}-predicated vsubps writing zmm0.
901 define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
902 ; CHECK-LABEL: test_mask_vsubps:
904 ; CHECK-NEXT: vptestmd %zmm3, %zmm3, %k1
905 ; CHECK-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1}
907 <16 x float> %j, <16 x i32> %mask1)
909 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
910 %x = fsub <16 x float> %i, %j
911 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
; Merge-masked fdiv on <16 x float>: lanes where %mask1 is non-zero take
; %i / %j, the other lanes keep %dst. The checks expect vptestmd into %k1 and
; a {%k1}-predicated vdivps writing zmm0.
915 define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
916 ; CHECK-LABEL: test_mask_vdivps:
918 ; CHECK-NEXT: vptestmd %zmm3, %zmm3, %k1
919 ; CHECK-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1}
921 <16 x float> %j, <16 x i32> %mask1)
923 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
924 %x = fdiv <16 x float> %i, %j
925 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
; Merge-masked fadd on <8 x double> with an <8 x i64> mask source: lanes
; where %mask1 is non-zero take %i + %j, the other lanes keep %dst. The
; checks expect vptestmq into %k1 and a {%k1}-predicated vaddpd.
929 define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
930 ; CHECK-LABEL: test_mask_vaddpd:
932 ; CHECK-NEXT: vptestmq %zmm3, %zmm3, %k1
933 ; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1}
935 <8 x double> %j, <8 x i64> %mask1)
937 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
938 %x = fadd <8 x double> %i, %j
939 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
; Zero-masked fadd on <8 x double>: lanes where %mask1 is non-zero take
; %i + %j, the other lanes are zero. The checks expect vptestmq into %k1 and
; a vaddpd with {%k1} {z}.
943 define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
944 ; CHECK-LABEL: test_maskz_vaddpd:
946 ; CHECK-NEXT: vptestmq %zmm2, %zmm2, %k1
947 ; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z}
949 <8 x i64> %mask1) nounwind readnone {
950 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
951 %x = fadd <8 x double> %i, %j
952 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
; Merge-masked fadd on <8 x double> whose second operand is loaded from %j
; (align 8); unselected lanes keep %dst. The checks expect vptestmq into %k1
; and the load folded into a {%k1}-predicated vaddpd as (%rdi).
956 define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
957 ; CHECK-LABEL: test_mask_fold_vaddpd:
959 ; CHECK-NEXT: vptestmq %zmm2, %zmm2, %k1
960 ; CHECK-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1}
962 <8 x double>* %j, <8 x i64> %mask1)
964 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
965 %tmp = load <8 x double>, <8 x double>* %j, align 8
966 %x = fadd <8 x double> %i, %tmp
967 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
; Zero-masked fadd on <8 x double> whose second operand is loaded from %j
; (align 8); unselected lanes are zero. The checks expect vptestmq into %k1
; and the load folded into vaddpd with {%k1} {z}.
971 define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
972 ; CHECK-LABEL: test_maskz_fold_vaddpd:
974 ; CHECK-NEXT: vptestmq %zmm1, %zmm1, %k1
975 ; CHECK-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z}
977 <8 x i64> %mask1) nounwind {
978 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
979 %tmp = load <8 x double>, <8 x double>* %j, align 8
980 %x = fadd <8 x double> %i, %tmp
981 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
; Loads one double from %j, splats it with insertelement + shufflevector
; (zeroinitializer shuffle mask) and adds it to %i. The check expects the
; scalar load folded into vaddpd as a (%rdi){1to8} broadcast operand.
985 define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
986 ; CHECK-LABEL: test_broadcast_vaddpd:
988 ; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0
990 %tmp = load double, double* %j
991 %b = insertelement <8 x double> undef, double %tmp, i32 0
992 %c = shufflevector <8 x double> %b, <8 x double> undef,
993 <8 x i32> zeroinitializer
994 %x = fadd <8 x double> %c, %i
; Merge-masked 512-bit double add of %i with a scalar loaded from %j and
; splatted to all eight lanes. Selected lanes (where %mask1 != 0) receive
; splat(load %j) + %i; the remaining lanes keep %i. The CHECK lines expect a
; copy of zmm1 into zmm0 followed by a {%k1}-predicated vaddpd with a
; (%rdi){1to8} embedded-broadcast operand.
;
; NOTE(review): the select passes through %i rather than %dst, so %dst is
; unused in the visible body and the expected code needs the extra vmovapd.
; The sibling test_mask_* functions pass through %dst. This may be intentional;
; confirm before changing, since the assertions would need regenerating.
998 define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
999 ; CHECK-LABEL: test_mask_broadcast_vaddpd:
1001 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
1002 ; CHECK-NEXT: vptestmq %zmm2, %zmm2, %k1
1003 ; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm0 {%k1}
1005 double* %j, <8 x i64> %mask1) nounwind {
1006 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
1007 %tmp = load double, double* %j
; Splat idiom: insert the scalar into lane 0, then shuffle with an all-zero
; index vector so every lane reads lane 0.
1008 %b = insertelement <8 x double> undef, double %tmp, i32 0
1009 %c = shufflevector <8 x double> %b, <8 x double> undef,
1010 <8 x i32> zeroinitializer
1011 %x = fadd <8 x double> %c, %i
1012 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i
; Zero-masked 512-bit double add of %i with a scalar loaded from %j and
; splatted to all eight lanes. Selected lanes (where %mask1 != 0) receive
; splat(load %j) + %i; the remaining lanes are zero. The CHECK lines expect a
; single vaddpd with a (%rdi){1to8} operand and {%k1} {z}.
1016 define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
1017 ; CHECK-LABEL: test_maskz_broadcast_vaddpd:
1019 ; CHECK-NEXT: vptestmq %zmm1, %zmm1, %k1
1020 ; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
1022 <8 x i64> %mask1) nounwind {
1023 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
1024 %tmp = load double, double* %j
; Splat idiom: insert the scalar into lane 0, then shuffle with an all-zero
; index vector so every lane reads lane 0.
1025 %b = insertelement <8 x double> undef, double %tmp, i32 0
1026 %c = shufflevector <8 x double> %b, <8 x double> undef,
1027 <8 x i32> zeroinitializer
1028 %x = fadd <8 x double> %c, %i
1029 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
; Negation of a <16 x float> written as (splat(-0.0) - %a). The assertions
; expect this to lower to one XOR of %a with a {1to16} broadcast constant
; loaded from the constant pool. The expected opcode differs per RUN
; configuration: the integer form vpxord under the AVX512F, AVX512VL and
; AVX512BW prefixes, and the floating-point form vxorps under the AVX512DQ and
; SKX prefixes.
1033 define <16 x float> @test_fxor(<16 x float> %a) {
1034 ; AVX512F-LABEL: test_fxor:
1036 ; AVX512F-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
1037 ; AVX512F-NEXT: retq
1039 ; AVX512VL-LABEL: test_fxor:
1040 ; AVX512VL: # %bb.0:
1041 ; AVX512VL-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
1042 ; AVX512VL-NEXT: retq
1044 ; AVX512BW-LABEL: test_fxor:
1045 ; AVX512BW: # %bb.0:
1046 ; AVX512BW-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
1047 ; AVX512BW-NEXT: retq
1049 ; AVX512DQ-LABEL: test_fxor:
1050 ; AVX512DQ: # %bb.0:
1051 ; AVX512DQ-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
1052 ; AVX512DQ-NEXT: retq
1054 ; SKX-LABEL: test_fxor:
1056 ; SKX-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
1059 %res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
1060 ret <16 x float>%res
; 256-bit variant of test_fxor: negation of an <8 x float> written as
; (splat(-0.0) - %a). The expected lowering differs per RUN configuration.
; Under the AVX512F, AVX512BW and AVX512DQ prefixes the -0.0 splat is first
; materialized in ymm1 with vbroadcastss and then combined with a
; register-register vxorps. Under the AVX512VL prefix a single vpxord with a
; {1to8} broadcast memory operand is expected. Under the SKX prefix a single
; vxorps with a {1to8} broadcast memory operand is expected.
1063 define <8 x float> @test_fxor_8f32(<8 x float> %a) {
1064 ; AVX512F-LABEL: test_fxor_8f32:
1066 ; AVX512F-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
1067 ; AVX512F-NEXT: vxorps %ymm1, %ymm0, %ymm0
1068 ; AVX512F-NEXT: retq
1070 ; AVX512VL-LABEL: test_fxor_8f32:
1071 ; AVX512VL: # %bb.0:
1072 ; AVX512VL-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
1073 ; AVX512VL-NEXT: retq
1075 ; AVX512BW-LABEL: test_fxor_8f32:
1076 ; AVX512BW: # %bb.0:
1077 ; AVX512BW-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
1078 ; AVX512BW-NEXT: vxorps %ymm1, %ymm0, %ymm0
1079 ; AVX512BW-NEXT: retq
1081 ; AVX512DQ-LABEL: test_fxor_8f32:
1082 ; AVX512DQ: # %bb.0:
1083 ; AVX512DQ-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
1084 ; AVX512DQ-NEXT: vxorps %ymm1, %ymm0, %ymm0
1085 ; AVX512DQ-NEXT: retq
1087 ; SKX-LABEL: test_fxor_8f32:
1089 ; SKX-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
1091 %res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
; Absolute value of an <8 x double> through the llvm.fabs.v8f64 intrinsic.
; The assertions expect one AND of %p with a {1to8} broadcast constant loaded
; from the constant pool. The expected opcode is the integer form vpandq under
; the AVX512F, AVX512VL and AVX512BW prefixes, and the floating-point form
; vandpd under the AVX512DQ and SKX prefixes.
1095 define <8 x double> @fabs_v8f64(<8 x double> %p)
1096 ; AVX512F-LABEL: fabs_v8f64:
1098 ; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
1099 ; AVX512F-NEXT: retq
1101 ; AVX512VL-LABEL: fabs_v8f64:
1102 ; AVX512VL: # %bb.0:
1103 ; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
1104 ; AVX512VL-NEXT: retq
1106 ; AVX512BW-LABEL: fabs_v8f64:
1107 ; AVX512BW: # %bb.0:
1108 ; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
1109 ; AVX512BW-NEXT: retq
1111 ; AVX512DQ-LABEL: fabs_v8f64:
1112 ; AVX512DQ: # %bb.0:
1113 ; AVX512DQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
1114 ; AVX512DQ-NEXT: retq
1116 ; SKX-LABEL: fabs_v8f64:
1118 ; SKX-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
1121 %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
; Declaration of the intrinsic called by fabs_v8f64.
1124 declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
; Absolute value of a <16 x float> through the llvm.fabs.v16f32 intrinsic.
; The assertions expect one AND of %p with a {1to16} broadcast constant loaded
; from the constant pool. The expected opcode is the integer form vpandd under
; the AVX512F, AVX512VL and AVX512BW prefixes, and the floating-point form
; vandps under the AVX512DQ and SKX prefixes.
1126 define <16 x float> @fabs_v16f32(<16 x float> %p)
1127 ; AVX512F-LABEL: fabs_v16f32:
1129 ; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
1130 ; AVX512F-NEXT: retq
1132 ; AVX512VL-LABEL: fabs_v16f32:
1133 ; AVX512VL: # %bb.0:
1134 ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
1135 ; AVX512VL-NEXT: retq
1137 ; AVX512BW-LABEL: fabs_v16f32:
1138 ; AVX512BW: # %bb.0:
1139 ; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
1140 ; AVX512BW-NEXT: retq
1142 ; AVX512DQ-LABEL: fabs_v16f32:
1143 ; AVX512DQ: # %bb.0:
1144 ; AVX512DQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
1145 ; AVX512DQ-NEXT: retq
1147 ; SKX-LABEL: fabs_v16f32:
1149 ; SKX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
1152 %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
; Declaration of the intrinsic called by fabs_v16f32.
1155 declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
; Masked increment of a <16 x i32>. Selected lanes (where %mask1 != 0) receive
; %i + 1; the remaining lanes keep %i. The CHECK lines expect no constant-pool
; load for the splat of 1: vptestmd builds %k1, vpternlogd $255 overwrites zmm1
; with a generated constant (all-ones, i.e. -1 per lane), and a
; {%k1}-predicated vpsubd subtracts it from %i.
1157 define <16 x i32> @masked_inc_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
1158 ; CHECK-LABEL: masked_inc_test:
1160 ; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1
1161 ; CHECK-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
1162 ; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0 {%k1}
1164 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
1165 %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; Unselected lanes pass %i through unchanged.
1166 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
; Masked decrement of a <16 x i32>. Selected lanes (where %mask1 != 0) receive
; %i - 1; the remaining lanes keep %i. The CHECK lines expect no constant-pool
; load for the splat of 1: vptestmd builds %k1, vpternlogd $255 overwrites zmm1
; with a generated constant (all-ones, i.e. -1 per lane), and a
; {%k1}-predicated vpaddd adds it to %i.
1170 define <16 x i32> @masked_dec_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
1171 ; CHECK-LABEL: masked_dec_test:
1173 ; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1
1174 ; CHECK-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
1175 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1}
1177 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
1178 %x = sub <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; Unselected lanes pass %i through unchanged.
1179 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i