1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=KNL
3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=SKX
; Packed single-precision multiply-add: (%a0 * %a1) + %a2 on <16 x float>.
; With -fp-contract=fast (set on the llc command lines at the top of the file)
; the separate fmul and fadd are expected to become one vfmadd213ps on zmm
; registers, identically for both tested configurations.
5 define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
6 ; ALL-LABEL: test_x86_fmadd_ps_z:
8 ; ALL-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
10 %x = fmul <16 x float> %a0, %a1
11 %res = fadd <16 x float> %x, %a2
; Packed single-precision multiply-subtract: (%a0 * %a1) - %a2 on <16 x float>.
; The fmul feeding the left operand of fsub is expected to be selected as a
; single vfmsub213ps on zmm registers for both tested configurations.
15 define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
16 ; ALL-LABEL: test_x86_fmsub_ps_z:
18 ; ALL-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
20 %x = fmul <16 x float> %a0, %a1
21 %res = fsub <16 x float> %x, %a2
; Packed single-precision negated multiply-add: %a2 - (%a0 * %a1).
; Here the product is the right-hand operand of fsub, so the expected
; instruction is vfnmadd213ps, i.e. -(zmm1 * zmm0) + zmm2.
25 define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
26 ; ALL-LABEL: test_x86_fnmadd_ps_z:
28 ; ALL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
30 %x = fmul <16 x float> %a0, %a1
31 %res = fsub <16 x float> %a2, %x
; Packed single-precision negated multiply-subtract: -(%a0 * %a1) - %a2.
; The product is negated by subtracting it from a splat of -0.0 (%y), and %a2
; is then subtracted from that. The whole chain is expected to collapse into a
; single vfnmsub213ps on zmm registers.
35 define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
36 ; ALL-LABEL: test_x86_fnmsub_ps_z:
38 ; ALL-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
40 %x = fmul <16 x float> %a0, %a1
41 %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
42 float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
43 float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
44 float -0.000000e+00>, %x
45 %res = fsub <16 x float> %y, %a2
; Packed double-precision multiply-add: (%a0 * %a1) + %a2 on <8 x double>.
; Double-precision counterpart of the <16 x float> multiply-add case; the
; expected instruction is vfmadd213pd on zmm registers.
49 define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
50 ; ALL-LABEL: test_x86_fmadd_pd_z:
52 ; ALL-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
54 %x = fmul <8 x double> %a0, %a1
55 %res = fadd <8 x double> %x, %a2
; Packed double-precision multiply-subtract: (%a0 * %a1) - %a2 on <8 x double>.
; The expected instruction is vfmsub213pd on zmm registers.
59 define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
60 ; ALL-LABEL: test_x86_fmsub_pd_z:
62 ; ALL-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
64 %x = fmul <8 x double> %a0, %a1
65 %res = fsub <8 x double> %x, %a2
; Scalar double-precision multiply-subtract: (%a0 * %a1) - %a2.
; All three operands are register arguments; the expected instruction is the
; scalar vfmsub213sd operating on xmm registers.
69 define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
70 ; ALL-LABEL: test_x86_fmsub_213:
72 ; ALL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
74 %x = fmul double %a0, %a1
75 %res = fsub double %x, %a2
; Scalar double-precision multiply-subtract with the subtrahend loaded from
; memory: (%a0 * %a1) - load(%a2_ptr).
; The load is expected to be folded into vfmsub213sd as its memory operand
; (shown as `mem` in the check line) rather than emitted separately.
79 define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) {
80 ; ALL-LABEL: test_x86_fmsub_213_m:
82 ; ALL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem
84 %a2 = load double , double *%a2_ptr
85 %x = fmul double %a0, %a1
86 %res = fsub double %x, %a2
; Scalar double-precision multiply-subtract with a multiplicand loaded from
; memory: (%a0 * load(%a2_ptr)) - %a1.
; The load is expected to be folded as the memory multiplicand. Note that
; although the function name says 231, the check line expects the 132 form
; (vfmsub132sd), which keeps the result in xmm0.
90 define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) {
91 ; ALL-LABEL: test_x86_fmsub_231_m:
93 ; ALL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1
95 %a2 = load double , double *%a2_ptr
96 %x = fmul double %a0, %a2
97 %res = fsub double %x, %a1
; Packed single-precision multiply-add where one multiplicand is a constant
; splat: (%a1 * splat(0x3FB99999A0000000)) + %a2.
; The constant vector is expected to be used directly as the memory
; multiplicand of vfmadd132ps (shown as `mem` in the check line).
101 define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
102 ; ALL-LABEL: test231_br:
104 ; ALL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1
106 %b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
107 %b2 = fadd <16 x float> %b1, %a2
; Packed single-precision multiply-add where the addend is a constant splat:
; (%a1 * %a2) + splat(0x3FB99999A0000000).
; The constant vector is expected to be used directly as the memory addend of
; vfmadd213ps (shown as `mem` in the check line).
111 define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
112 ; ALL-LABEL: test213_br:
114 ; ALL-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + mem
116 %b1 = fmul <16 x float> %a1, %a2
117 %b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
; Multiply-add under a select: per lane, %mask picks between
; (%a0 * load(%a2_ptrt)) + %a1 and the original %a0.
; The fallback value is the first multiplicand, so the expected instruction is
; the 132 form with the loaded vector as memory multiplicand and the result
; left in zmm0.
; The two configurations differ only in how the <16 x i1> mask argument is
; moved into k1: the KNL checks widen it with vpmovzxbd, shift with vpslld and
; test with vptestmd, while the SKX checks use vpsllw followed by vpmovb2m.
; The vector load is declared with align 1.
122 define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
123 ; KNL-LABEL: test_x86_fmadd132_ps:
125 ; KNL-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
126 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2
127 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1
128 ; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1
131 ; SKX-LABEL: test_x86_fmadd132_ps:
133 ; SKX-NEXT: vpsllw $7, %xmm2, %xmm2
134 ; SKX-NEXT: vpmovb2m %xmm2, %k1
135 ; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1
137 %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
138 %x = fmul <16 x float> %a0, %a2
139 %y = fadd <16 x float> %x, %a1
140 %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0
141 ret <16 x float> %res
; Multiply-add under a select: per lane, %mask picks between
; (%a0 * load(%a2_ptrt)) + %a1 and the original %a1.
; The fallback value is the addend, so the expected instruction is the 231
; form accumulating into zmm1, followed by a vmovaps that copies zmm1 into
; the return register zmm0.
; Mask materialisation into k1 differs per configuration: the KNL checks use
; vpmovzxbd + vpslld + vptestmd, the SKX checks use vpsllw + vpmovb2m.
; The vector load is declared with align 1.
145 define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
146 ; KNL-LABEL: test_x86_fmadd231_ps:
148 ; KNL-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
149 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2
150 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1
151 ; KNL-NEXT: vfmadd231ps {{.*#+}} zmm1 = (zmm0 * mem) + zmm1
152 ; KNL-NEXT: vmovaps %zmm1, %zmm0
155 ; SKX-LABEL: test_x86_fmadd231_ps:
157 ; SKX-NEXT: vpsllw $7, %xmm2, %xmm2
158 ; SKX-NEXT: vpmovb2m %xmm2, %k1
159 ; SKX-NEXT: vfmadd231ps {{.*#+}} zmm1 = (zmm0 * mem) + zmm1
160 ; SKX-NEXT: vmovaps %zmm1, %zmm0
162 %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
163 %x = fmul <16 x float> %a0, %a2
164 %y = fadd <16 x float> %x, %a1
165 %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
166 ret <16 x float> %res
; Multiply-add under a select: per lane, %mask picks between
; (%a1 * %a0) + load(%a2_ptrt) and the original %a1.
; The fallback value is a multiplicand and the loaded vector is the addend,
; so the expected instruction is the 213 form writing zmm1 with a memory
; addend, followed by a vmovaps that copies zmm1 into the return register
; zmm0.
; Mask materialisation into k1 differs per configuration: the KNL checks use
; vpmovzxbd + vpslld + vptestmd, the SKX checks use vpsllw + vpmovb2m.
; The vector load is declared with align 1.
170 define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
171 ; KNL-LABEL: test_x86_fmadd213_ps:
173 ; KNL-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
174 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2
175 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1
176 ; KNL-NEXT: vfmadd213ps {{.*#+}} zmm1 = (zmm0 * zmm1) + mem
177 ; KNL-NEXT: vmovaps %zmm1, %zmm0
180 ; SKX-LABEL: test_x86_fmadd213_ps:
182 ; SKX-NEXT: vpsllw $7, %xmm2, %xmm2
183 ; SKX-NEXT: vpmovb2m %xmm2, %k1
184 ; SKX-NEXT: vfmadd213ps {{.*#+}} zmm1 = (zmm0 * zmm1) + mem
185 ; SKX-NEXT: vmovaps %zmm1, %zmm0
187 %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
188 %x = fmul <16 x float> %a1, %a0
189 %y = fadd <16 x float> %x, %a2
190 %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
191 ret <16 x float> %res