Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / test / CodeGen / X86 / fma-builtins.c
blob4686b3686a4f8f6c5c9cb74bbacd648998d36e12
1 // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -emit-llvm -o - | FileCheck %s
4 #include <immintrin.h>
// Expects _mm_fmadd_ps to produce a call to the llvm.fma.v4f32 intrinsic.
// All three call operands are matched with wildcards only.
6 __m128 test_mm_fmadd_ps(__m128 a, __m128 b, __m128 c) {
7 // CHECK-LABEL: test_mm_fmadd_ps
8 // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
9 return _mm_fmadd_ps(a, b, c);
// Expects _mm_fmadd_pd to produce a call to the llvm.fma.v2f64 intrinsic.
// All three call operands are matched with wildcards only.
12 __m128d test_mm_fmadd_pd(__m128d a, __m128d b, __m128d c) {
13 // CHECK-LABEL: test_mm_fmadd_pd
14 // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
15 return _mm_fmadd_pd(a, b, c);
// Expects _mm_fmadd_ss to produce, in order: three extractelement
// instructions reading index 0 of a <4 x float>, a call to the scalar
// llvm.fma.f32 intrinsic, and an insertelement writing index 0.
18 __m128 test_mm_fmadd_ss(__m128 a, __m128 b, __m128 c) {
19 // CHECK-LABEL: test_mm_fmadd_ss
20 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
21 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
22 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
23 // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
24 // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
25 return _mm_fmadd_ss(a, b, c);
// Expects _mm_fmadd_sd to produce, in order: three extractelement
// instructions reading index 0 of a <2 x double>, a call to the scalar
// llvm.fma.f64 intrinsic, and an insertelement writing index 0.
28 __m128d test_mm_fmadd_sd(__m128d a, __m128d b, __m128d c) {
29 // CHECK-LABEL: test_mm_fmadd_sd
30 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
31 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
32 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
33 // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
34 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
35 return _mm_fmadd_sd(a, b, c);
// Expects _mm_fmsub_ps to emit an fneg on a <4 x float> value and to pass
// that negated value as the third operand of the llvm.fma.v4f32 call.
// The NEG capture is referenced in the call so the test fails if the negated
// value is not the one that reaches the intrinsic.
38 __m128 test_mm_fmsub_ps(__m128 a, __m128 b, __m128 c) {
39 // CHECK-LABEL: test_mm_fmsub_ps
40 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
41 // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]])
42 return _mm_fmsub_ps(a, b, c);
// Expects _mm_fmsub_pd to emit an fneg on a <2 x double> value and to pass
// that negated value as the third operand of the llvm.fma.v2f64 call.
// The NEG capture is referenced in the call so the test fails if the negated
// value is not the one that reaches the intrinsic.
45 __m128d test_mm_fmsub_pd(__m128d a, __m128d b, __m128d c) {
46 // CHECK-LABEL: test_mm_fmsub_pd
47 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
48 // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]])
49 return _mm_fmsub_pd(a, b, c);
// Expects _mm_fmsub_ss to produce, in order: an fneg on a <4 x float>,
// three extractelement instructions reading index 0, a call to the scalar
// llvm.fma.f32 intrinsic, and an insertelement writing index 0.
// NOTE(review): the NEG capture is not referenced by any later directive, so
// these checks do not tie the negated vector to an fma operand.
52 __m128 test_mm_fmsub_ss(__m128 a, __m128 b, __m128 c) {
53 // CHECK-LABEL: test_mm_fmsub_ss
54 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
55 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
56 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
57 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
58 // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
59 // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
60 return _mm_fmsub_ss(a, b, c);
// Expects _mm_fmsub_sd to produce, in order: an fneg on a <2 x double>,
// three extractelement instructions reading index 0, a call to the scalar
// llvm.fma.f64 intrinsic, and an insertelement writing index 0.
// NOTE(review): the NEG capture is not referenced by any later directive, so
// these checks do not tie the negated vector to an fma operand.
63 __m128d test_mm_fmsub_sd(__m128d a, __m128d b, __m128d c) {
64 // CHECK-LABEL: test_mm_fmsub_sd
65 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
66 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
67 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
68 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
69 // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
70 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
71 return _mm_fmsub_sd(a, b, c);
// Expects _mm_fnmadd_ps to emit an fneg on a <4 x float> value and to pass
// that negated value as the first operand of the llvm.fma.v4f32 call.
// The NEG capture is referenced in the call so the test fails if the negated
// value is not the one that reaches the intrinsic.
74 __m128 test_mm_fnmadd_ps(__m128 a, __m128 b, __m128 c) {
75 // CHECK-LABEL: test_mm_fnmadd_ps
76 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
77 // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> [[NEG]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
78 return _mm_fnmadd_ps(a, b, c);
// Expects _mm_fnmadd_pd to emit an fneg on a <2 x double> value and to pass
// that negated value as the first operand of the llvm.fma.v2f64 call.
// The NEG capture is referenced in the call so the test fails if the negated
// value is not the one that reaches the intrinsic.
81 __m128d test_mm_fnmadd_pd(__m128d a, __m128d b, __m128d c) {
82 // CHECK-LABEL: test_mm_fnmadd_pd
83 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
84 // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> [[NEG]], <2 x double> %{{.*}}, <2 x double> %{{.*}})
85 return _mm_fnmadd_pd(a, b, c);
// Expects _mm_fnmadd_ss to produce, in order: an fneg on a <4 x float>,
// three extractelement instructions reading index 0, a call to the scalar
// llvm.fma.f32 intrinsic, and an insertelement writing index 0.
// NOTE(review): the NEG capture is not referenced by any later directive, so
// these checks do not tie the negated vector to an fma operand.
88 __m128 test_mm_fnmadd_ss(__m128 a, __m128 b, __m128 c) {
89 // CHECK-LABEL: test_mm_fnmadd_ss
90 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
91 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
92 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
93 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
94 // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
95 // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
96 return _mm_fnmadd_ss(a, b, c);
// Expects _mm_fnmadd_sd to produce, in order: an fneg on a <2 x double>,
// three extractelement instructions reading index 0, a call to the scalar
// llvm.fma.f64 intrinsic, and an insertelement writing index 0.
// NOTE(review): the NEG capture is not referenced by any later directive, so
// these checks do not tie the negated vector to an fma operand.
99 __m128d test_mm_fnmadd_sd(__m128d a, __m128d b, __m128d c) {
100 // CHECK-LABEL: test_mm_fnmadd_sd
101 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
102 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
103 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
104 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
105 // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
106 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
107 return _mm_fnmadd_sd(a, b, c);
// Expects _mm_fnmsub_ps to emit two fneg instructions on <4 x float> values
// and to pass the first as the first operand and the second as the third
// operand of the llvm.fma.v4f32 call.
// Both captures are referenced in the call so the test fails if either
// negated value does not reach the intrinsic.
110 __m128 test_mm_fnmsub_ps(__m128 a, __m128 b, __m128 c) {
111 // CHECK-LABEL: test_mm_fnmsub_ps
112 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
113 // CHECK: [[NEG2:%.+]] = fneg <4 x float> %{{.+}}
114 // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> [[NEG]], <4 x float> %{{.*}}, <4 x float> [[NEG2]])
115 return _mm_fnmsub_ps(a, b, c);
// Expects _mm_fnmsub_pd to emit two fneg instructions on <2 x double> values
// and to pass the first as the first operand and the second as the third
// operand of the llvm.fma.v2f64 call.
// Both captures are referenced in the call so the test fails if either
// negated value does not reach the intrinsic.
118 __m128d test_mm_fnmsub_pd(__m128d a, __m128d b, __m128d c) {
119 // CHECK-LABEL: test_mm_fnmsub_pd
120 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
121 // CHECK: [[NEG2:%.+]] = fneg <2 x double> %{{.+}}
122 // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> [[NEG]], <2 x double> %{{.*}}, <2 x double> [[NEG2]])
123 return _mm_fnmsub_pd(a, b, c);
// Expects _mm_fnmsub_ss to produce, in order: two fneg instructions on
// <4 x float> values, three extractelement instructions reading index 0, a
// call to the scalar llvm.fma.f32 intrinsic, and an insertelement writing
// index 0.
// NOTE(review): neither NEG nor NEG2 is referenced by any later directive, so
// these checks do not tie the negated vectors to the fma operands.
126 __m128 test_mm_fnmsub_ss(__m128 a, __m128 b, __m128 c) {
127 // CHECK-LABEL: test_mm_fnmsub_ss
128 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
129 // CHECK: [[NEG2:%.+]] = fneg <4 x float> %{{.+}}
130 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
131 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
132 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
133 // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
134 // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
135 return _mm_fnmsub_ss(a, b, c);
// Expects _mm_fnmsub_sd to produce, in order: two fneg instructions on
// <2 x double> values, three extractelement instructions reading index 0, a
// call to the scalar llvm.fma.f64 intrinsic, and an insertelement writing
// index 0.
// NOTE(review): neither NEG nor NEG2 is referenced by any later directive, so
// these checks do not tie the negated vectors to the fma operands.
138 __m128d test_mm_fnmsub_sd(__m128d a, __m128d b, __m128d c) {
139 // CHECK-LABEL: test_mm_fnmsub_sd
140 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
141 // CHECK: [[NEG2:%.+]] = fneg <2 x double> %{{.+}}
142 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
143 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
144 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
145 // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
146 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
147 return _mm_fnmsub_sd(a, b, c);
// Expects _mm_fmaddsub_ps to produce a call to llvm.x86.fma.vfmaddsub.ps,
// with no fneg appearing between the function label and that call.
150 __m128 test_mm_fmaddsub_ps(__m128 a, __m128 b, __m128 c) {
151 // CHECK-LABEL: test_mm_fmaddsub_ps
152 // CHECK-NOT: fneg
153 // CHECK: call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
154 return _mm_fmaddsub_ps(a, b, c);
// Expects _mm_fmaddsub_pd to produce a call to llvm.x86.fma.vfmaddsub.pd,
// with no fneg appearing between the function label and that call.
157 __m128d test_mm_fmaddsub_pd(__m128d a, __m128d b, __m128d c) {
158 // CHECK-LABEL: test_mm_fmaddsub_pd
159 // CHECK-NOT: fneg
160 // CHECK: call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
161 return _mm_fmaddsub_pd(a, b, c);
// Expects _mm_fmsubadd_ps to emit an fneg on a <4 x float> value and to pass
// that negated value (captured as NEG) as the third operand of the
// llvm.x86.fma.vfmaddsub.ps call.
164 __m128 test_mm_fmsubadd_ps(__m128 a, __m128 b, __m128 c) {
165 // CHECK-LABEL: test_mm_fmsubadd_ps
166 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
167 // CHECK: call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]])
168 return _mm_fmsubadd_ps(a, b, c);
// Expects _mm_fmsubadd_pd to emit an fneg on a <2 x double> value and to pass
// that negated value (captured as NEG) as the third operand of the
// llvm.x86.fma.vfmaddsub.pd call.
171 __m128d test_mm_fmsubadd_pd(__m128d a, __m128d b, __m128d c) {
172 // CHECK-LABEL: test_mm_fmsubadd_pd
173 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
174 // CHECK: call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]])
175 return _mm_fmsubadd_pd(a, b, c);
// Expects _mm256_fmadd_ps to produce a call to the llvm.fma.v8f32 intrinsic.
// All three call operands are matched with wildcards only.
178 __m256 test_mm256_fmadd_ps(__m256 a, __m256 b, __m256 c) {
179 // CHECK-LABEL: test_mm256_fmadd_ps
180 // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
181 return _mm256_fmadd_ps(a, b, c);
// Expects _mm256_fmadd_pd to produce a call to the llvm.fma.v4f64 intrinsic.
// All three call operands are matched with wildcards only.
184 __m256d test_mm256_fmadd_pd(__m256d a, __m256d b, __m256d c) {
185 // CHECK-LABEL: test_mm256_fmadd_pd
186 // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
187 return _mm256_fmadd_pd(a, b, c);
// Expects _mm256_fmsub_ps to emit an fneg on an <8 x float> value and to pass
// that negated value as the third operand of the llvm.fma.v8f32 call.
// The NEG capture is referenced in the call so the test fails if the negated
// value is not the one that reaches the intrinsic.
190 __m256 test_mm256_fmsub_ps(__m256 a, __m256 b, __m256 c) {
191 // CHECK-LABEL: test_mm256_fmsub_ps
192 // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
193 // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]])
194 return _mm256_fmsub_ps(a, b, c);
// Expects _mm256_fmsub_pd to emit an fneg on a <4 x double> value and to pass
// that negated value as the third operand of the llvm.fma.v4f64 call.
// The NEG capture is referenced in the call so the test fails if the negated
// value is not the one that reaches the intrinsic.
197 __m256d test_mm256_fmsub_pd(__m256d a, __m256d b, __m256d c) {
198 // CHECK-LABEL: test_mm256_fmsub_pd
199 // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
200 // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]])
201 return _mm256_fmsub_pd(a, b, c);
// Expects _mm256_fnmadd_ps to emit an fneg on an <8 x float> value and to
// pass that negated value as the first operand of the llvm.fma.v8f32 call.
// The NEG capture is referenced in the call so the test fails if the negated
// value is not the one that reaches the intrinsic.
204 __m256 test_mm256_fnmadd_ps(__m256 a, __m256 b, __m256 c) {
205 // CHECK-LABEL: test_mm256_fnmadd_ps
206 // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
207 // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> [[NEG]], <8 x float> %{{.*}}, <8 x float> %{{.*}})
208 return _mm256_fnmadd_ps(a, b, c);
// Expects _mm256_fnmadd_pd to emit an fneg on a <4 x double> value and to
// pass that negated value as the first operand of the llvm.fma.v4f64 call.
// The NEG capture is referenced in the call so the test fails if the negated
// value is not the one that reaches the intrinsic.
211 __m256d test_mm256_fnmadd_pd(__m256d a, __m256d b, __m256d c) {
212 // CHECK-LABEL: test_mm256_fnmadd_pd
213 // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
214 // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> [[NEG]], <4 x double> %{{.*}}, <4 x double> %{{.*}})
215 return _mm256_fnmadd_pd(a, b, c);
// Expects _mm256_fnmsub_ps to emit two fneg instructions on <8 x float>
// values and to pass the first as the first operand and the second as the
// third operand of the llvm.fma.v8f32 call.
// Both captures are referenced in the call so the test fails if either
// negated value does not reach the intrinsic.
218 __m256 test_mm256_fnmsub_ps(__m256 a, __m256 b, __m256 c) {
219 // CHECK-LABEL: test_mm256_fnmsub_ps
220 // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
221 // CHECK: [[NEG2:%.+]] = fneg <8 x float> %{{.*}}
222 // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> [[NEG]], <8 x float> %{{.*}}, <8 x float> [[NEG2]])
223 return _mm256_fnmsub_ps(a, b, c);
// Expects _mm256_fnmsub_pd to emit two fneg instructions on <4 x double>
// values and to pass the first as the first operand and the second as the
// third operand of the llvm.fma.v4f64 call.
// Both captures are referenced in the call so the test fails if either
// negated value does not reach the intrinsic.
226 __m256d test_mm256_fnmsub_pd(__m256d a, __m256d b, __m256d c) {
227 // CHECK-LABEL: test_mm256_fnmsub_pd
228 // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
229 // CHECK: [[NEG2:%.+]] = fneg <4 x double> %{{.+}}
230 // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> [[NEG]], <4 x double> %{{.*}}, <4 x double> [[NEG2]])
231 return _mm256_fnmsub_pd(a, b, c);
// Expects _mm256_fmaddsub_ps to produce a call to
// llvm.x86.fma.vfmaddsub.ps.256, with no fneg appearing between the function
// label and that call.
234 __m256 test_mm256_fmaddsub_ps(__m256 a, __m256 b, __m256 c) {
235 // CHECK-LABEL: test_mm256_fmaddsub_ps
236 // CHECK-NOT: fneg
237 // CHECK: call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
238 return _mm256_fmaddsub_ps(a, b, c);
// Expects _mm256_fmaddsub_pd to produce a call to
// llvm.x86.fma.vfmaddsub.pd.256, with no fneg appearing between the function
// label and that call.
241 __m256d test_mm256_fmaddsub_pd(__m256d a, __m256d b, __m256d c) {
242 // CHECK-LABEL: test_mm256_fmaddsub_pd
243 // CHECK-NOT: fneg
244 // CHECK: call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
245 return _mm256_fmaddsub_pd(a, b, c);
// Expects _mm256_fmsubadd_ps to emit an fneg on an <8 x float> value and to
// pass that negated value (captured as NEG) as the third operand of the
// llvm.x86.fma.vfmaddsub.ps.256 call.
248 __m256 test_mm256_fmsubadd_ps(__m256 a, __m256 b, __m256 c) {
249 // CHECK-LABEL: test_mm256_fmsubadd_ps
250 // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.+}}
251 // CHECK: call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]])
252 return _mm256_fmsubadd_ps(a, b, c);
// Expects _mm256_fmsubadd_pd to emit an fneg on a <4 x double> value and to
// pass that negated value (captured as NEG) as the third operand of the
// llvm.x86.fma.vfmaddsub.pd.256 call.
255 __m256d test_mm256_fmsubadd_pd(__m256d a, __m256d b, __m256d c) {
256 // CHECK-LABEL: test_mm256_fmsubadd_pd
257 // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
258 // CHECK: call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]])
259 return _mm256_fmsubadd_pd(a, b, c);