[clang] Implement lifetime analysis for lifetime_capture_by(X) (#115921)
[llvm-project.git] / clang / test / CodeGen / X86 / fma-builtins.c
blobaa17dcc62fbc06757bffed570db70be9ac6ad53b
1 // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -emit-llvm -o - | FileCheck %s
2 // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +fma -emit-llvm -o - | FileCheck %s
3 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -emit-llvm -o - | FileCheck %s
4 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +fma -emit-llvm -o - | FileCheck %s
7 #include <immintrin.h>
// _mm_fmadd_ps: the emitted IR must contain a call to the generic
// llvm.fma.v4f32 intrinsic taking three <4 x float> operands.
__m128 test_mm_fmadd_ps(__m128 a, __m128 b, __m128 c) {
  // CHECK-LABEL: test_mm_fmadd_ps
  // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_fmadd_ps(a, b, c);
}
// _mm_fmadd_pd: the emitted IR must contain a call to the generic
// llvm.fma.v2f64 intrinsic taking three <2 x double> operands.
__m128d test_mm_fmadd_pd(__m128d a, __m128d b, __m128d c) {
  // CHECK-LABEL: test_mm_fmadd_pd
  // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_fmadd_pd(a, b, c);
}
// _mm_fmadd_ss: the scalar form is expected as three element-0 extracts,
// a scalar llvm.fma.f32 call, and an insert of the result back into
// element 0 of a <4 x float> vector.
__m128 test_mm_fmadd_ss(__m128 a, __m128 b, __m128 c) {
  // CHECK-LABEL: test_mm_fmadd_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i64 0
  // CHECK: extractelement <4 x float> %{{.*}}, i64 0
  // CHECK: extractelement <4 x float> %{{.*}}, i64 0
  // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
  return _mm_fmadd_ss(a, b, c);
}
// _mm_fmadd_sd: the scalar form is expected as three element-0 extracts,
// a scalar llvm.fma.f64 call, and an insert of the result back into
// element 0 of a <2 x double> vector.
__m128d test_mm_fmadd_sd(__m128d a, __m128d b, __m128d c) {
  // CHECK-LABEL: test_mm_fmadd_sd
  // CHECK: extractelement <2 x double> %{{.*}}, i64 0
  // CHECK: extractelement <2 x double> %{{.*}}, i64 0
  // CHECK: extractelement <2 x double> %{{.*}}, i64 0
  // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
  return _mm_fmadd_sd(a, b, c);
}
// _mm_fmsub_ps: a <4 x float> fneg must be emitted and its result must be
// the third (addend) operand of the llvm.fma.v4f32 call. Binding the fneg
// result keeps the test from passing when the negation is dropped from
// the call.
__m128 test_mm_fmsub_ps(__m128 a, __m128 b, __m128 c) {
  // CHECK-LABEL: test_mm_fmsub_ps
  // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
  // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]])
  return _mm_fmsub_ps(a, b, c);
}
// _mm_fmsub_pd: a <2 x double> fneg must be emitted and its result must be
// the third (addend) operand of the llvm.fma.v2f64 call.
__m128d test_mm_fmsub_pd(__m128d a, __m128d b, __m128d c) {
  // CHECK-LABEL: test_mm_fmsub_pd
  // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
  // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]])
  return _mm_fmsub_pd(a, b, c);
}
// _mm_fmsub_ss: the whole <4 x float> addend is negated first; element 0
// of each operand is then extracted, with the third extract required to
// read the negated vector. A scalar llvm.fma.f32 call and an insert into
// element 0 follow.
__m128 test_mm_fmsub_ss(__m128 a, __m128 b, __m128 c) {
  // CHECK-LABEL: test_mm_fmsub_ss
  // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
  // CHECK: extractelement <4 x float> %{{.*}}, i64 0
  // CHECK: extractelement <4 x float> %{{.*}}, i64 0
  // CHECK: extractelement <4 x float> [[NEG]], i64 0
  // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
  return _mm_fmsub_ss(a, b, c);
}
// _mm_fmsub_sd: the whole <2 x double> addend is negated first; element 0
// of each operand is then extracted, with the third extract required to
// read the negated vector. A scalar llvm.fma.f64 call and an insert into
// element 0 follow.
__m128d test_mm_fmsub_sd(__m128d a, __m128d b, __m128d c) {
  // CHECK-LABEL: test_mm_fmsub_sd
  // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
  // CHECK: extractelement <2 x double> %{{.*}}, i64 0
  // CHECK: extractelement <2 x double> %{{.*}}, i64 0
  // CHECK: extractelement <2 x double> [[NEG]], i64 0
  // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
  return _mm_fmsub_sd(a, b, c);
}
// _mm_fnmadd_ps: a <4 x float> fneg must be emitted and its result must be
// the first (multiplicand) operand of the llvm.fma.v4f32 call.
__m128 test_mm_fnmadd_ps(__m128 a, __m128 b, __m128 c) {
  // CHECK-LABEL: test_mm_fnmadd_ps
  // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
  // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> [[NEG]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_fnmadd_ps(a, b, c);
}
// _mm_fnmadd_pd: a <2 x double> fneg must be emitted and its result must be
// the first (multiplicand) operand of the llvm.fma.v2f64 call.
__m128d test_mm_fnmadd_pd(__m128d a, __m128d b, __m128d c) {
  // CHECK-LABEL: test_mm_fnmadd_pd
  // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
  // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> [[NEG]], <2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_fnmadd_pd(a, b, c);
}
// _mm_fnmadd_ss: one <4 x float> operand is negated before the element-0
// extracts; the second extract is required to read that negated vector
// (the scalar form negates the second multiplicand so the upper lanes of
// the first operand pass through unchanged). A scalar llvm.fma.f32 call
// and an insert into element 0 follow.
__m128 test_mm_fnmadd_ss(__m128 a, __m128 b, __m128 c) {
  // CHECK-LABEL: test_mm_fnmadd_ss
  // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
  // CHECK: extractelement <4 x float> %{{.*}}, i64 0
  // CHECK: extractelement <4 x float> [[NEG]], i64 0
  // CHECK: extractelement <4 x float> %{{.*}}, i64 0
  // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
  return _mm_fnmadd_ss(a, b, c);
}
// _mm_fnmadd_sd: one <2 x double> operand is negated before the element-0
// extracts; the second extract is required to read that negated vector
// (the scalar form negates the second multiplicand so the upper lane of
// the first operand passes through unchanged). A scalar llvm.fma.f64 call
// and an insert into element 0 follow.
__m128d test_mm_fnmadd_sd(__m128d a, __m128d b, __m128d c) {
  // CHECK-LABEL: test_mm_fnmadd_sd
  // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
  // CHECK: extractelement <2 x double> %{{.*}}, i64 0
  // CHECK: extractelement <2 x double> [[NEG]], i64 0
  // CHECK: extractelement <2 x double> %{{.*}}, i64 0
  // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
  return _mm_fnmadd_sd(a, b, c);
}
// _mm_fnmsub_ps: two <4 x float> fneg instructions must be emitted; the
// first result must be the first operand and the second result the third
// operand of the llvm.fma.v4f32 call.
__m128 test_mm_fnmsub_ps(__m128 a, __m128 b, __m128 c) {
  // CHECK-LABEL: test_mm_fnmsub_ps
  // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
  // CHECK: [[NEG2:%.+]] = fneg <4 x float> %{{.+}}
  // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> [[NEG]], <4 x float> %{{.*}}, <4 x float> [[NEG2]])
  return _mm_fnmsub_ps(a, b, c);
}
// _mm_fnmsub_pd: two <2 x double> fneg instructions must be emitted; the
// first result must be the first operand and the second result the third
// operand of the llvm.fma.v2f64 call.
__m128d test_mm_fnmsub_pd(__m128d a, __m128d b, __m128d c) {
  // CHECK-LABEL: test_mm_fnmsub_pd
  // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
  // CHECK: [[NEG2:%.+]] = fneg <2 x double> %{{.+}}
  // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> [[NEG]], <2 x double> %{{.*}}, <2 x double> [[NEG2]])
  return _mm_fnmsub_pd(a, b, c);
}
// _mm_fnmsub_ss: two <4 x float> vectors are negated before the element-0
// extracts; the second extract must read the first negated vector and the
// third extract the second one. A scalar llvm.fma.f32 call and an insert
// into element 0 follow.
__m128 test_mm_fnmsub_ss(__m128 a, __m128 b, __m128 c) {
  // CHECK-LABEL: test_mm_fnmsub_ss
  // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
  // CHECK: [[NEG2:%.+]] = fneg <4 x float> %{{.+}}
  // CHECK: extractelement <4 x float> %{{.*}}, i64 0
  // CHECK: extractelement <4 x float> [[NEG]], i64 0
  // CHECK: extractelement <4 x float> [[NEG2]], i64 0
  // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
  return _mm_fnmsub_ss(a, b, c);
}
// _mm_fnmsub_sd: two <2 x double> vectors are negated before the element-0
// extracts; the second extract must read the first negated vector and the
// third extract the second one. A scalar llvm.fma.f64 call and an insert
// into element 0 follow.
__m128d test_mm_fnmsub_sd(__m128d a, __m128d b, __m128d c) {
  // CHECK-LABEL: test_mm_fnmsub_sd
  // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
  // CHECK: [[NEG2:%.+]] = fneg <2 x double> %{{.+}}
  // CHECK: extractelement <2 x double> %{{.*}}, i64 0
  // CHECK: extractelement <2 x double> [[NEG]], i64 0
  // CHECK: extractelement <2 x double> [[NEG2]], i64 0
  // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
  return _mm_fnmsub_sd(a, b, c);
}
// _mm_fmaddsub_ps: must call the target-specific llvm.x86.fma.vfmaddsub.ps
// intrinsic, and no fneg may appear between the function label and that
// call.
__m128 test_mm_fmaddsub_ps(__m128 a, __m128 b, __m128 c) {
  // CHECK-LABEL: test_mm_fmaddsub_ps
  // CHECK-NOT: fneg
  // CHECK: call {{.*}}<4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_fmaddsub_ps(a, b, c);
}
// _mm_fmaddsub_pd: must call the target-specific llvm.x86.fma.vfmaddsub.pd
// intrinsic, and no fneg may appear between the function label and that
// call.
__m128d test_mm_fmaddsub_pd(__m128d a, __m128d b, __m128d c) {
  // CHECK-LABEL: test_mm_fmaddsub_pd
  // CHECK-NOT: fneg
  // CHECK: call {{.*}}<2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
  return _mm_fmaddsub_pd(a, b, c);
}
// _mm_fmsubadd_ps: expected as a <4 x float> fneg whose result is passed
// as the third operand of llvm.x86.fma.vfmaddsub.ps.
__m128 test_mm_fmsubadd_ps(__m128 a, __m128 b, __m128 c) {
  // CHECK-LABEL: test_mm_fmsubadd_ps
  // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
  // CHECK: call {{.*}}<4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]])
  return _mm_fmsubadd_ps(a, b, c);
}
// _mm_fmsubadd_pd: expected as a <2 x double> fneg whose result is passed
// as the third operand of llvm.x86.fma.vfmaddsub.pd.
__m128d test_mm_fmsubadd_pd(__m128d a, __m128d b, __m128d c) {
  // CHECK-LABEL: test_mm_fmsubadd_pd
  // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
  // CHECK: call {{.*}}<2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]])
  return _mm_fmsubadd_pd(a, b, c);
}
// _mm256_fmadd_ps: the emitted IR must contain a call to the generic
// llvm.fma.v8f32 intrinsic taking three <8 x float> operands.
__m256 test_mm256_fmadd_ps(__m256 a, __m256 b, __m256 c) {
  // CHECK-LABEL: test_mm256_fmadd_ps
  // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
  return _mm256_fmadd_ps(a, b, c);
}
// _mm256_fmadd_pd: the emitted IR must contain a call to the generic
// llvm.fma.v4f64 intrinsic taking three <4 x double> operands.
__m256d test_mm256_fmadd_pd(__m256d a, __m256d b, __m256d c) {
  // CHECK-LABEL: test_mm256_fmadd_pd
  // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
  return _mm256_fmadd_pd(a, b, c);
}
// _mm256_fmsub_ps: an <8 x float> fneg must be emitted and its result must
// be the third (addend) operand of the llvm.fma.v8f32 call.
__m256 test_mm256_fmsub_ps(__m256 a, __m256 b, __m256 c) {
  // CHECK-LABEL: test_mm256_fmsub_ps
  // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
  // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]])
  return _mm256_fmsub_ps(a, b, c);
}
// _mm256_fmsub_pd: a <4 x double> fneg must be emitted and its result must
// be the third (addend) operand of the llvm.fma.v4f64 call.
__m256d test_mm256_fmsub_pd(__m256d a, __m256d b, __m256d c) {
  // CHECK-LABEL: test_mm256_fmsub_pd
  // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
  // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]])
  return _mm256_fmsub_pd(a, b, c);
}
// _mm256_fnmadd_ps: an <8 x float> fneg must be emitted and its result
// must be the first (multiplicand) operand of the llvm.fma.v8f32 call.
__m256 test_mm256_fnmadd_ps(__m256 a, __m256 b, __m256 c) {
  // CHECK-LABEL: test_mm256_fnmadd_ps
  // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
  // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> [[NEG]], <8 x float> %{{.*}}, <8 x float> %{{.*}})
  return _mm256_fnmadd_ps(a, b, c);
}
// _mm256_fnmadd_pd: a <4 x double> fneg must be emitted and its result
// must be the first (multiplicand) operand of the llvm.fma.v4f64 call.
__m256d test_mm256_fnmadd_pd(__m256d a, __m256d b, __m256d c) {
  // CHECK-LABEL: test_mm256_fnmadd_pd
  // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
  // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> [[NEG]], <4 x double> %{{.*}}, <4 x double> %{{.*}})
  return _mm256_fnmadd_pd(a, b, c);
}
// _mm256_fnmsub_ps: two <8 x float> fneg instructions must be emitted; the
// first result must be the first operand and the second result the third
// operand of the llvm.fma.v8f32 call.
__m256 test_mm256_fnmsub_ps(__m256 a, __m256 b, __m256 c) {
  // CHECK-LABEL: test_mm256_fnmsub_ps
  // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
  // CHECK: [[NEG2:%.+]] = fneg <8 x float> %{{.*}}
  // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> [[NEG]], <8 x float> %{{.*}}, <8 x float> [[NEG2]])
  return _mm256_fnmsub_ps(a, b, c);
}
// _mm256_fnmsub_pd: two <4 x double> fneg instructions must be emitted;
// the first result must be the first operand and the second result the
// third operand of the llvm.fma.v4f64 call.
__m256d test_mm256_fnmsub_pd(__m256d a, __m256d b, __m256d c) {
  // CHECK-LABEL: test_mm256_fnmsub_pd
  // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
  // CHECK: [[NEG2:%.+]] = fneg <4 x double> %{{.+}}
  // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> [[NEG]], <4 x double> %{{.*}}, <4 x double> [[NEG2]])
  return _mm256_fnmsub_pd(a, b, c);
}
// _mm256_fmaddsub_ps: must call the target-specific
// llvm.x86.fma.vfmaddsub.ps.256 intrinsic, and no fneg may appear between
// the function label and that call.
__m256 test_mm256_fmaddsub_ps(__m256 a, __m256 b, __m256 c) {
  // CHECK-LABEL: test_mm256_fmaddsub_ps
  // CHECK-NOT: fneg
  // CHECK: call {{.*}}<8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
  return _mm256_fmaddsub_ps(a, b, c);
}
// _mm256_fmaddsub_pd: must call the target-specific
// llvm.x86.fma.vfmaddsub.pd.256 intrinsic, and no fneg may appear between
// the function label and that call.
__m256d test_mm256_fmaddsub_pd(__m256d a, __m256d b, __m256d c) {
  // CHECK-LABEL: test_mm256_fmaddsub_pd
  // CHECK-NOT: fneg
  // CHECK: call {{.*}}<4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
  return _mm256_fmaddsub_pd(a, b, c);
}
// _mm256_fmsubadd_ps: expected as an <8 x float> fneg whose result is
// passed as the third operand of llvm.x86.fma.vfmaddsub.ps.256.
__m256 test_mm256_fmsubadd_ps(__m256 a, __m256 b, __m256 c) {
  // CHECK-LABEL: test_mm256_fmsubadd_ps
  // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.+}}
  // CHECK: call {{.*}}<8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]])
  return _mm256_fmsubadd_ps(a, b, c);
}
// _mm256_fmsubadd_pd: expected as a <4 x double> fneg whose result is
// passed as the third operand of llvm.x86.fma.vfmaddsub.pd.256.
__m256d test_mm256_fmsubadd_pd(__m256d a, __m256d b, __m256d c) {
  // CHECK-LABEL: test_mm256_fmsubadd_pd
  // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
  // CHECK: call {{.*}}<4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]])
  return _mm256_fmsubadd_pd(a, b, c);
}