// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma -emit-llvm -o - | FileCheck %s
6 __m128
test_mm_fmadd_ps(__m128 a
, __m128 b
, __m128 c
) {
7 // CHECK-LABEL: test_mm_fmadd_ps
8 // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
9 return _mm_fmadd_ps(a
, b
, c
);
12 __m128d
test_mm_fmadd_pd(__m128d a
, __m128d b
, __m128d c
) {
13 // CHECK-LABEL: test_mm_fmadd_pd
14 // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
15 return _mm_fmadd_pd(a
, b
, c
);
18 __m128
test_mm_fmadd_ss(__m128 a
, __m128 b
, __m128 c
) {
19 // CHECK-LABEL: test_mm_fmadd_ss
20 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
21 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
22 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
23 // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
24 // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
25 return _mm_fmadd_ss(a
, b
, c
);
28 __m128d
test_mm_fmadd_sd(__m128d a
, __m128d b
, __m128d c
) {
29 // CHECK-LABEL: test_mm_fmadd_sd
30 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
31 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
32 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
33 // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
34 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
35 return _mm_fmadd_sd(a
, b
, c
);
38 __m128
test_mm_fmsub_ps(__m128 a
, __m128 b
, __m128 c
) {
39 // CHECK-LABEL: test_mm_fmsub_ps
40 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
41 // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
42 return _mm_fmsub_ps(a
, b
, c
);
45 __m128d
test_mm_fmsub_pd(__m128d a
, __m128d b
, __m128d c
) {
46 // CHECK-LABEL: test_mm_fmsub_pd
47 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
48 // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
49 return _mm_fmsub_pd(a
, b
, c
);
52 __m128
test_mm_fmsub_ss(__m128 a
, __m128 b
, __m128 c
) {
53 // CHECK-LABEL: test_mm_fmsub_ss
54 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
55 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
56 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
57 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
58 // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
59 // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
60 return _mm_fmsub_ss(a
, b
, c
);
63 __m128d
test_mm_fmsub_sd(__m128d a
, __m128d b
, __m128d c
) {
64 // CHECK-LABEL: test_mm_fmsub_sd
65 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
66 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
67 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
68 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
69 // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
70 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
71 return _mm_fmsub_sd(a
, b
, c
);
74 __m128
test_mm_fnmadd_ps(__m128 a
, __m128 b
, __m128 c
) {
75 // CHECK-LABEL: test_mm_fnmadd_ps
76 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
77 // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
78 return _mm_fnmadd_ps(a
, b
, c
);
81 __m128d
test_mm_fnmadd_pd(__m128d a
, __m128d b
, __m128d c
) {
82 // CHECK-LABEL: test_mm_fnmadd_pd
83 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
84 // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
85 return _mm_fnmadd_pd(a
, b
, c
);
88 __m128
test_mm_fnmadd_ss(__m128 a
, __m128 b
, __m128 c
) {
89 // CHECK-LABEL: test_mm_fnmadd_ss
90 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
91 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
92 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
93 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
94 // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
95 // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
96 return _mm_fnmadd_ss(a
, b
, c
);
99 __m128d
test_mm_fnmadd_sd(__m128d a
, __m128d b
, __m128d c
) {
100 // CHECK-LABEL: test_mm_fnmadd_sd
101 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
102 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
103 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
104 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
105 // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
106 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
107 return _mm_fnmadd_sd(a
, b
, c
);
110 __m128
test_mm_fnmsub_ps(__m128 a
, __m128 b
, __m128 c
) {
111 // CHECK-LABEL: test_mm_fnmsub_ps
112 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
113 // CHECK: [[NEG2:%.+]] = fneg <4 x float> %{{.+}}
114 // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
115 return _mm_fnmsub_ps(a
, b
, c
);
118 __m128d
test_mm_fnmsub_pd(__m128d a
, __m128d b
, __m128d c
) {
119 // CHECK-LABEL: test_mm_fnmsub_pd
120 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
121 // CHECK: [[NEG2:%.+]] = fneg <2 x double> %{{.+}}
122 // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
123 return _mm_fnmsub_pd(a
, b
, c
);
126 __m128
test_mm_fnmsub_ss(__m128 a
, __m128 b
, __m128 c
) {
127 // CHECK-LABEL: test_mm_fnmsub_ss
128 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
129 // CHECK: [[NEG2:%.+]] = fneg <4 x float> %{{.+}}
130 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
131 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
132 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
133 // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
134 // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
135 return _mm_fnmsub_ss(a
, b
, c
);
138 __m128d
test_mm_fnmsub_sd(__m128d a
, __m128d b
, __m128d c
) {
139 // CHECK-LABEL: test_mm_fnmsub_sd
140 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
141 // CHECK: [[NEG2:%.+]] = fneg <2 x double> %{{.+}}
142 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
143 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
144 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
145 // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
146 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
147 return _mm_fnmsub_sd(a
, b
, c
);
150 __m128
test_mm_fmaddsub_ps(__m128 a
, __m128 b
, __m128 c
) {
151 // CHECK-LABEL: test_mm_fmaddsub_ps
153 // CHECK: call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
154 return _mm_fmaddsub_ps(a
, b
, c
);
157 __m128d
test_mm_fmaddsub_pd(__m128d a
, __m128d b
, __m128d c
) {
158 // CHECK-LABEL: test_mm_fmaddsub_pd
160 // CHECK: call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
161 return _mm_fmaddsub_pd(a
, b
, c
);
164 __m128
test_mm_fmsubadd_ps(__m128 a
, __m128 b
, __m128 c
) {
165 // CHECK-LABEL: test_mm_fmsubadd_ps
166 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
167 // CHECK: call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]])
168 return _mm_fmsubadd_ps(a
, b
, c
);
171 __m128d
test_mm_fmsubadd_pd(__m128d a
, __m128d b
, __m128d c
) {
172 // CHECK-LABEL: test_mm_fmsubadd_pd
173 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
174 // CHECK: call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]])
175 return _mm_fmsubadd_pd(a
, b
, c
);
178 __m256
test_mm256_fmadd_ps(__m256 a
, __m256 b
, __m256 c
) {
179 // CHECK-LABEL: test_mm256_fmadd_ps
180 // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
181 return _mm256_fmadd_ps(a
, b
, c
);
184 __m256d
test_mm256_fmadd_pd(__m256d a
, __m256d b
, __m256d c
) {
185 // CHECK-LABEL: test_mm256_fmadd_pd
186 // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
187 return _mm256_fmadd_pd(a
, b
, c
);
190 __m256
test_mm256_fmsub_ps(__m256 a
, __m256 b
, __m256 c
) {
191 // CHECK-LABEL: test_mm256_fmsub_ps
192 // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
193 // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
194 return _mm256_fmsub_ps(a
, b
, c
);
197 __m256d
test_mm256_fmsub_pd(__m256d a
, __m256d b
, __m256d c
) {
198 // CHECK-LABEL: test_mm256_fmsub_pd
199 // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
200 // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
201 return _mm256_fmsub_pd(a
, b
, c
);
204 __m256
test_mm256_fnmadd_ps(__m256 a
, __m256 b
, __m256 c
) {
205 // CHECK-LABEL: test_mm256_fnmadd_ps
206 // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
207 // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
208 return _mm256_fnmadd_ps(a
, b
, c
);
211 __m256d
test_mm256_fnmadd_pd(__m256d a
, __m256d b
, __m256d c
) {
212 // CHECK-LABEL: test_mm256_fnmadd_pd
213 // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
214 // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
215 return _mm256_fnmadd_pd(a
, b
, c
);
218 __m256
test_mm256_fnmsub_ps(__m256 a
, __m256 b
, __m256 c
) {
219 // CHECK-LABEL: test_mm256_fnmsub_ps
220 // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
221 // CHECK: [[NEG2:%.+]] = fneg <8 x float> %{{.*}}
222 // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
223 return _mm256_fnmsub_ps(a
, b
, c
);
226 __m256d
test_mm256_fnmsub_pd(__m256d a
, __m256d b
, __m256d c
) {
227 // CHECK-LABEL: test_mm256_fnmsub_pd
228 // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
229 // CHECK: [[NEG2:%.+]] = fneg <4 x double> %{{.+}}
230 // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
231 return _mm256_fnmsub_pd(a
, b
, c
);
234 __m256
test_mm256_fmaddsub_ps(__m256 a
, __m256 b
, __m256 c
) {
235 // CHECK-LABEL: test_mm256_fmaddsub_ps
237 // CHECK: call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
238 return _mm256_fmaddsub_ps(a
, b
, c
);
241 __m256d
test_mm256_fmaddsub_pd(__m256d a
, __m256d b
, __m256d c
) {
242 // CHECK-LABEL: test_mm256_fmaddsub_pd
244 // CHECK: call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
245 return _mm256_fmaddsub_pd(a
, b
, c
);
248 __m256
test_mm256_fmsubadd_ps(__m256 a
, __m256 b
, __m256 c
) {
249 // CHECK-LABEL: test_mm256_fmsubadd_ps
250 // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.+}}
251 // CHECK: call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]])
252 return _mm256_fmsubadd_ps(a
, b
, c
);
255 __m256d
test_mm256_fmsubadd_pd(__m256d a
, __m256d b
, __m256d c
) {
256 // CHECK-LABEL: test_mm256_fmsubadd_pd
257 // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
258 // CHECK: call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]])
259 return _mm256_fmsubadd_pd(a
, b
, c
);