// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma4 -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +fma4 -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma4 -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +fma4 -emit-llvm -o - -Wall -Werror | FileCheck %s
9 __m128
test_mm_macc_ps(__m128 a
, __m128 b
, __m128 c
) {
10 // CHECK-LABEL: test_mm_macc_ps
11 // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
12 return _mm_macc_ps(a
, b
, c
);
15 __m128d
test_mm_macc_pd(__m128d a
, __m128d b
, __m128d c
) {
16 // CHECK-LABEL: test_mm_macc_pd
17 // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
18 return _mm_macc_pd(a
, b
, c
);
21 __m128
test_mm_macc_ss(__m128 a
, __m128 b
, __m128 c
) {
22 // CHECK-LABEL: test_mm_macc_ss
23 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
24 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
25 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
26 // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
27 // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0
28 return _mm_macc_ss(a
, b
, c
);
31 __m128d
test_mm_macc_sd(__m128d a
, __m128d b
, __m128d c
) {
32 // CHECK-LABEL: test_mm_macc_sd
33 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
34 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
35 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
36 // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
37 // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0
38 return _mm_macc_sd(a
, b
, c
);
41 __m128
test_mm_msub_ps(__m128 a
, __m128 b
, __m128 c
) {
42 // CHECK-LABEL: test_mm_msub_ps
43 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
44 // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
45 return _mm_msub_ps(a
, b
, c
);
48 __m128d
test_mm_msub_pd(__m128d a
, __m128d b
, __m128d c
) {
49 // CHECK-LABEL: test_mm_msub_pd
50 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
51 // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
52 return _mm_msub_pd(a
, b
, c
);
55 __m128
test_mm_msub_ss(__m128 a
, __m128 b
, __m128 c
) {
56 // CHECK-LABEL: test_mm_msub_ss
57 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
58 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
59 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
60 // CHECK: [[C:%.+]] = extractelement <4 x float> [[NEG]], i64 0
61 // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float [[C]])
62 // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0
63 return _mm_msub_ss(a
, b
, c
);
66 __m128d
test_mm_msub_sd(__m128d a
, __m128d b
, __m128d c
) {
67 // CHECK-LABEL: test_mm_msub_sd
68 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
69 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
70 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
71 // CHECK: [[C:%.+]] = extractelement <2 x double> [[NEG]], i64 0
72 // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double [[C]])
73 // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0
74 return _mm_msub_sd(a
, b
, c
);
77 __m128
test_mm_nmacc_ps(__m128 a
, __m128 b
, __m128 c
) {
78 // CHECK-LABEL: test_mm_nmacc_ps
79 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
80 // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
81 return _mm_nmacc_ps(a
, b
, c
);
84 __m128d
test_mm_nmacc_pd(__m128d a
, __m128d b
, __m128d c
) {
85 // CHECK-LABEL: test_mm_nmacc_pd
86 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
87 // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
88 return _mm_nmacc_pd(a
, b
, c
);
91 __m128
test_mm_nmacc_ss(__m128 a
, __m128 b
, __m128 c
) {
92 // CHECK-LABEL: test_mm_nmacc_ss
93 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
94 // CHECK: [[A:%.+]] = extractelement <4 x float> [[NEG]], i64 0
95 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
96 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
97 // CHECK: call float @llvm.fma.f32(float [[A]], float %{{.*}}, float %{{.*}})
98 // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0
99 return _mm_nmacc_ss(a
, b
, c
);
102 __m128d
test_mm_nmacc_sd(__m128d a
, __m128d b
, __m128d c
) {
103 // CHECK-LABEL: test_mm_nmacc_sd
104 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
105 // CHECK: [[A:%.+]] = extractelement <2 x double> [[NEG]], i64 0
106 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
107 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
108 // CHECK: call double @llvm.fma.f64(double [[A]], double %{{.*}}, double %{{.*}})
109 // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0
110 return _mm_nmacc_sd(a
, b
, c
);
113 __m128
test_mm_nmsub_ps(__m128 a
, __m128 b
, __m128 c
) {
114 // CHECK-LABEL: test_mm_nmsub_ps
115 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
116 // CHECK: [[NEG2:%.+]] = fneg <4 x float> %{{.+}}
117 // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
118 return _mm_nmsub_ps(a
, b
, c
);
121 __m128d
test_mm_nmsub_pd(__m128d a
, __m128d b
, __m128d c
) {
122 // CHECK-LABEL: test_mm_nmsub_pd
123 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
124 // CHECK: [[NEG2:%.+]] = fneg <2 x double> %{{.+}}
125 // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
126 return _mm_nmsub_pd(a
, b
, c
);
129 __m128
test_mm_nmsub_ss(__m128 a
, __m128 b
, __m128 c
) {
130 // CHECK-LABEL: test_mm_nmsub_ss
131 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
132 // CHECK: [[NEG2:%.+]] = fneg <4 x float> %{{.+}}
133 // CHECK: [[A:%.+]] = extractelement <4 x float> [[NEG]], i64 0
134 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
135 // CHECK: [[C:%.+]] = extractelement <4 x float> [[NEG2]], i64 0
136 // CHECK: call float @llvm.fma.f32(float [[A]], float %{{.*}}, float [[C]])
137 // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0
138 return _mm_nmsub_ss(a
, b
, c
);
141 __m128d
test_mm_nmsub_sd(__m128d a
, __m128d b
, __m128d c
) {
142 // CHECK-LABEL: test_mm_nmsub_sd
143 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
144 // CHECK: [[NEG2:%.+]] = fneg <2 x double> %{{.+}}
145 // CHECK: [[A:%.+]] = extractelement <2 x double> [[NEG]], i64 0
146 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
147 // CHECK: [[C:%.+]] = extractelement <2 x double> [[NEG2]], i64 0
148 // CHECK: call double @llvm.fma.f64(double [[A]], double %{{.*}}, double [[C]])
149 // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0
150 return _mm_nmsub_sd(a
, b
, c
);
153 __m128
test_mm_maddsub_ps(__m128 a
, __m128 b
, __m128 c
) {
154 // CHECK-LABEL: test_mm_maddsub_ps
156 // CHECK: call {{.*}}<4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
157 return _mm_maddsub_ps(a
, b
, c
);
160 __m128d
test_mm_maddsub_pd(__m128d a
, __m128d b
, __m128d c
) {
161 // CHECK-LABEL: test_mm_maddsub_pd
163 // CHECK: call {{.*}}<2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
164 return _mm_maddsub_pd(a
, b
, c
);
167 __m128
test_mm_msubadd_ps(__m128 a
, __m128 b
, __m128 c
) {
168 // CHECK-LABEL: test_mm_msubadd_ps
169 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
170 // CHECK: call {{.*}}<4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]])
171 return _mm_msubadd_ps(a
, b
, c
);
174 __m128d
test_mm_msubadd_pd(__m128d a
, __m128d b
, __m128d c
) {
175 // CHECK-LABEL: test_mm_msubadd_pd
176 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
177 // CHECK: call {{.*}}<2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]])
178 return _mm_msubadd_pd(a
, b
, c
);
181 __m256
test_mm256_macc_ps(__m256 a
, __m256 b
, __m256 c
) {
182 // CHECK-LABEL: test_mm256_macc_ps
183 // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
184 return _mm256_macc_ps(a
, b
, c
);
187 __m256d
test_mm256_macc_pd(__m256d a
, __m256d b
, __m256d c
) {
188 // CHECK-LABEL: test_mm256_macc_pd
189 // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
190 return _mm256_macc_pd(a
, b
, c
);
193 __m256
test_mm256_msub_ps(__m256 a
, __m256 b
, __m256 c
) {
194 // CHECK-LABEL: test_mm256_msub_ps
195 // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
196 // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
197 return _mm256_msub_ps(a
, b
, c
);
200 __m256d
test_mm256_msub_pd(__m256d a
, __m256d b
, __m256d c
) {
201 // CHECK-LABEL: test_mm256_msub_pd
202 // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
203 // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
204 return _mm256_msub_pd(a
, b
, c
);
207 __m256
test_mm256_nmacc_ps(__m256 a
, __m256 b
, __m256 c
) {
208 // CHECK-LABEL: test_mm256_nmacc_ps
209 // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
210 // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
211 return _mm256_nmacc_ps(a
, b
, c
);
214 __m256d
test_mm256_nmacc_pd(__m256d a
, __m256d b
, __m256d c
) {
215 // CHECK-LABEL: test_mm256_nmacc_pd
216 // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
217 // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
218 return _mm256_nmacc_pd(a
, b
, c
);
221 __m256
test_mm256_nmsub_ps(__m256 a
, __m256 b
, __m256 c
) {
222 // CHECK-LABEL: test_mm256_nmsub_ps
223 // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
224 // CHECK: [[NEG2:%.+]] = fneg <8 x float> %{{.*}}
225 // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
226 return _mm256_nmsub_ps(a
, b
, c
);
229 __m256d
test_mm256_nmsub_pd(__m256d a
, __m256d b
, __m256d c
) {
230 // CHECK-LABEL: test_mm256_nmsub_pd
231 // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
232 // CHECK: [[NEG2:%.+]] = fneg <4 x double> %{{.+}}
233 // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
234 return _mm256_nmsub_pd(a
, b
, c
);
237 __m256
test_mm256_maddsub_ps(__m256 a
, __m256 b
, __m256 c
) {
238 // CHECK-LABEL: test_mm256_maddsub_ps
240 // CHECK: call {{.*}}<8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
241 return _mm256_maddsub_ps(a
, b
, c
);
244 __m256d
test_mm256_maddsub_pd(__m256d a
, __m256d b
, __m256d c
) {
245 // CHECK-LABEL: test_mm256_maddsub_pd
247 // CHECK: call {{.*}}<4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
248 return _mm256_maddsub_pd(a
, b
, c
);
251 __m256
test_mm256_msubadd_ps(__m256 a
, __m256 b
, __m256 c
) {
252 // CHECK-LABEL: test_mm256_msubadd_ps
253 // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
254 // CHECK: call {{.*}}<8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]])
255 return _mm256_msubadd_ps(a
, b
, c
);
258 __m256d
test_mm256_msubadd_pd(__m256d a
, __m256d b
, __m256d c
) {
259 // CHECK-LABEL: test_mm256_msubadd_pd
260 // CHECK: [[NEG:%.+]] = fneg <4 x double> {{.+}}
261 // CHECK: call {{.*}}<4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]])
262 return _mm256_msubadd_pd(a
, b
, c
);