// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma4 -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +fma4 -emit-llvm -o - -Wall -Werror | FileCheck %s

#include <x86intrin.h>

7 __m128
test_mm_macc_ps(__m128 a
, __m128 b
, __m128 c
) {
8 // CHECK-LABEL: test_mm_macc_ps
9 // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
10 return _mm_macc_ps(a
, b
, c
);
13 __m128d
test_mm_macc_pd(__m128d a
, __m128d b
, __m128d c
) {
14 // CHECK-LABEL: test_mm_macc_pd
15 // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
16 return _mm_macc_pd(a
, b
, c
);
19 __m128
test_mm_macc_ss(__m128 a
, __m128 b
, __m128 c
) {
20 // CHECK-LABEL: test_mm_macc_ss
21 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
22 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
23 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
24 // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
25 // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0
26 return _mm_macc_ss(a
, b
, c
);
29 __m128d
test_mm_macc_sd(__m128d a
, __m128d b
, __m128d c
) {
30 // CHECK-LABEL: test_mm_macc_sd
31 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
32 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
33 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
34 // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
35 // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0
36 return _mm_macc_sd(a
, b
, c
);
39 __m128
test_mm_msub_ps(__m128 a
, __m128 b
, __m128 c
) {
40 // CHECK-LABEL: test_mm_msub_ps
41 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
42 // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
43 return _mm_msub_ps(a
, b
, c
);
46 __m128d
test_mm_msub_pd(__m128d a
, __m128d b
, __m128d c
) {
47 // CHECK-LABEL: test_mm_msub_pd
48 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
49 // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
50 return _mm_msub_pd(a
, b
, c
);
53 __m128
test_mm_msub_ss(__m128 a
, __m128 b
, __m128 c
) {
54 // CHECK-LABEL: test_mm_msub_ss
55 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
56 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
57 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
58 // CHECK: [[C:%.+]] = extractelement <4 x float> [[NEG]], i64 0
59 // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float [[C]])
60 // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0
61 return _mm_msub_ss(a
, b
, c
);
64 __m128d
test_mm_msub_sd(__m128d a
, __m128d b
, __m128d c
) {
65 // CHECK-LABEL: test_mm_msub_sd
66 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
67 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
68 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
69 // CHECK: [[C:%.+]] = extractelement <2 x double> [[NEG]], i64 0
70 // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double [[C]])
71 // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0
72 return _mm_msub_sd(a
, b
, c
);
75 __m128
test_mm_nmacc_ps(__m128 a
, __m128 b
, __m128 c
) {
76 // CHECK-LABEL: test_mm_nmacc_ps
77 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
78 // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
79 return _mm_nmacc_ps(a
, b
, c
);
82 __m128d
test_mm_nmacc_pd(__m128d a
, __m128d b
, __m128d c
) {
83 // CHECK-LABEL: test_mm_nmacc_pd
84 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
85 // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
86 return _mm_nmacc_pd(a
, b
, c
);
89 __m128
test_mm_nmacc_ss(__m128 a
, __m128 b
, __m128 c
) {
90 // CHECK-LABEL: test_mm_nmacc_ss
91 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
92 // CHECK: [[A:%.+]] = extractelement <4 x float> [[NEG]], i64 0
93 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
94 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
95 // CHECK: call float @llvm.fma.f32(float [[A]], float %{{.*}}, float %{{.*}})
96 // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0
97 return _mm_nmacc_ss(a
, b
, c
);
100 __m128d
test_mm_nmacc_sd(__m128d a
, __m128d b
, __m128d c
) {
101 // CHECK-LABEL: test_mm_nmacc_sd
102 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
103 // CHECK: [[A:%.+]] = extractelement <2 x double> [[NEG]], i64 0
104 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
105 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
106 // CHECK: call double @llvm.fma.f64(double [[A]], double %{{.*}}, double %{{.*}})
107 // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0
108 return _mm_nmacc_sd(a
, b
, c
);
111 __m128
test_mm_nmsub_ps(__m128 a
, __m128 b
, __m128 c
) {
112 // CHECK-LABEL: test_mm_nmsub_ps
113 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
114 // CHECK: [[NEG2:%.+]] = fneg <4 x float> %{{.+}}
115 // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
116 return _mm_nmsub_ps(a
, b
, c
);
119 __m128d
test_mm_nmsub_pd(__m128d a
, __m128d b
, __m128d c
) {
120 // CHECK-LABEL: test_mm_nmsub_pd
121 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
122 // CHECK: [[NEG2:%.+]] = fneg <2 x double> %{{.+}}
123 // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
124 return _mm_nmsub_pd(a
, b
, c
);
127 __m128
test_mm_nmsub_ss(__m128 a
, __m128 b
, __m128 c
) {
128 // CHECK-LABEL: test_mm_nmsub_ss
129 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
130 // CHECK: [[NEG2:%.+]] = fneg <4 x float> %{{.+}}
131 // CHECK: [[A:%.+]] = extractelement <4 x float> [[NEG]], i64 0
132 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
133 // CHECK: [[C:%.+]] = extractelement <4 x float> [[NEG2]], i64 0
134 // CHECK: call float @llvm.fma.f32(float [[A]], float %{{.*}}, float [[C]])
135 // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0
136 return _mm_nmsub_ss(a
, b
, c
);
139 __m128d
test_mm_nmsub_sd(__m128d a
, __m128d b
, __m128d c
) {
140 // CHECK-LABEL: test_mm_nmsub_sd
141 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
142 // CHECK: [[NEG2:%.+]] = fneg <2 x double> %{{.+}}
143 // CHECK: [[A:%.+]] = extractelement <2 x double> [[NEG]], i64 0
144 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
145 // CHECK: [[C:%.+]] = extractelement <2 x double> [[NEG2]], i64 0
146 // CHECK: call double @llvm.fma.f64(double [[A]], double %{{.*}}, double [[C]])
147 // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0
148 return _mm_nmsub_sd(a
, b
, c
);
151 __m128
test_mm_maddsub_ps(__m128 a
, __m128 b
, __m128 c
) {
152 // CHECK-LABEL: test_mm_maddsub_ps
154 // CHECK: call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
155 return _mm_maddsub_ps(a
, b
, c
);
158 __m128d
test_mm_maddsub_pd(__m128d a
, __m128d b
, __m128d c
) {
159 // CHECK-LABEL: test_mm_maddsub_pd
161 // CHECK: call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
162 return _mm_maddsub_pd(a
, b
, c
);
165 __m128
test_mm_msubadd_ps(__m128 a
, __m128 b
, __m128 c
) {
166 // CHECK-LABEL: test_mm_msubadd_ps
167 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
168 // CHECK: call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]])
169 return _mm_msubadd_ps(a
, b
, c
);
172 __m128d
test_mm_msubadd_pd(__m128d a
, __m128d b
, __m128d c
) {
173 // CHECK-LABEL: test_mm_msubadd_pd
174 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
175 // CHECK: call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]])
176 return _mm_msubadd_pd(a
, b
, c
);
179 __m256
test_mm256_macc_ps(__m256 a
, __m256 b
, __m256 c
) {
180 // CHECK-LABEL: test_mm256_macc_ps
181 // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
182 return _mm256_macc_ps(a
, b
, c
);
185 __m256d
test_mm256_macc_pd(__m256d a
, __m256d b
, __m256d c
) {
186 // CHECK-LABEL: test_mm256_macc_pd
187 // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
188 return _mm256_macc_pd(a
, b
, c
);
191 __m256
test_mm256_msub_ps(__m256 a
, __m256 b
, __m256 c
) {
192 // CHECK-LABEL: test_mm256_msub_ps
193 // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
194 // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
195 return _mm256_msub_ps(a
, b
, c
);
198 __m256d
test_mm256_msub_pd(__m256d a
, __m256d b
, __m256d c
) {
199 // CHECK-LABEL: test_mm256_msub_pd
200 // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
201 // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
202 return _mm256_msub_pd(a
, b
, c
);
205 __m256
test_mm256_nmacc_ps(__m256 a
, __m256 b
, __m256 c
) {
206 // CHECK-LABEL: test_mm256_nmacc_ps
207 // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
208 // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
209 return _mm256_nmacc_ps(a
, b
, c
);
212 __m256d
test_mm256_nmacc_pd(__m256d a
, __m256d b
, __m256d c
) {
213 // CHECK-LABEL: test_mm256_nmacc_pd
214 // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
215 // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
216 return _mm256_nmacc_pd(a
, b
, c
);
219 __m256
test_mm256_nmsub_ps(__m256 a
, __m256 b
, __m256 c
) {
220 // CHECK-LABEL: test_mm256_nmsub_ps
221 // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
222 // CHECK: [[NEG2:%.+]] = fneg <8 x float> %{{.*}}
223 // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
224 return _mm256_nmsub_ps(a
, b
, c
);
227 __m256d
test_mm256_nmsub_pd(__m256d a
, __m256d b
, __m256d c
) {
228 // CHECK-LABEL: test_mm256_nmsub_pd
229 // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
230 // CHECK: [[NEG2:%.+]] = fneg <4 x double> %{{.+}}
231 // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
232 return _mm256_nmsub_pd(a
, b
, c
);
235 __m256
test_mm256_maddsub_ps(__m256 a
, __m256 b
, __m256 c
) {
236 // CHECK-LABEL: test_mm256_maddsub_ps
238 // CHECK: call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
239 return _mm256_maddsub_ps(a
, b
, c
);
242 __m256d
test_mm256_maddsub_pd(__m256d a
, __m256d b
, __m256d c
) {
243 // CHECK-LABEL: test_mm256_maddsub_pd
245 // CHECK: call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
246 return _mm256_maddsub_pd(a
, b
, c
);
249 __m256
test_mm256_msubadd_ps(__m256 a
, __m256 b
, __m256 c
) {
250 // CHECK-LABEL: test_mm256_msubadd_ps
251 // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
252 // CHECK: call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]])
253 return _mm256_msubadd_ps(a
, b
, c
);
256 __m256d
test_mm256_msubadd_pd(__m256d a
, __m256d b
, __m256d c
) {
257 // CHECK-LABEL: test_mm256_msubadd_pd
258 // CHECK: [[NEG:%.+]] = fneg <4 x double> {{.+}}
259 // CHECK: call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]])
260 return _mm256_msubadd_pd(a
, b
, c
);