[MLIR][TOSA] Update CustomOp input and output names (#118408)
[llvm-project.git] / clang / test / CodeGen / X86 / fma4-builtins.c
blobccdba8fea87b50b8f03afb58cab861df63b4b2fd
1 // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma4 -emit-llvm -o - -Wall -Werror | FileCheck %s
2 // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +fma4 -emit-llvm -o - -Wall -Werror | FileCheck %s
3 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma4 -emit-llvm -o - -Wall -Werror | FileCheck %s
4 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +fma4 -emit-llvm -o - -Wall -Werror | FileCheck %s
7 #include <x86intrin.h>
// Packed single-precision multiply-accumulate: forwards a, b, c to
// _mm_macc_ps and expects the emitted IR to call llvm.fma.v4f32 on three
// <4 x float> operands.
9 __m128 test_mm_macc_ps(__m128 a, __m128 b, __m128 c) {
10 // CHECK-LABEL: test_mm_macc_ps
11 // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
12 return _mm_macc_ps(a, b, c);
// Packed double-precision multiply-accumulate: forwards a, b, c to
// _mm_macc_pd and expects the emitted IR to call llvm.fma.v2f64 on three
// <2 x double> operands.
15 __m128d test_mm_macc_pd(__m128d a, __m128d b, __m128d c) {
16 // CHECK-LABEL: test_mm_macc_pd
17 // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
18 return _mm_macc_pd(a, b, c);
// Scalar single-precision multiply-accumulate via _mm_macc_ss. The expected
// IR extracts element 0 from three <4 x float> values, combines them with the
// scalar llvm.fma.f32 intrinsic, and inserts the result at index 0 of a
// zeroinitializer vector.
21 __m128 test_mm_macc_ss(__m128 a, __m128 b, __m128 c) {
22 // CHECK-LABEL: test_mm_macc_ss
23 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
24 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
25 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
26 // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
27 // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0
28 return _mm_macc_ss(a, b, c);
// Scalar double-precision multiply-accumulate via _mm_macc_sd. The expected
// IR extracts element 0 from three <2 x double> values, combines them with the
// scalar llvm.fma.f64 intrinsic, and inserts the result at index 0 of a
// zeroinitializer vector.
31 __m128d test_mm_macc_sd(__m128d a, __m128d b, __m128d c) {
32 // CHECK-LABEL: test_mm_macc_sd
33 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
34 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
35 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
36 // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
37 // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0
38 return _mm_macc_sd(a, b, c);
// Packed single-precision multiply-subtract via _mm_msub_ps. The expected IR
// contains an fneg of a <4 x float> value followed by a call to
// llvm.fma.v4f32. NEG is captured but not referenced by the call pattern, so
// which operand receives the negated value is not verified here.
41 __m128 test_mm_msub_ps(__m128 a, __m128 b, __m128 c) {
42 // CHECK-LABEL: test_mm_msub_ps
43 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
44 // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
45 return _mm_msub_ps(a, b, c);
// Packed double-precision multiply-subtract via _mm_msub_pd. The expected IR
// contains an fneg of a <2 x double> value followed by a call to
// llvm.fma.v2f64. NEG is captured but not referenced by the call pattern, so
// which operand receives the negated value is not verified here.
48 __m128d test_mm_msub_pd(__m128d a, __m128d b, __m128d c) {
49 // CHECK-LABEL: test_mm_msub_pd
50 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
51 // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
52 return _mm_msub_pd(a, b, c);
// Scalar single-precision multiply-subtract via _mm_msub_ss. The expected IR
// negates a whole <4 x float> vector, extracts element 0 of two other vectors
// and of the negated vector (C), and passes C as the third operand of
// llvm.fma.f32. The scalar result is inserted at index 0 of a zeroinitializer
// vector.
55 __m128 test_mm_msub_ss(__m128 a, __m128 b, __m128 c) {
56 // CHECK-LABEL: test_mm_msub_ss
57 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
58 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
59 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
60 // CHECK: [[C:%.+]] = extractelement <4 x float> [[NEG]], i64 0
61 // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float [[C]])
62 // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0
63 return _mm_msub_ss(a, b, c);
// Scalar double-precision multiply-subtract via _mm_msub_sd. The expected IR
// negates a whole <2 x double> vector, extracts element 0 of two other vectors
// and of the negated vector (C), and passes C as the third operand of
// llvm.fma.f64. The scalar result is inserted at index 0 of a zeroinitializer
// vector.
66 __m128d test_mm_msub_sd(__m128d a, __m128d b, __m128d c) {
67 // CHECK-LABEL: test_mm_msub_sd
68 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
69 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
70 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
71 // CHECK: [[C:%.+]] = extractelement <2 x double> [[NEG]], i64 0
72 // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double [[C]])
73 // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0
74 return _mm_msub_sd(a, b, c);
// Packed single-precision negated multiply-accumulate via _mm_nmacc_ps. The
// expected IR contains an fneg of a <4 x float> value followed by a call to
// llvm.fma.v4f32. NEG is captured but not referenced by the call pattern, so
// which operand receives the negated value is not verified here.
77 __m128 test_mm_nmacc_ps(__m128 a, __m128 b, __m128 c) {
78 // CHECK-LABEL: test_mm_nmacc_ps
79 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
80 // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
81 return _mm_nmacc_ps(a, b, c);
// Packed double-precision negated multiply-accumulate via _mm_nmacc_pd. The
// expected IR contains an fneg of a <2 x double> value followed by a call to
// llvm.fma.v2f64. NEG is captured but not referenced by the call pattern, so
// which operand receives the negated value is not verified here.
84 __m128d test_mm_nmacc_pd(__m128d a, __m128d b, __m128d c) {
85 // CHECK-LABEL: test_mm_nmacc_pd
86 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
87 // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
88 return _mm_nmacc_pd(a, b, c);
// Scalar single-precision negated multiply-accumulate via _mm_nmacc_ss. The
// expected IR negates a whole <4 x float> vector, extracts its element 0 (A)
// followed by element 0 of two other vectors, and passes A as the first
// operand of llvm.fma.f32. The scalar result is inserted at index 0 of a
// zeroinitializer vector.
91 __m128 test_mm_nmacc_ss(__m128 a, __m128 b, __m128 c) {
92 // CHECK-LABEL: test_mm_nmacc_ss
93 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
94 // CHECK: [[A:%.+]] = extractelement <4 x float> [[NEG]], i64 0
95 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
96 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
97 // CHECK: call float @llvm.fma.f32(float [[A]], float %{{.*}}, float %{{.*}})
98 // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0
99 return _mm_nmacc_ss(a, b, c);
// Scalar double-precision negated multiply-accumulate via _mm_nmacc_sd. The
// expected IR negates a whole <2 x double> vector, extracts its element 0 (A)
// followed by element 0 of two other vectors, and passes A as the first
// operand of llvm.fma.f64. The scalar result is inserted at index 0 of a
// zeroinitializer vector.
102 __m128d test_mm_nmacc_sd(__m128d a, __m128d b, __m128d c) {
103 // CHECK-LABEL: test_mm_nmacc_sd
104 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
105 // CHECK: [[A:%.+]] = extractelement <2 x double> [[NEG]], i64 0
106 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
107 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
108 // CHECK: call double @llvm.fma.f64(double [[A]], double %{{.*}}, double %{{.*}})
109 // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0
110 return _mm_nmacc_sd(a, b, c);
// Packed single-precision negated multiply-subtract via _mm_nmsub_ps. The
// expected IR contains two fneg instructions on <4 x float> values followed by
// a call to llvm.fma.v4f32. NEG and NEG2 are captured but not referenced by
// the call pattern, so operand placement is not verified here.
113 __m128 test_mm_nmsub_ps(__m128 a, __m128 b, __m128 c) {
114 // CHECK-LABEL: test_mm_nmsub_ps
115 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
116 // CHECK: [[NEG2:%.+]] = fneg <4 x float> %{{.+}}
117 // CHECK: call {{.*}}<4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
118 return _mm_nmsub_ps(a, b, c);
// Packed double-precision negated multiply-subtract via _mm_nmsub_pd. The
// expected IR contains two fneg instructions on <2 x double> values followed
// by a call to llvm.fma.v2f64. NEG and NEG2 are captured but not referenced by
// the call pattern, so operand placement is not verified here.
121 __m128d test_mm_nmsub_pd(__m128d a, __m128d b, __m128d c) {
122 // CHECK-LABEL: test_mm_nmsub_pd
123 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
124 // CHECK: [[NEG2:%.+]] = fneg <2 x double> %{{.+}}
125 // CHECK: call {{.*}}<2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
126 return _mm_nmsub_pd(a, b, c);
// Scalar single-precision negated multiply-subtract via _mm_nmsub_ss. The
// expected IR negates two <4 x float> vectors (NEG, NEG2), extracts element 0
// of NEG as A, of another vector, and of NEG2 as C, then calls llvm.fma.f32
// with A as the first operand and C as the third. The scalar result is
// inserted at index 0 of a zeroinitializer vector.
129 __m128 test_mm_nmsub_ss(__m128 a, __m128 b, __m128 c) {
130 // CHECK-LABEL: test_mm_nmsub_ss
131 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
132 // CHECK: [[NEG2:%.+]] = fneg <4 x float> %{{.+}}
133 // CHECK: [[A:%.+]] = extractelement <4 x float> [[NEG]], i64 0
134 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
135 // CHECK: [[C:%.+]] = extractelement <4 x float> [[NEG2]], i64 0
136 // CHECK: call float @llvm.fma.f32(float [[A]], float %{{.*}}, float [[C]])
137 // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0
138 return _mm_nmsub_ss(a, b, c);
// Scalar double-precision negated multiply-subtract via _mm_nmsub_sd. The
// expected IR negates two <2 x double> vectors (NEG, NEG2), extracts element 0
// of NEG as A, of another vector, and of NEG2 as C, then calls llvm.fma.f64
// with A as the first operand and C as the third. The scalar result is
// inserted at index 0 of a zeroinitializer vector.
141 __m128d test_mm_nmsub_sd(__m128d a, __m128d b, __m128d c) {
142 // CHECK-LABEL: test_mm_nmsub_sd
143 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
144 // CHECK: [[NEG2:%.+]] = fneg <2 x double> %{{.+}}
145 // CHECK: [[A:%.+]] = extractelement <2 x double> [[NEG]], i64 0
146 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
147 // CHECK: [[C:%.+]] = extractelement <2 x double> [[NEG2]], i64 0
148 // CHECK: call double @llvm.fma.f64(double [[A]], double %{{.*}}, double [[C]])
149 // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0
150 return _mm_nmsub_sd(a, b, c);
// Packed single-precision multiply with alternating add/subtract via
// _mm_maddsub_ps. The expected IR calls the target-specific
// llvm.x86.fma.vfmaddsub.ps intrinsic, and no fneg may appear between the
// function label and that call.
153 __m128 test_mm_maddsub_ps(__m128 a, __m128 b, __m128 c) {
154 // CHECK-LABEL: test_mm_maddsub_ps
155 // CHECK-NOT: fneg
156 // CHECK: call {{.*}}<4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
157 return _mm_maddsub_ps(a, b, c);
// Packed double-precision multiply with alternating add/subtract via
// _mm_maddsub_pd. The expected IR calls the target-specific
// llvm.x86.fma.vfmaddsub.pd intrinsic, and no fneg may appear between the
// function label and that call.
160 __m128d test_mm_maddsub_pd(__m128d a, __m128d b, __m128d c) {
161 // CHECK-LABEL: test_mm_maddsub_pd
162 // CHECK-NOT: fneg
163 // CHECK: call {{.*}}<2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
164 return _mm_maddsub_pd(a, b, c);
// Packed single-precision multiply with alternating subtract/add via
// _mm_msubadd_ps. The expected IR negates a <4 x float> value (NEG) and passes
// NEG as the third operand of llvm.x86.fma.vfmaddsub.ps.
167 __m128 test_mm_msubadd_ps(__m128 a, __m128 b, __m128 c) {
168 // CHECK-LABEL: test_mm_msubadd_ps
169 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
170 // CHECK: call {{.*}}<4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]])
171 return _mm_msubadd_ps(a, b, c);
// Packed double-precision multiply with alternating subtract/add via
// _mm_msubadd_pd. The expected IR negates a <2 x double> value (NEG) and
// passes NEG as the third operand of llvm.x86.fma.vfmaddsub.pd.
174 __m128d test_mm_msubadd_pd(__m128d a, __m128d b, __m128d c) {
175 // CHECK-LABEL: test_mm_msubadd_pd
176 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
177 // CHECK: call {{.*}}<2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]])
178 return _mm_msubadd_pd(a, b, c);
// 256-bit packed single-precision multiply-accumulate: forwards a, b, c to
// _mm256_macc_ps and expects the emitted IR to call llvm.fma.v8f32 on three
// <8 x float> operands.
181 __m256 test_mm256_macc_ps(__m256 a, __m256 b, __m256 c) {
182 // CHECK-LABEL: test_mm256_macc_ps
183 // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
184 return _mm256_macc_ps(a, b, c);
// 256-bit packed double-precision multiply-accumulate: forwards a, b, c to
// _mm256_macc_pd and expects the emitted IR to call llvm.fma.v4f64 on three
// <4 x double> operands.
187 __m256d test_mm256_macc_pd(__m256d a, __m256d b, __m256d c) {
188 // CHECK-LABEL: test_mm256_macc_pd
189 // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
190 return _mm256_macc_pd(a, b, c);
// 256-bit packed single-precision multiply-subtract via _mm256_msub_ps. The
// expected IR contains an fneg of an <8 x float> value followed by a call to
// llvm.fma.v8f32. NEG is captured but not referenced by the call pattern, so
// which operand receives the negated value is not verified here.
193 __m256 test_mm256_msub_ps(__m256 a, __m256 b, __m256 c) {
194 // CHECK-LABEL: test_mm256_msub_ps
195 // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
196 // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
197 return _mm256_msub_ps(a, b, c);
// 256-bit packed double-precision multiply-subtract via _mm256_msub_pd. The
// expected IR contains an fneg of a <4 x double> value followed by a call to
// llvm.fma.v4f64. NEG is captured but not referenced by the call pattern, so
// which operand receives the negated value is not verified here.
200 __m256d test_mm256_msub_pd(__m256d a, __m256d b, __m256d c) {
201 // CHECK-LABEL: test_mm256_msub_pd
202 // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
203 // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
204 return _mm256_msub_pd(a, b, c);
// 256-bit packed single-precision negated multiply-accumulate via
// _mm256_nmacc_ps. The expected IR contains an fneg of an <8 x float> value
// followed by a call to llvm.fma.v8f32. NEG is captured but not referenced by
// the call pattern, so operand placement is not verified here.
207 __m256 test_mm256_nmacc_ps(__m256 a, __m256 b, __m256 c) {
208 // CHECK-LABEL: test_mm256_nmacc_ps
209 // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
210 // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
211 return _mm256_nmacc_ps(a, b, c);
// 256-bit packed double-precision negated multiply-accumulate via
// _mm256_nmacc_pd. The expected IR contains an fneg of a <4 x double> value
// followed by a call to llvm.fma.v4f64. NEG is captured but not referenced by
// the call pattern, so operand placement is not verified here.
214 __m256d test_mm256_nmacc_pd(__m256d a, __m256d b, __m256d c) {
215 // CHECK-LABEL: test_mm256_nmacc_pd
216 // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
217 // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
218 return _mm256_nmacc_pd(a, b, c);
// 256-bit packed single-precision negated multiply-subtract via
// _mm256_nmsub_ps. The expected IR contains two fneg instructions on
// <8 x float> values followed by a call to llvm.fma.v8f32. NEG and NEG2 are
// captured but not referenced by the call pattern, so operand placement is
// not verified here.
221 __m256 test_mm256_nmsub_ps(__m256 a, __m256 b, __m256 c) {
222 // CHECK-LABEL: test_mm256_nmsub_ps
223 // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
224 // CHECK: [[NEG2:%.+]] = fneg <8 x float> %{{.*}}
225 // CHECK: call {{.*}}<8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
226 return _mm256_nmsub_ps(a, b, c);
// 256-bit packed double-precision negated multiply-subtract via
// _mm256_nmsub_pd. The expected IR contains two fneg instructions on
// <4 x double> values followed by a call to llvm.fma.v4f64. NEG and NEG2 are
// captured but not referenced by the call pattern, so operand placement is
// not verified here.
229 __m256d test_mm256_nmsub_pd(__m256d a, __m256d b, __m256d c) {
230 // CHECK-LABEL: test_mm256_nmsub_pd
231 // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
232 // CHECK: [[NEG2:%.+]] = fneg <4 x double> %{{.+}}
233 // CHECK: call {{.*}}<4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
234 return _mm256_nmsub_pd(a, b, c);
// 256-bit packed single-precision multiply with alternating add/subtract via
// _mm256_maddsub_ps. The expected IR calls the target-specific
// llvm.x86.fma.vfmaddsub.ps.256 intrinsic, and no fneg may appear between the
// function label and that call.
237 __m256 test_mm256_maddsub_ps(__m256 a, __m256 b, __m256 c) {
238 // CHECK-LABEL: test_mm256_maddsub_ps
239 // CHECK-NOT: fneg
240 // CHECK: call {{.*}}<8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
241 return _mm256_maddsub_ps(a, b, c);
// 256-bit packed double-precision multiply with alternating add/subtract via
// _mm256_maddsub_pd. The expected IR calls the target-specific
// llvm.x86.fma.vfmaddsub.pd.256 intrinsic, and no fneg may appear between the
// function label and that call.
244 __m256d test_mm256_maddsub_pd(__m256d a, __m256d b, __m256d c) {
245 // CHECK-LABEL: test_mm256_maddsub_pd
246 // CHECK-NOT: fneg
247 // CHECK: call {{.*}}<4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
248 return _mm256_maddsub_pd(a, b, c);
// 256-bit packed single-precision multiply with alternating subtract/add via
// _mm256_msubadd_ps. The expected IR negates an <8 x float> value (NEG) and
// passes NEG as the third operand of llvm.x86.fma.vfmaddsub.ps.256.
251 __m256 test_mm256_msubadd_ps(__m256 a, __m256 b, __m256 c) {
252 // CHECK-LABEL: test_mm256_msubadd_ps
253 // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
254 // CHECK: call {{.*}}<8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]])
255 return _mm256_msubadd_ps(a, b, c);
// 256-bit packed double-precision multiply with alternating subtract/add via
// _mm256_msubadd_pd. The expected IR negates a <4 x double> value (NEG) and
// passes NEG as the third operand of llvm.x86.fma.vfmaddsub.pd.256.
// The fneg operand pattern requires the leading '%' of an SSA value, matching
// the other fneg patterns in this file.
258 __m256d test_mm256_msubadd_pd(__m256d a, __m256d b, __m256d c) {
259 // CHECK-LABEL: test_mm256_msubadd_pd
260 // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
261 // CHECK: call {{.*}}<4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]])
262 return _mm256_msubadd_pd(a, b, c);