Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / test / CodeGen / X86 / fexcess-precision-bfloat16.c
blobceafa975d74b8cd171ef119b66d8abf769c85e91
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
2 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
3 // RUN: -fbfloat16-excess-precision=fast -emit-llvm -o - %s \
4 // RUN: | FileCheck -check-prefixes=CHECK-EXT %s
6 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
7 // RUN: -fbfloat16-excess-precision=fast -target-feature +fullbf16 \
8 // RUN: -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-NO-EXT %s
10 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
11 // RUN: -fbfloat16-excess-precision=standard -emit-llvm -o - %s \
12 // RUN: | FileCheck -check-prefixes=CHECK-EXT %s
14 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
15 // RUN: -fbfloat16-excess-precision=standard -target-feature +fullbf16 \
16 // RUN: -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-NO-EXT %s
18 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
19 // RUN: -fbfloat16-excess-precision=none -emit-llvm -o - %s \
20 // RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s
22 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
23 // RUN: -fbfloat16-excess-precision=none -target-feature +fullbf16 \
24 // RUN: -emit-llvm -o - %s \
25 // RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s
27 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
28 // RUN: -fbfloat16-excess-precision=fast \
29 // RUN: -emit-llvm -ffp-eval-method=source -o - %s \
30 // RUN: | FileCheck -check-prefixes=CHECK-EXT %s
32 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
33 // RUN: -fbfloat16-excess-precision=fast -target-feature +fullbf16 \
34 // RUN: -emit-llvm -ffp-eval-method=source -o - %s \
35 // RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s
37 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
38 // RUN: -fbfloat16-excess-precision=standard \
39 // RUN: -emit-llvm -ffp-eval-method=source -o - %s \
40 // RUN: | FileCheck -check-prefixes=CHECK-EXT %s
42 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
43 // RUN: -fbfloat16-excess-precision=standard -target-feature +fullbf16 \
44 // RUN: -emit-llvm -ffp-eval-method=source -o - %s \
45 // RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s
47 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
48 // RUN: -fbfloat16-excess-precision=none \
49 // RUN: -emit-llvm -ffp-eval-method=source -o - %s \
50 // RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s
52 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
53 // RUN: -fbfloat16-excess-precision=none -target-feature +fullbf16 \
54 // RUN: -emit-llvm -ffp-eval-method=source -o - %s \
55 // RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s
57 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
58 // RUN: -fbfloat16-excess-precision=fast \
59 // RUN: -emit-llvm -ffp-eval-method=double -o - %s \
60 // RUN: | FileCheck -check-prefixes=CHECK-EXT-DBL %s
62 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
63 // RUN: -fbfloat16-excess-precision=fast -target-feature +fullbf16 \
64 // RUN: -emit-llvm -ffp-eval-method=double -o - %s \
65 // RUN: | FileCheck -check-prefixes=CHECK-EXT-DBL %s
67 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
68 // RUN: -fbfloat16-excess-precision=standard \
69 // RUN: -emit-llvm -ffp-eval-method=double -o - %s \
70 // RUN: | FileCheck -check-prefixes=CHECK-EXT-DBL %s
72 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
73 // RUN: -fbfloat16-excess-precision=standard -target-feature +fullbf16 \
74 // RUN: -emit-llvm -ffp-eval-method=double -o - %s \
75 // RUN: | FileCheck -check-prefixes=CHECK-EXT-DBL %s
77 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
78 // RUN: -fbfloat16-excess-precision=none \
79 // RUN: -emit-llvm -ffp-eval-method=double -o - %s \
80 // RUN: | FileCheck -check-prefixes=CHECK-EXT-DBL %s
82 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
83 // RUN: -fbfloat16-excess-precision=none -target-feature +fullbf16 \
84 // RUN: -emit-llvm -ffp-eval-method=double -o - %s \
85 // RUN: | FileCheck -check-prefixes=CHECK-EXT-DBL %s
87 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
88 // RUN: -fbfloat16-excess-precision=fast \
89 // RUN: -emit-llvm -ffp-eval-method=extended -o - %s \
90 // RUN: | FileCheck -check-prefixes=CHECK-EXT-FP80 %s
92 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
93 // RUN: -fbfloat16-excess-precision=fast -target-feature +fullbf16 \
94 // RUN: -emit-llvm -ffp-eval-method=extended -o - %s \
95 // RUN: | FileCheck -check-prefixes=CHECK-EXT-FP80 %s
97 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
98 // RUN: -fbfloat16-excess-precision=standard \
99 // RUN: -emit-llvm -ffp-eval-method=extended -o - %s \
100 // RUN: | FileCheck -check-prefixes=CHECK-EXT-FP80 %s
102 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
103 // RUN: -fbfloat16-excess-precision=standard -target-feature +fullbf16 \
104 // RUN: -emit-llvm -ffp-eval-method=extended -o - %s \
105 // RUN: | FileCheck -check-prefixes=CHECK-EXT-FP80 %s
107 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
108 // RUN: -fbfloat16-excess-precision=none \
109 // RUN: -emit-llvm -ffp-eval-method=extended -o - %s \
110 // RUN: | FileCheck -check-prefixes=CHECK-EXT-FP80 %s
112 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
113 // RUN: -fbfloat16-excess-precision=none -target-feature +fullbf16 \
114 // RUN: -emit-llvm -ffp-eval-method=extended -o - %s \
115 // RUN: | FileCheck -check-prefixes=CHECK-EXT-FP80 %s
117 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
118 // RUN: -fbfloat16-excess-precision=none \
119 // RUN: -ffp-contract=on -emit-llvm -o - %s \
120 // RUN: | FileCheck -check-prefixes=CHECK-CONTRACT %s
122 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
123 // RUN: -fbfloat16-excess-precision=none -target-feature +fullbf16 \
124 // RUN: -ffp-contract=on -emit-llvm -o - %s \
125 // RUN: | FileCheck -check-prefixes=CHECK-CONTRACT %s
127 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
128 // RUN: -fbfloat16-excess-precision=none \
129 // RUN: -fmath-errno -ffp-contract=on -fno-rounding-math \
130 // RUN: -ffp-eval-method=source -emit-llvm -o - %s \
131 // RUN: | FileCheck -check-prefixes=CHECK-CONTRACT %s
133 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
134 // RUN: -fbfloat16-excess-precision=none -target-feature +fullbf16 \
135 // RUN: -fmath-errno -ffp-contract=on -fno-rounding-math \
136 // RUN: -ffp-eval-method=source -emit-llvm -o - %s \
137 // RUN: | FileCheck -check-prefixes=CHECK-CONTRACT %s
139 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
140 // RUN: -fbfloat16-excess-precision=none \
141 // RUN: -fmath-errno -ffp-contract=on -fno-rounding-math \
142 // RUN: -ffp-eval-method=double -emit-llvm -o - %s \
143 // RUN: | FileCheck -check-prefixes=CHECK-CONTRACT-DBL %s
145 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
146 // RUN: -fbfloat16-excess-precision=none -target-feature +fullbf16 \
147 // RUN: -fmath-errno -ffp-contract=on -fno-rounding-math \
148 // RUN: -ffp-eval-method=double -emit-llvm -o - %s \
149 // RUN: | FileCheck -check-prefixes=CHECK-CONTRACT-DBL %s
151 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
152 // RUN: -fbfloat16-excess-precision=none \
153 // RUN: -fmath-errno -ffp-contract=on -fno-rounding-math \
154 // RUN: -ffp-eval-method=extended -emit-llvm -o - %s \
155 // RUN: | FileCheck -check-prefixes=CHECK-CONTRACT-EXT %s
157 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
158 // RUN: -fbfloat16-excess-precision=none -target-feature +fullbf16 \
159 // RUN: -fmath-errno -ffp-contract=on -fno-rounding-math \
160 // RUN: -ffp-eval-method=extended -emit-llvm -o - %s \
161 // RUN: | FileCheck -check-prefixes=CHECK-CONTRACT-EXT %s
163 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
164 // RUN: -fbfloat16-excess-precision=none \
165 // RUN: -fapprox-func -fmath-errno -fno-signed-zeros -mreassociate \
166 // RUN: -freciprocal-math -ffp-contract=on -fno-rounding-math \
167 // RUN: -funsafe-math-optimizations -emit-llvm -o - %s \
168 // RUN: | FileCheck -check-prefixes=CHECK-UNSAFE %s
170 // RUN: %clang_cc1 -triple x86_64-unknown-unknown \
171 // RUN: -fbfloat16-excess-precision=none -target-feature +fullbf16 \
172 // RUN: -fapprox-func -fmath-errno -fno-signed-zeros -mreassociate \
173 // RUN: -freciprocal-math -ffp-contract=on -fno-rounding-math \
174 // RUN: -funsafe-math-optimizations -emit-llvm -o - %s \
175 // RUN: | FileCheck -check-prefixes=CHECK-UNSAFE %s
177 // CHECK-EXT-LABEL: define dso_local bfloat @f
178 // CHECK-EXT-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] {
179 // CHECK-EXT-NEXT: entry:
180 // CHECK-EXT-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
181 // CHECK-EXT-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
182 // CHECK-EXT-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2
183 // CHECK-EXT-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2
184 // CHECK-EXT-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
185 // CHECK-EXT-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
186 // CHECK-EXT-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2
187 // CHECK-EXT-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2
188 // CHECK-EXT-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
189 // CHECK-EXT-NEXT: [[EXT:%.*]] = fpext bfloat [[TMP0]] to float
190 // CHECK-EXT-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
191 // CHECK-EXT-NEXT: [[EXT1:%.*]] = fpext bfloat [[TMP1]] to float
192 // CHECK-EXT-NEXT: [[MUL:%.*]] = fmul float [[EXT]], [[EXT1]]
193 // CHECK-EXT-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2
194 // CHECK-EXT-NEXT: [[EXT2:%.*]] = fpext bfloat [[TMP2]] to float
195 // CHECK-EXT-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2
196 // CHECK-EXT-NEXT: [[EXT3:%.*]] = fpext bfloat [[TMP3]] to float
197 // CHECK-EXT-NEXT: [[MUL4:%.*]] = fmul float [[EXT2]], [[EXT3]]
198 // CHECK-EXT-NEXT: [[ADD:%.*]] = fadd float [[MUL]], [[MUL4]]
199 // CHECK-EXT-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD]] to bfloat
200 // CHECK-EXT-NEXT: ret bfloat [[UNPROMOTION]]
202 // CHECK-NO-EXT-LABEL: define dso_local bfloat @f
203 // CHECK-NO-EXT-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] {
204 // CHECK-NO-EXT-NEXT: entry:
205 // CHECK-NO-EXT-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
206 // CHECK-NO-EXT-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
207 // CHECK-NO-EXT-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2
208 // CHECK-NO-EXT-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2
209 // CHECK-NO-EXT-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
210 // CHECK-NO-EXT-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
211 // CHECK-NO-EXT-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2
212 // CHECK-NO-EXT-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2
213 // CHECK-NO-EXT-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
214 // CHECK-NO-EXT-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
215 // CHECK-NO-EXT-NEXT: [[MUL:%.*]] = fmul bfloat [[TMP0]], [[TMP1]]
216 // CHECK-NO-EXT-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2
217 // CHECK-NO-EXT-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2
218 // CHECK-NO-EXT-NEXT: [[MUL1:%.*]] = fmul bfloat [[TMP2]], [[TMP3]]
219 // CHECK-NO-EXT-NEXT: [[ADD:%.*]] = fadd bfloat [[MUL]], [[MUL1]]
220 // CHECK-NO-EXT-NEXT: ret bfloat [[ADD]]
222 // CHECK-EXT-DBL-LABEL: define dso_local bfloat @f
223 // CHECK-EXT-DBL-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] {
224 // CHECK-EXT-DBL-NEXT: entry:
225 // CHECK-EXT-DBL-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
226 // CHECK-EXT-DBL-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
227 // CHECK-EXT-DBL-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2
228 // CHECK-EXT-DBL-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2
229 // CHECK-EXT-DBL-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
230 // CHECK-EXT-DBL-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
231 // CHECK-EXT-DBL-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2
232 // CHECK-EXT-DBL-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2
233 // CHECK-EXT-DBL-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
234 // CHECK-EXT-DBL-NEXT: [[CONV:%.*]] = fpext bfloat [[TMP0]] to double
235 // CHECK-EXT-DBL-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
236 // CHECK-EXT-DBL-NEXT: [[CONV1:%.*]] = fpext bfloat [[TMP1]] to double
237 // CHECK-EXT-DBL-NEXT: [[MUL:%.*]] = fmul double [[CONV]], [[CONV1]]
238 // CHECK-EXT-DBL-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2
239 // CHECK-EXT-DBL-NEXT: [[CONV2:%.*]] = fpext bfloat [[TMP2]] to double
240 // CHECK-EXT-DBL-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2
241 // CHECK-EXT-DBL-NEXT: [[CONV3:%.*]] = fpext bfloat [[TMP3]] to double
242 // CHECK-EXT-DBL-NEXT: [[MUL4:%.*]] = fmul double [[CONV2]], [[CONV3]]
243 // CHECK-EXT-DBL-NEXT: [[ADD:%.*]] = fadd double [[MUL]], [[MUL4]]
244 // CHECK-EXT-DBL-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD]] to bfloat
245 // CHECK-EXT-DBL-NEXT: ret bfloat [[CONV5]]
247 // CHECK-EXT-FP80-LABEL: define dso_local bfloat @f
248 // CHECK-EXT-FP80-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] {
249 // CHECK-EXT-FP80-NEXT: entry:
250 // CHECK-EXT-FP80-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
251 // CHECK-EXT-FP80-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
252 // CHECK-EXT-FP80-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2
253 // CHECK-EXT-FP80-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2
254 // CHECK-EXT-FP80-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
255 // CHECK-EXT-FP80-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
256 // CHECK-EXT-FP80-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2
257 // CHECK-EXT-FP80-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2
258 // CHECK-EXT-FP80-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
259 // CHECK-EXT-FP80-NEXT: [[CONV:%.*]] = fpext bfloat [[TMP0]] to x86_fp80
260 // CHECK-EXT-FP80-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
261 // CHECK-EXT-FP80-NEXT: [[CONV1:%.*]] = fpext bfloat [[TMP1]] to x86_fp80
262 // CHECK-EXT-FP80-NEXT: [[MUL:%.*]] = fmul x86_fp80 [[CONV]], [[CONV1]]
263 // CHECK-EXT-FP80-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2
264 // CHECK-EXT-FP80-NEXT: [[CONV2:%.*]] = fpext bfloat [[TMP2]] to x86_fp80
265 // CHECK-EXT-FP80-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2
266 // CHECK-EXT-FP80-NEXT: [[CONV3:%.*]] = fpext bfloat [[TMP3]] to x86_fp80
267 // CHECK-EXT-FP80-NEXT: [[MUL4:%.*]] = fmul x86_fp80 [[CONV2]], [[CONV3]]
268 // CHECK-EXT-FP80-NEXT: [[ADD:%.*]] = fadd x86_fp80 [[MUL]], [[MUL4]]
269 // CHECK-EXT-FP80-NEXT: [[CONV5:%.*]] = fptrunc x86_fp80 [[ADD]] to bfloat
270 // CHECK-EXT-FP80-NEXT: ret bfloat [[CONV5]]
272 // CHECK-CONTRACT-LABEL: define dso_local bfloat @f
273 // CHECK-CONTRACT-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] {
274 // CHECK-CONTRACT-NEXT: entry:
275 // CHECK-CONTRACT-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
276 // CHECK-CONTRACT-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
277 // CHECK-CONTRACT-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2
278 // CHECK-CONTRACT-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2
279 // CHECK-CONTRACT-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
280 // CHECK-CONTRACT-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
281 // CHECK-CONTRACT-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2
282 // CHECK-CONTRACT-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2
283 // CHECK-CONTRACT-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
284 // CHECK-CONTRACT-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
285 // CHECK-CONTRACT-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2
286 // CHECK-CONTRACT-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2
287 // CHECK-CONTRACT-NEXT: [[MUL1:%.*]] = fmul bfloat [[TMP2]], [[TMP3]]
288 // CHECK-CONTRACT-NEXT: [[TMP4:%.*]] = call bfloat @llvm.fmuladd.bf16(bfloat [[TMP0]], bfloat [[TMP1]], bfloat [[MUL1]])
289 // CHECK-CONTRACT-NEXT: ret bfloat [[TMP4]]
291 // CHECK-CONTRACT-DBL-LABEL: define dso_local bfloat @f
292 // CHECK-CONTRACT-DBL-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] {
293 // CHECK-CONTRACT-DBL-NEXT: entry:
294 // CHECK-CONTRACT-DBL-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
295 // CHECK-CONTRACT-DBL-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
296 // CHECK-CONTRACT-DBL-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2
297 // CHECK-CONTRACT-DBL-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2
298 // CHECK-CONTRACT-DBL-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
299 // CHECK-CONTRACT-DBL-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
300 // CHECK-CONTRACT-DBL-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2
301 // CHECK-CONTRACT-DBL-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2
302 // CHECK-CONTRACT-DBL-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
303 // CHECK-CONTRACT-DBL-NEXT: [[CONV:%.*]] = fpext bfloat [[TMP0]] to double
304 // CHECK-CONTRACT-DBL-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
305 // CHECK-CONTRACT-DBL-NEXT: [[CONV1:%.*]] = fpext bfloat [[TMP1]] to double
306 // CHECK-CONTRACT-DBL-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2
307 // CHECK-CONTRACT-DBL-NEXT: [[CONV2:%.*]] = fpext bfloat [[TMP2]] to double
308 // CHECK-CONTRACT-DBL-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2
309 // CHECK-CONTRACT-DBL-NEXT: [[CONV3:%.*]] = fpext bfloat [[TMP3]] to double
310 // CHECK-CONTRACT-DBL-NEXT: [[MUL4:%.*]] = fmul double [[CONV2]], [[CONV3]]
311 // CHECK-CONTRACT-DBL-NEXT: [[TMP4:%.*]] = call double @llvm.fmuladd.f64(double [[CONV]], double [[CONV1]], double [[MUL4]])
312 // CHECK-CONTRACT-DBL-NEXT: [[CONV5:%.*]] = fptrunc double [[TMP4]] to bfloat
313 // CHECK-CONTRACT-DBL-NEXT: ret bfloat [[CONV5]]
315 // CHECK-CONTRACT-EXT-LABEL: define dso_local bfloat @f
316 // CHECK-CONTRACT-EXT-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] {
317 // CHECK-CONTRACT-EXT-NEXT: entry:
318 // CHECK-CONTRACT-EXT-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
319 // CHECK-CONTRACT-EXT-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
320 // CHECK-CONTRACT-EXT-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2
321 // CHECK-CONTRACT-EXT-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2
322 // CHECK-CONTRACT-EXT-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
323 // CHECK-CONTRACT-EXT-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
324 // CHECK-CONTRACT-EXT-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2
325 // CHECK-CONTRACT-EXT-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2
326 // CHECK-CONTRACT-EXT-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
327 // CHECK-CONTRACT-EXT-NEXT: [[CONV:%.*]] = fpext bfloat [[TMP0]] to x86_fp80
328 // CHECK-CONTRACT-EXT-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
329 // CHECK-CONTRACT-EXT-NEXT: [[CONV1:%.*]] = fpext bfloat [[TMP1]] to x86_fp80
330 // CHECK-CONTRACT-EXT-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2
331 // CHECK-CONTRACT-EXT-NEXT: [[CONV2:%.*]] = fpext bfloat [[TMP2]] to x86_fp80
332 // CHECK-CONTRACT-EXT-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2
333 // CHECK-CONTRACT-EXT-NEXT: [[CONV3:%.*]] = fpext bfloat [[TMP3]] to x86_fp80
334 // CHECK-CONTRACT-EXT-NEXT: [[MUL4:%.*]] = fmul x86_fp80 [[CONV2]], [[CONV3]]
335 // CHECK-CONTRACT-EXT-NEXT: [[TMP4:%.*]] = call x86_fp80 @llvm.fmuladd.f80(x86_fp80 [[CONV]], x86_fp80 [[CONV1]], x86_fp80 [[MUL4]])
336 // CHECK-CONTRACT-EXT-NEXT: [[CONV5:%.*]] = fptrunc x86_fp80 [[TMP4]] to bfloat
337 // CHECK-CONTRACT-EXT-NEXT: ret bfloat [[CONV5]]
339 // CHECK-UNSAFE-LABEL: define dso_local bfloat @f
340 // CHECK-UNSAFE-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] {
341 // CHECK-UNSAFE-NEXT: entry:
342 // CHECK-UNSAFE-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
343 // CHECK-UNSAFE-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
344 // CHECK-UNSAFE-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2
345 // CHECK-UNSAFE-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2
346 // CHECK-UNSAFE-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
347 // CHECK-UNSAFE-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
348 // CHECK-UNSAFE-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2
349 // CHECK-UNSAFE-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2
350 // CHECK-UNSAFE-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
351 // CHECK-UNSAFE-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
352 // CHECK-UNSAFE-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2
353 // CHECK-UNSAFE-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2
354 // CHECK-UNSAFE-NEXT: [[MUL1:%.*]] = fmul reassoc nsz arcp afn bfloat [[TMP2]], [[TMP3]]
355 // CHECK-UNSAFE-NEXT: [[TMP4:%.*]] = call reassoc nsz arcp afn bfloat @llvm.fmuladd.bf16(bfloat [[TMP0]], bfloat [[TMP1]], bfloat [[MUL1]])
356 // CHECK-UNSAFE-NEXT: ret bfloat [[TMP4]]
358 __bf16 f(__bf16 a, __bf16 b, __bf16 c, __bf16 d) {
359 return a * b + c * d;