1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
2 // RUN: %clang_cc1 -triple riscv64 -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK-RV64
3 // RUN: %clang_cc1 -triple riscv32 -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK-RV32
9 // CHECK-RV64-LABEL: define dso_local i64 @h1
10 // CHECK-RV64-SAME: (bfloat noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
11 // CHECK-RV64-NEXT: entry:
12 // CHECK-RV64-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT1:%.*]], align 2
13 // CHECK-RV64-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
14 // CHECK-RV64-NEXT: [[COERCE_DIVE_COERCE:%.*]] = alloca i64, align 8
15 // CHECK-RV64-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
16 // CHECK-RV64-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
17 // CHECK-RV64-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT1]], ptr [[RETVAL]], i32 0, i32 0
18 // CHECK-RV64-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2
19 // CHECK-RV64-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT1]], ptr [[RETVAL]], i32 0, i32 0
20 // CHECK-RV64-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[COERCE_DIVE_COERCE]], ptr align 2 [[COERCE_DIVE]], i64 2, i1 false)
21 // CHECK-RV64-NEXT: [[TMP1:%.*]] = load i64, ptr [[COERCE_DIVE_COERCE]], align 8
22 // CHECK-RV64-NEXT: ret i64 [[TMP1]]
24 // CHECK-RV32-LABEL: define dso_local i32 @h1
25 // CHECK-RV32-SAME: (bfloat noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
26 // CHECK-RV32-NEXT: entry:
27 // CHECK-RV32-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT1:%.*]], align 2
28 // CHECK-RV32-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
29 // CHECK-RV32-NEXT: [[COERCE_DIVE_COERCE:%.*]] = alloca i32, align 4
30 // CHECK-RV32-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
31 // CHECK-RV32-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
32 // CHECK-RV32-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT1]], ptr [[RETVAL]], i32 0, i32 0
33 // CHECK-RV32-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2
34 // CHECK-RV32-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT1]], ptr [[RETVAL]], i32 0, i32 0
35 // CHECK-RV32-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[COERCE_DIVE_COERCE]], ptr align 2 [[COERCE_DIVE]], i32 2, i1 false)
36 // CHECK-RV32-NEXT: [[TMP1:%.*]] = load i32, ptr [[COERCE_DIVE_COERCE]], align 4
37 // CHECK-RV32-NEXT: ret i32 [[TMP1]]
// h1: takes a single __bf16 and returns a struct bfloat1 by value.
// Per the autogenerated assertions above, the result is expected to be
// returned as i64 on riscv64 and as i32 on riscv32; in both cases the
// 2 bytes of the struct are memcpy'd into a temporary of that integer type
// before being loaded and returned.
39 struct bfloat1
h1(__bf16 a
) {
50 // CHECK-RV64-LABEL: define dso_local i64 @h2
51 // CHECK-RV64-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0]] {
52 // CHECK-RV64-NEXT: entry:
53 // CHECK-RV64-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT2:%.*]], align 2
54 // CHECK-RV64-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
55 // CHECK-RV64-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
56 // CHECK-RV64-NEXT: [[RETVAL_COERCE:%.*]] = alloca i64, align 8
57 // CHECK-RV64-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
58 // CHECK-RV64-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
59 // CHECK-RV64-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
60 // CHECK-RV64-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT2]], ptr [[RETVAL]], i32 0, i32 0
61 // CHECK-RV64-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2
62 // CHECK-RV64-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
63 // CHECK-RV64-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT2]], ptr [[RETVAL]], i32 0, i32 1
64 // CHECK-RV64-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2
65 // CHECK-RV64-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL_COERCE]], ptr align 2 [[RETVAL]], i64 4, i1 false)
66 // CHECK-RV64-NEXT: [[TMP2:%.*]] = load i64, ptr [[RETVAL_COERCE]], align 8
67 // CHECK-RV64-NEXT: ret i64 [[TMP2]]
69 // CHECK-RV32-LABEL: define dso_local i32 @h2
70 // CHECK-RV32-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0]] {
71 // CHECK-RV32-NEXT: entry:
72 // CHECK-RV32-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT2:%.*]], align 2
73 // CHECK-RV32-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
74 // CHECK-RV32-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
75 // CHECK-RV32-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
76 // CHECK-RV32-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
77 // CHECK-RV32-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
78 // CHECK-RV32-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT2]], ptr [[RETVAL]], i32 0, i32 0
79 // CHECK-RV32-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2
80 // CHECK-RV32-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
81 // CHECK-RV32-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT2]], ptr [[RETVAL]], i32 0, i32 1
82 // CHECK-RV32-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2
83 // CHECK-RV32-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 2
84 // CHECK-RV32-NEXT: ret i32 [[TMP2]]
// h2: takes two __bf16 values and returns a struct bfloat2 by value.
// Per the autogenerated assertions above, riscv64 is expected to return i64
// after a 4-byte memcpy into an i64 temporary, while riscv32 is expected to
// load an i32 straight from the return slot with no temporary.
86 struct bfloat2
h2(__bf16 a
, __bf16 b
) {
99 // CHECK-RV64-LABEL: define dso_local i64 @h3
100 // CHECK-RV64-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]]) #[[ATTR0]] {
101 // CHECK-RV64-NEXT: entry:
102 // CHECK-RV64-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT3:%.*]], align 2
103 // CHECK-RV64-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
104 // CHECK-RV64-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
105 // CHECK-RV64-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2
106 // CHECK-RV64-NEXT: [[RETVAL_COERCE:%.*]] = alloca i64, align 8
107 // CHECK-RV64-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
108 // CHECK-RV64-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
109 // CHECK-RV64-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2
110 // CHECK-RV64-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
111 // CHECK-RV64-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 0
112 // CHECK-RV64-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2
113 // CHECK-RV64-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
114 // CHECK-RV64-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 1
115 // CHECK-RV64-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2
116 // CHECK-RV64-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2
117 // CHECK-RV64-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 2
118 // CHECK-RV64-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2
119 // CHECK-RV64-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL_COERCE]], ptr align 2 [[RETVAL]], i64 6, i1 false)
120 // CHECK-RV64-NEXT: [[TMP3:%.*]] = load i64, ptr [[RETVAL_COERCE]], align 8
121 // CHECK-RV64-NEXT: ret i64 [[TMP3]]
123 // CHECK-RV32-LABEL: define dso_local [2 x i32] @h3
124 // CHECK-RV32-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]]) #[[ATTR0]] {
125 // CHECK-RV32-NEXT: entry:
126 // CHECK-RV32-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT3:%.*]], align 2
127 // CHECK-RV32-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
128 // CHECK-RV32-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
129 // CHECK-RV32-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2
130 // CHECK-RV32-NEXT: [[RETVAL_COERCE:%.*]] = alloca [2 x i32], align 4
131 // CHECK-RV32-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
132 // CHECK-RV32-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
133 // CHECK-RV32-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2
134 // CHECK-RV32-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
135 // CHECK-RV32-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 0
136 // CHECK-RV32-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2
137 // CHECK-RV32-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
138 // CHECK-RV32-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 1
139 // CHECK-RV32-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2
140 // CHECK-RV32-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2
141 // CHECK-RV32-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 2
142 // CHECK-RV32-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2
143 // CHECK-RV32-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[RETVAL_COERCE]], ptr align 2 [[RETVAL]], i32 6, i1 false)
144 // CHECK-RV32-NEXT: [[TMP3:%.*]] = load [2 x i32], ptr [[RETVAL_COERCE]], align 4
145 // CHECK-RV32-NEXT: ret [2 x i32] [[TMP3]]
// h3: takes three __bf16 values and returns a struct bfloat3 by value.
// Per the autogenerated assertions above, riscv64 is expected to return i64
// and riscv32 is expected to return [2 x i32]; both go through a 6-byte
// memcpy into a temporary of the return type.
147 struct bfloat3
h3(__bf16 a
, __bf16 b
, __bf16 c
) {
162 // CHECK-RV64-LABEL: define dso_local i64 @h4
163 // CHECK-RV64-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0]] {
164 // CHECK-RV64-NEXT: entry:
165 // CHECK-RV64-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT4:%.*]], align 2
166 // CHECK-RV64-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
167 // CHECK-RV64-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
168 // CHECK-RV64-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2
169 // CHECK-RV64-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2
170 // CHECK-RV64-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
171 // CHECK-RV64-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
172 // CHECK-RV64-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2
173 // CHECK-RV64-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2
174 // CHECK-RV64-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
175 // CHECK-RV64-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 0
176 // CHECK-RV64-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2
177 // CHECK-RV64-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
178 // CHECK-RV64-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 1
179 // CHECK-RV64-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2
180 // CHECK-RV64-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2
181 // CHECK-RV64-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 2
182 // CHECK-RV64-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2
183 // CHECK-RV64-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2
184 // CHECK-RV64-NEXT: [[D4:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 3
185 // CHECK-RV64-NEXT: store bfloat [[TMP3]], ptr [[D4]], align 2
186 // CHECK-RV64-NEXT: [[TMP4:%.*]] = load i64, ptr [[RETVAL]], align 2
187 // CHECK-RV64-NEXT: ret i64 [[TMP4]]
189 // CHECK-RV32-LABEL: define dso_local [2 x i32] @h4
190 // CHECK-RV32-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0]] {
191 // CHECK-RV32-NEXT: entry:
192 // CHECK-RV32-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT4:%.*]], align 2
193 // CHECK-RV32-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
194 // CHECK-RV32-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
195 // CHECK-RV32-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2
196 // CHECK-RV32-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2
197 // CHECK-RV32-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
198 // CHECK-RV32-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
199 // CHECK-RV32-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2
200 // CHECK-RV32-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2
201 // CHECK-RV32-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
202 // CHECK-RV32-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 0
203 // CHECK-RV32-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2
204 // CHECK-RV32-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
205 // CHECK-RV32-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 1
206 // CHECK-RV32-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2
207 // CHECK-RV32-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2
208 // CHECK-RV32-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 2
209 // CHECK-RV32-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2
210 // CHECK-RV32-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2
211 // CHECK-RV32-NEXT: [[D4:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 3
212 // CHECK-RV32-NEXT: store bfloat [[TMP3]], ptr [[D4]], align 2
213 // CHECK-RV32-NEXT: [[TMP4:%.*]] = load [2 x i32], ptr [[RETVAL]], align 2
214 // CHECK-RV32-NEXT: ret [2 x i32] [[TMP4]]
// h4: takes four __bf16 values and returns a struct bfloat4 by value.
// Per the autogenerated assertions above, riscv64 is expected to return i64
// and riscv32 is expected to return [2 x i32]; in both cases the value is
// loaded directly from the return slot (no memcpy temporary).
216 struct bfloat4
h4(__bf16 a
, __bf16 b
, __bf16 c
, __bf16 d
) {
230 // CHECK-RV64-LABEL: define dso_local i64 @fh
231 // CHECK-RV64-SAME: (float noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0]] {
232 // CHECK-RV64-NEXT: entry:
233 // CHECK-RV64-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOATBFLOAT:%.*]], align 4
234 // CHECK-RV64-NEXT: [[A_ADDR:%.*]] = alloca float, align 4
235 // CHECK-RV64-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
236 // CHECK-RV64-NEXT: store float [[A]], ptr [[A_ADDR]], align 4
237 // CHECK-RV64-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
238 // CHECK-RV64-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4
239 // CHECK-RV64-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT]], ptr [[RETVAL]], i32 0, i32 0
240 // CHECK-RV64-NEXT: store float [[TMP0]], ptr [[A1]], align 4
241 // CHECK-RV64-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
242 // CHECK-RV64-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT]], ptr [[RETVAL]], i32 0, i32 1
243 // CHECK-RV64-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 4
244 // CHECK-RV64-NEXT: [[TMP2:%.*]] = load i64, ptr [[RETVAL]], align 4
245 // CHECK-RV64-NEXT: ret i64 [[TMP2]]
247 // CHECK-RV32-LABEL: define dso_local [2 x i32] @fh
248 // CHECK-RV32-SAME: (float noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0]] {
249 // CHECK-RV32-NEXT: entry:
250 // CHECK-RV32-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOATBFLOAT:%.*]], align 4
251 // CHECK-RV32-NEXT: [[A_ADDR:%.*]] = alloca float, align 4
252 // CHECK-RV32-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
253 // CHECK-RV32-NEXT: store float [[A]], ptr [[A_ADDR]], align 4
254 // CHECK-RV32-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
255 // CHECK-RV32-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4
256 // CHECK-RV32-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT]], ptr [[RETVAL]], i32 0, i32 0
257 // CHECK-RV32-NEXT: store float [[TMP0]], ptr [[A1]], align 4
258 // CHECK-RV32-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
259 // CHECK-RV32-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT]], ptr [[RETVAL]], i32 0, i32 1
260 // CHECK-RV32-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 4
261 // CHECK-RV32-NEXT: [[TMP2:%.*]] = load [2 x i32], ptr [[RETVAL]], align 4
262 // CHECK-RV32-NEXT: ret [2 x i32] [[TMP2]]
// fh: takes a float followed by a __bf16 and returns a struct floatbfloat
// by value. Per the autogenerated assertions above, riscv64 is expected to
// return i64 and riscv32 is expected to return [2 x i32], each loaded
// directly from the return slot.
264 struct floatbfloat
fh(float a
, __bf16 b
) {
265 struct floatbfloat x
;
271 struct floatbfloat2
{
277 // CHECK-RV64-LABEL: define dso_local i64 @fh2
278 // CHECK-RV64-SAME: (float noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]]) #[[ATTR0]] {
279 // CHECK-RV64-NEXT: entry:
280 // CHECK-RV64-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOATBFLOAT2:%.*]], align 4
281 // CHECK-RV64-NEXT: [[A_ADDR:%.*]] = alloca float, align 4
282 // CHECK-RV64-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
283 // CHECK-RV64-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2
284 // CHECK-RV64-NEXT: store float [[A]], ptr [[A_ADDR]], align 4
285 // CHECK-RV64-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
286 // CHECK-RV64-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2
287 // CHECK-RV64-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4
288 // CHECK-RV64-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT2]], ptr [[RETVAL]], i32 0, i32 0
289 // CHECK-RV64-NEXT: store float [[TMP0]], ptr [[A1]], align 4
290 // CHECK-RV64-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
291 // CHECK-RV64-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT2]], ptr [[RETVAL]], i32 0, i32 1
292 // CHECK-RV64-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 4
293 // CHECK-RV64-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2
294 // CHECK-RV64-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT2]], ptr [[RETVAL]], i32 0, i32 2
295 // CHECK-RV64-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2
296 // CHECK-RV64-NEXT: [[TMP3:%.*]] = load i64, ptr [[RETVAL]], align 4
297 // CHECK-RV64-NEXT: ret i64 [[TMP3]]
299 // CHECK-RV32-LABEL: define dso_local [2 x i32] @fh2
300 // CHECK-RV32-SAME: (float noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]]) #[[ATTR0]] {
301 // CHECK-RV32-NEXT: entry:
302 // CHECK-RV32-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOATBFLOAT2:%.*]], align 4
303 // CHECK-RV32-NEXT: [[A_ADDR:%.*]] = alloca float, align 4
304 // CHECK-RV32-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
305 // CHECK-RV32-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2
306 // CHECK-RV32-NEXT: store float [[A]], ptr [[A_ADDR]], align 4
307 // CHECK-RV32-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
308 // CHECK-RV32-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2
309 // CHECK-RV32-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4
310 // CHECK-RV32-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT2]], ptr [[RETVAL]], i32 0, i32 0
311 // CHECK-RV32-NEXT: store float [[TMP0]], ptr [[A1]], align 4
312 // CHECK-RV32-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
313 // CHECK-RV32-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT2]], ptr [[RETVAL]], i32 0, i32 1
314 // CHECK-RV32-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 4
315 // CHECK-RV32-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2
316 // CHECK-RV32-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT2]], ptr [[RETVAL]], i32 0, i32 2
317 // CHECK-RV32-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2
318 // CHECK-RV32-NEXT: [[TMP3:%.*]] = load [2 x i32], ptr [[RETVAL]], align 4
319 // CHECK-RV32-NEXT: ret [2 x i32] [[TMP3]]
// fh2: takes a float followed by two __bf16 values and returns a
// struct floatbfloat2 by value. Per the autogenerated assertions above,
// riscv64 is expected to return i64 and riscv32 is expected to return
// [2 x i32], each loaded directly from the return slot.
321 struct floatbfloat2
fh2(float a
, __bf16 b
, __bf16 c
) {
322 struct floatbfloat2 x
;
334 // CHECK-RV64-LABEL: define dso_local i64 @hf
335 // CHECK-RV64-SAME: (bfloat noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
336 // CHECK-RV64-NEXT: entry:
337 // CHECK-RV64-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOATFLOAT:%.*]], align 4
338 // CHECK-RV64-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
339 // CHECK-RV64-NEXT: [[B_ADDR:%.*]] = alloca float, align 4
340 // CHECK-RV64-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
341 // CHECK-RV64-NEXT: store float [[B]], ptr [[B_ADDR]], align 4
342 // CHECK-RV64-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
343 // CHECK-RV64-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOATFLOAT]], ptr [[RETVAL]], i32 0, i32 0
344 // CHECK-RV64-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 4
345 // CHECK-RV64-NEXT: [[TMP1:%.*]] = load float, ptr [[B_ADDR]], align 4
346 // CHECK-RV64-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOATFLOAT]], ptr [[RETVAL]], i32 0, i32 1
347 // CHECK-RV64-NEXT: store float [[TMP1]], ptr [[B2]], align 4
348 // CHECK-RV64-NEXT: [[TMP2:%.*]] = load i64, ptr [[RETVAL]], align 4
349 // CHECK-RV64-NEXT: ret i64 [[TMP2]]
351 // CHECK-RV32-LABEL: define dso_local [2 x i32] @hf
352 // CHECK-RV32-SAME: (bfloat noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
353 // CHECK-RV32-NEXT: entry:
354 // CHECK-RV32-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOATFLOAT:%.*]], align 4
355 // CHECK-RV32-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
356 // CHECK-RV32-NEXT: [[B_ADDR:%.*]] = alloca float, align 4
357 // CHECK-RV32-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
358 // CHECK-RV32-NEXT: store float [[B]], ptr [[B_ADDR]], align 4
359 // CHECK-RV32-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
360 // CHECK-RV32-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOATFLOAT]], ptr [[RETVAL]], i32 0, i32 0
361 // CHECK-RV32-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 4
362 // CHECK-RV32-NEXT: [[TMP1:%.*]] = load float, ptr [[B_ADDR]], align 4
363 // CHECK-RV32-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOATFLOAT]], ptr [[RETVAL]], i32 0, i32 1
364 // CHECK-RV32-NEXT: store float [[TMP1]], ptr [[B2]], align 4
365 // CHECK-RV32-NEXT: [[TMP2:%.*]] = load [2 x i32], ptr [[RETVAL]], align 4
366 // CHECK-RV32-NEXT: ret [2 x i32] [[TMP2]]
// hf: takes a __bf16 followed by a float and returns a struct bfloatfloat
// by value (the mirror-image argument order of fh). Per the autogenerated
// assertions above, riscv64 is expected to return i64 and riscv32 is
// expected to return [2 x i32], each loaded directly from the return slot.
368 struct bfloatfloat
hf(__bf16 a
, float b
) {
369 struct bfloatfloat x
;
375 struct bfloat2float
{
381 // CHECK-RV64-LABEL: define dso_local i64 @h2f
382 // CHECK-RV64-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], float noundef [[C:%.*]]) #[[ATTR0]] {
383 // CHECK-RV64-NEXT: entry:
384 // CHECK-RV64-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT2FLOAT:%.*]], align 4
385 // CHECK-RV64-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
386 // CHECK-RV64-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
387 // CHECK-RV64-NEXT: [[C_ADDR:%.*]] = alloca float, align 4
388 // CHECK-RV64-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
389 // CHECK-RV64-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
390 // CHECK-RV64-NEXT: store float [[C]], ptr [[C_ADDR]], align 4
391 // CHECK-RV64-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
392 // CHECK-RV64-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT2FLOAT]], ptr [[RETVAL]], i32 0, i32 0
393 // CHECK-RV64-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 4
394 // CHECK-RV64-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
395 // CHECK-RV64-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT2FLOAT]], ptr [[RETVAL]], i32 0, i32 1
396 // CHECK-RV64-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2
397 // CHECK-RV64-NEXT: [[TMP2:%.*]] = load float, ptr [[C_ADDR]], align 4
398 // CHECK-RV64-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT2FLOAT]], ptr [[RETVAL]], i32 0, i32 2
399 // CHECK-RV64-NEXT: store float [[TMP2]], ptr [[C3]], align 4
400 // CHECK-RV64-NEXT: [[TMP3:%.*]] = load i64, ptr [[RETVAL]], align 4
401 // CHECK-RV64-NEXT: ret i64 [[TMP3]]
403 // CHECK-RV32-LABEL: define dso_local [2 x i32] @h2f
404 // CHECK-RV32-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], float noundef [[C:%.*]]) #[[ATTR0]] {
405 // CHECK-RV32-NEXT: entry:
406 // CHECK-RV32-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT2FLOAT:%.*]], align 4
407 // CHECK-RV32-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
408 // CHECK-RV32-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
409 // CHECK-RV32-NEXT: [[C_ADDR:%.*]] = alloca float, align 4
410 // CHECK-RV32-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
411 // CHECK-RV32-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
412 // CHECK-RV32-NEXT: store float [[C]], ptr [[C_ADDR]], align 4
413 // CHECK-RV32-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
414 // CHECK-RV32-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT2FLOAT]], ptr [[RETVAL]], i32 0, i32 0
415 // CHECK-RV32-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 4
416 // CHECK-RV32-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
417 // CHECK-RV32-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT2FLOAT]], ptr [[RETVAL]], i32 0, i32 1
418 // CHECK-RV32-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2
419 // CHECK-RV32-NEXT: [[TMP2:%.*]] = load float, ptr [[C_ADDR]], align 4
420 // CHECK-RV32-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT2FLOAT]], ptr [[RETVAL]], i32 0, i32 2
421 // CHECK-RV32-NEXT: store float [[TMP2]], ptr [[C3]], align 4
422 // CHECK-RV32-NEXT: [[TMP3:%.*]] = load [2 x i32], ptr [[RETVAL]], align 4
423 // CHECK-RV32-NEXT: ret [2 x i32] [[TMP3]]
// h2f: takes two __bf16 values followed by a float and returns a
// struct bfloat2float by value. Per the autogenerated assertions above,
// riscv64 is expected to return i64 and riscv32 is expected to return
// [2 x i32], each loaded directly from the return slot.
425 struct bfloat2float
h2f(__bf16 a
, __bf16 b
, float c
) {
426 struct bfloat2float x
;
433 struct floatbfloat3
{
440 // CHECK-RV64-LABEL: define dso_local [2 x i64] @fh3
441 // CHECK-RV64-SAME: (float noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0]] {
442 // CHECK-RV64-NEXT: entry:
443 // CHECK-RV64-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOATBFLOAT3:%.*]], align 4
444 // CHECK-RV64-NEXT: [[A_ADDR:%.*]] = alloca float, align 4
445 // CHECK-RV64-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
446 // CHECK-RV64-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2
447 // CHECK-RV64-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2
448 // CHECK-RV64-NEXT: [[RETVAL_COERCE:%.*]] = alloca [2 x i64], align 8
449 // CHECK-RV64-NEXT: store float [[A]], ptr [[A_ADDR]], align 4
450 // CHECK-RV64-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
451 // CHECK-RV64-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2
452 // CHECK-RV64-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2
453 // CHECK-RV64-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4
454 // CHECK-RV64-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT3]], ptr [[RETVAL]], i32 0, i32 0
455 // CHECK-RV64-NEXT: store float [[TMP0]], ptr [[A1]], align 4
456 // CHECK-RV64-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
457 // CHECK-RV64-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT3]], ptr [[RETVAL]], i32 0, i32 1
458 // CHECK-RV64-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 4
459 // CHECK-RV64-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2
460 // CHECK-RV64-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT3]], ptr [[RETVAL]], i32 0, i32 2
461 // CHECK-RV64-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2
462 // CHECK-RV64-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2
463 // CHECK-RV64-NEXT: [[D4:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT3]], ptr [[RETVAL]], i32 0, i32 3
464 // CHECK-RV64-NEXT: store bfloat [[TMP3]], ptr [[D4]], align 4
465 // CHECK-RV64-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL_COERCE]], ptr align 4 [[RETVAL]], i64 12, i1 false)
466 // CHECK-RV64-NEXT: [[TMP4:%.*]] = load [2 x i64], ptr [[RETVAL_COERCE]], align 8
467 // CHECK-RV64-NEXT: ret [2 x i64] [[TMP4]]
469 // CHECK-RV32-LABEL: define dso_local void @fh3
470 // CHECK-RV32-SAME: (ptr noalias sret([[STRUCT_FLOATBFLOAT3:%.*]]) align 4 [[AGG_RESULT:%.*]], float noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0]] {
471 // CHECK-RV32-NEXT: entry:
472 // CHECK-RV32-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 4
473 // CHECK-RV32-NEXT: [[A_ADDR:%.*]] = alloca float, align 4
474 // CHECK-RV32-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
475 // CHECK-RV32-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2
476 // CHECK-RV32-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2
477 // CHECK-RV32-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
478 // CHECK-RV32-NEXT: store float [[A]], ptr [[A_ADDR]], align 4
479 // CHECK-RV32-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
480 // CHECK-RV32-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2
481 // CHECK-RV32-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2
482 // CHECK-RV32-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4
483 // CHECK-RV32-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT3]], ptr [[AGG_RESULT]], i32 0, i32 0
484 // CHECK-RV32-NEXT: store float [[TMP0]], ptr [[A1]], align 4
485 // CHECK-RV32-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
486 // CHECK-RV32-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT3]], ptr [[AGG_RESULT]], i32 0, i32 1
487 // CHECK-RV32-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 4
488 // CHECK-RV32-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2
489 // CHECK-RV32-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT3]], ptr [[AGG_RESULT]], i32 0, i32 2
490 // CHECK-RV32-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2
491 // CHECK-RV32-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2
492 // CHECK-RV32-NEXT: [[D4:%.*]] = getelementptr inbounds [[STRUCT_FLOATBFLOAT3]], ptr [[AGG_RESULT]], i32 0, i32 3
493 // CHECK-RV32-NEXT: store bfloat [[TMP3]], ptr [[D4]], align 4
494 // CHECK-RV32-NEXT: ret void
// fh3: takes a float followed by three __bf16 values and returns a
// struct floatbfloat3 by value. Per the autogenerated assertions above,
// riscv64 is expected to return [2 x i64] after a 12-byte memcpy into a
// temporary, while riscv32 is expected to return void and write the fields
// through a leading `sret` pointer parameter instead.
496 struct floatbfloat3
fh3(float a
, __bf16 b
, __bf16 c
, __bf16 d
) {
497 struct floatbfloat3 x
;
513 // CHECK-RV64-LABEL: define dso_local [2 x i64] @h5
514 // CHECK-RV64-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]], bfloat noundef [[E:%.*]]) #[[ATTR0]] {
515 // CHECK-RV64-NEXT: entry:
516 // CHECK-RV64-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT5:%.*]], align 2
517 // CHECK-RV64-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
518 // CHECK-RV64-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
519 // CHECK-RV64-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2
520 // CHECK-RV64-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2
521 // CHECK-RV64-NEXT: [[E_ADDR:%.*]] = alloca bfloat, align 2
522 // CHECK-RV64-NEXT: [[RETVAL_COERCE:%.*]] = alloca [2 x i64], align 8
523 // CHECK-RV64-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
524 // CHECK-RV64-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
525 // CHECK-RV64-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2
526 // CHECK-RV64-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2
527 // CHECK-RV64-NEXT: store bfloat [[E]], ptr [[E_ADDR]], align 2
528 // CHECK-RV64-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
529 // CHECK-RV64-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT5]], ptr [[RETVAL]], i32 0, i32 0
530 // CHECK-RV64-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2
531 // CHECK-RV64-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
532 // CHECK-RV64-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT5]], ptr [[RETVAL]], i32 0, i32 1
533 // CHECK-RV64-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2
534 // CHECK-RV64-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2
535 // CHECK-RV64-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT5]], ptr [[RETVAL]], i32 0, i32 2
536 // CHECK-RV64-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2
537 // CHECK-RV64-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2
538 // CHECK-RV64-NEXT: [[D4:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT5]], ptr [[RETVAL]], i32 0, i32 3
539 // CHECK-RV64-NEXT: store bfloat [[TMP3]], ptr [[D4]], align 2
540 // CHECK-RV64-NEXT: [[TMP4:%.*]] = load bfloat, ptr [[E_ADDR]], align 2
541 // CHECK-RV64-NEXT: [[E5:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT5]], ptr [[RETVAL]], i32 0, i32 4
542 // CHECK-RV64-NEXT: store bfloat [[TMP4]], ptr [[E5]], align 2
543 // CHECK-RV64-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL_COERCE]], ptr align 2 [[RETVAL]], i64 10, i1 false)
544 // CHECK-RV64-NEXT: [[TMP5:%.*]] = load [2 x i64], ptr [[RETVAL_COERCE]], align 8
545 // CHECK-RV64-NEXT: ret [2 x i64] [[TMP5]]
547 // CHECK-RV32-LABEL: define dso_local void @h5
548 // CHECK-RV32-SAME: (ptr noalias sret([[STRUCT_BFLOAT5:%.*]]) align 2 [[AGG_RESULT:%.*]], bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]], bfloat noundef [[E:%.*]]) #[[ATTR0]] {
549 // CHECK-RV32-NEXT: entry:
550 // CHECK-RV32-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 4
551 // CHECK-RV32-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2
552 // CHECK-RV32-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2
553 // CHECK-RV32-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2
554 // CHECK-RV32-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2
555 // CHECK-RV32-NEXT: [[E_ADDR:%.*]] = alloca bfloat, align 2
556 // CHECK-RV32-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
557 // CHECK-RV32-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2
558 // CHECK-RV32-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2
559 // CHECK-RV32-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2
560 // CHECK-RV32-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2
561 // CHECK-RV32-NEXT: store bfloat [[E]], ptr [[E_ADDR]], align 2
562 // CHECK-RV32-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
563 // CHECK-RV32-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT5]], ptr [[AGG_RESULT]], i32 0, i32 0
564 // CHECK-RV32-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2
565 // CHECK-RV32-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
566 // CHECK-RV32-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT5]], ptr [[AGG_RESULT]], i32 0, i32 1
567 // CHECK-RV32-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2
568 // CHECK-RV32-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2
569 // CHECK-RV32-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT5]], ptr [[AGG_RESULT]], i32 0, i32 2
570 // CHECK-RV32-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2
571 // CHECK-RV32-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2
572 // CHECK-RV32-NEXT: [[D4:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT5]], ptr [[AGG_RESULT]], i32 0, i32 3
573 // CHECK-RV32-NEXT: store bfloat [[TMP3]], ptr [[D4]], align 2
574 // CHECK-RV32-NEXT: [[TMP4:%.*]] = load bfloat, ptr [[E_ADDR]], align 2
575 // CHECK-RV32-NEXT: [[E5:%.*]] = getelementptr inbounds [[STRUCT_BFLOAT5]], ptr [[AGG_RESULT]], i32 0, i32 4
576 // CHECK-RV32-NEXT: store bfloat [[TMP4]], ptr [[E5]], align 2
577 // CHECK-RV32-NEXT: ret void
579 struct bfloat5
h5(__bf16 a
, __bf16 b
, __bf16 c
, __bf16 d
, __bf16 e
) {