1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2 ; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16
3 ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
4 ; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
5 ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
7 ; CHECK-GI: warning: Instruction selection used fallback path for stofp_i64_bf16
8 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_i64_bf16
9 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_i32_bf16
10 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_i32_bf16
11 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_i16_bf16
12 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_i16_bf16
13 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_i8_bf16
14 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_i8_bf16
15 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v2i64_v2bf16
16 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v2i64_v2bf16
17 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v3i64_v3bf16
18 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v3i64_v3bf16
19 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v4i64_v4bf16
20 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v4i64_v4bf16
21 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v8i64_v8bf16
22 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v8i64_v8bf16
23 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v16i64_v16bf16
24 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v16i64_v16bf16
25 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v32i64_v32bf16
26 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v32i64_v32bf16
27 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v2i32_v2bf16
28 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v2i32_v2bf16
29 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v3i32_v3bf16
30 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v3i32_v3bf16
31 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v4i32_v4bf16
32 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v4i32_v4bf16
33 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v8i32_v8bf16
34 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v8i32_v8bf16
35 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v16i32_v16bf16
36 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v16i32_v16bf16
37 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v32i32_v32bf16
38 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v32i32_v32bf16
39 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v2i16_v2bf16
40 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v2i16_v2bf16
41 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v3i16_v3bf16
42 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v3i16_v3bf16
43 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v4i16_v4bf16
44 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v4i16_v4bf16
45 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v8i16_v8bf16
46 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v8i16_v8bf16
47 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v16i16_v16bf16
48 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v16i16_v16bf16
49 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v32i16_v32bf16
50 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v32i16_v32bf16
51 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v2i8_v2bf16
52 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v2i8_v2bf16
53 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v3i8_v3bf16
54 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v3i8_v3bf16
55 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v4i8_v4bf16
56 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v4i8_v4bf16
57 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v8i8_v8bf16
58 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v8i8_v8bf16
59 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v16i8_v16bf16
60 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v16i8_v16bf16
61 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v32i8_v32bf16
62 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v32i8_v32bf16
; Scalar signed i64 -> bfloat (sitofp).
; Expected code: take the magnitude (cmp/cneg), and when it has bits above
; bit 52 (lsr #53) clear the low 12 bits before scvtf, remembering in a 0/1
; flag whether any of them were set (tst #0xfff). The sign bit and that flag
; are ORed into the double, which is narrowed with fcvtxn. The f32 bits are
; then rounded to bf16 by adding 0x7fff plus bit 16 and shifting right by 16.
64 define bfloat @stofp_i64_bf16(i64 %a) {
65 ; CHECK-LABEL: stofp_i64_bf16:
66 ; CHECK: // %bb.0: // %entry
67 ; CHECK-NEXT: cmp x0, #0
68 ; CHECK-NEXT: and x11, x0, #0x8000000000000000
69 ; CHECK-NEXT: mov w8, #32767 // =0x7fff
70 ; CHECK-NEXT: cneg x9, x0, mi
71 ; CHECK-NEXT: lsr x10, x9, #53
72 ; CHECK-NEXT: cmp x10, #0
73 ; CHECK-NEXT: and x10, x9, #0xfffffffffffff000
74 ; CHECK-NEXT: csel x10, x10, x9, ne
75 ; CHECK-NEXT: scvtf d0, x10
76 ; CHECK-NEXT: cset w10, ne
77 ; CHECK-NEXT: tst x9, #0xfff
78 ; CHECK-NEXT: csel w10, wzr, w10, eq
79 ; CHECK-NEXT: fmov x9, d0
80 ; CHECK-NEXT: orr x9, x9, x11
81 ; CHECK-NEXT: orr x9, x9, x10
82 ; CHECK-NEXT: fmov d0, x9
83 ; CHECK-NEXT: fcvtxn s0, d0
84 ; CHECK-NEXT: fmov w9, s0
85 ; CHECK-NEXT: ubfx w10, w9, #16, #1
86 ; CHECK-NEXT: add w8, w9, w8
87 ; CHECK-NEXT: add w8, w10, w8
88 ; CHECK-NEXT: lsr w8, w8, #16
89 ; CHECK-NEXT: fmov s0, w8
90 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
93 %c = sitofp i64 %a to bfloat
; Scalar unsigned i64 -> bfloat (uitofp).
; Expected code mirrors the signed i64 test without the sign handling: when
; the input has bits above bit 52 (lsr #53) its low 12 bits are cleared
; before ucvtf and a 0/1 flag recording whether any were set (tst #0xfff) is
; ORed into the double. The double is narrowed with fcvtxn and the f32 bits
; are rounded to bf16 by adding 0x7fff plus bit 16 and shifting right by 16.
97 define bfloat @utofp_i64_bf16(i64 %a) {
98 ; CHECK-LABEL: utofp_i64_bf16:
99 ; CHECK: // %bb.0: // %entry
100 ; CHECK-NEXT: lsr x9, x0, #53
101 ; CHECK-NEXT: mov w8, #32767 // =0x7fff
102 ; CHECK-NEXT: cmp x9, #0
103 ; CHECK-NEXT: and x9, x0, #0xfffffffffffff000
104 ; CHECK-NEXT: csel x9, x9, x0, ne
105 ; CHECK-NEXT: ucvtf d0, x9
106 ; CHECK-NEXT: cset w9, ne
107 ; CHECK-NEXT: tst x0, #0xfff
108 ; CHECK-NEXT: csel w9, wzr, w9, eq
109 ; CHECK-NEXT: fmov x10, d0
110 ; CHECK-NEXT: orr x9, x10, x9
111 ; CHECK-NEXT: fmov d0, x9
112 ; CHECK-NEXT: fcvtxn s0, d0
113 ; CHECK-NEXT: fmov w9, s0
114 ; CHECK-NEXT: ubfx w10, w9, #16, #1
115 ; CHECK-NEXT: add w8, w9, w8
116 ; CHECK-NEXT: add w8, w10, w8
117 ; CHECK-NEXT: lsr w8, w8, #16
118 ; CHECK-NEXT: fmov s0, w8
119 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
122 %c = uitofp i64 %a to bfloat
; Scalar signed i32 -> bfloat (sitofp).
; Expected code converts w0 to double with scvtf, narrows to f32 with fcvtxn,
; then rounds the f32 bits to bf16 by adding 0x7fff plus bit 16 and shifting
; right by 16.
126 define bfloat @stofp_i32_bf16(i32 %a) {
127 ; CHECK-LABEL: stofp_i32_bf16:
128 ; CHECK: // %bb.0: // %entry
129 ; CHECK-NEXT: scvtf d0, w0
130 ; CHECK-NEXT: mov w8, #32767 // =0x7fff
131 ; CHECK-NEXT: fcvtxn s0, d0
132 ; CHECK-NEXT: fmov w9, s0
133 ; CHECK-NEXT: ubfx w10, w9, #16, #1
134 ; CHECK-NEXT: add w8, w9, w8
135 ; CHECK-NEXT: add w8, w10, w8
136 ; CHECK-NEXT: lsr w8, w8, #16
137 ; CHECK-NEXT: fmov s0, w8
138 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
141 %c = sitofp i32 %a to bfloat
; Scalar unsigned i32 -> bfloat (uitofp).
; Expected code converts w0 to double with ucvtf, narrows to f32 with fcvtxn,
; then rounds the f32 bits to bf16 by adding 0x7fff plus bit 16 and shifting
; right by 16.
145 define bfloat @utofp_i32_bf16(i32 %a) {
146 ; CHECK-LABEL: utofp_i32_bf16:
147 ; CHECK: // %bb.0: // %entry
148 ; CHECK-NEXT: ucvtf d0, w0
149 ; CHECK-NEXT: mov w8, #32767 // =0x7fff
150 ; CHECK-NEXT: fcvtxn s0, d0
151 ; CHECK-NEXT: fmov w9, s0
152 ; CHECK-NEXT: ubfx w10, w9, #16, #1
153 ; CHECK-NEXT: add w8, w9, w8
154 ; CHECK-NEXT: add w8, w10, w8
155 ; CHECK-NEXT: lsr w8, w8, #16
156 ; CHECK-NEXT: fmov s0, w8
157 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
160 %c = uitofp i32 %a to bfloat
; Scalar signed i16 -> bfloat (sitofp).
; Expected code sign-extends the argument (sxth), converts straight to f32
; with scvtf (no double/fcvtxn step, unlike the i32 and i64 tests), then
; rounds the f32 bits to bf16 by adding 0x7fff plus bit 16 and shifting right
; by 16.
164 define bfloat @stofp_i16_bf16(i16 %a) {
165 ; CHECK-LABEL: stofp_i16_bf16:
166 ; CHECK: // %bb.0: // %entry
167 ; CHECK-NEXT: sxth w9, w0
168 ; CHECK-NEXT: mov w8, #32767 // =0x7fff
169 ; CHECK-NEXT: scvtf s0, w9
170 ; CHECK-NEXT: fmov w9, s0
171 ; CHECK-NEXT: ubfx w10, w9, #16, #1
172 ; CHECK-NEXT: add w8, w9, w8
173 ; CHECK-NEXT: add w8, w10, w8
174 ; CHECK-NEXT: lsr w8, w8, #16
175 ; CHECK-NEXT: fmov s0, w8
176 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
179 %c = sitofp i16 %a to bfloat
; Scalar unsigned i16 -> bfloat (uitofp).
; Expected code zero-extends the argument (and #0xffff), converts straight to
; f32 with ucvtf, then rounds the f32 bits to bf16 by adding 0x7fff plus
; bit 16 and shifting right by 16.
183 define bfloat @utofp_i16_bf16(i16 %a) {
184 ; CHECK-LABEL: utofp_i16_bf16:
185 ; CHECK: // %bb.0: // %entry
186 ; CHECK-NEXT: and w9, w0, #0xffff
187 ; CHECK-NEXT: mov w8, #32767 // =0x7fff
188 ; CHECK-NEXT: ucvtf s0, w9
189 ; CHECK-NEXT: fmov w9, s0
190 ; CHECK-NEXT: ubfx w10, w9, #16, #1
191 ; CHECK-NEXT: add w8, w9, w8
192 ; CHECK-NEXT: add w8, w10, w8
193 ; CHECK-NEXT: lsr w8, w8, #16
194 ; CHECK-NEXT: fmov s0, w8
195 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
198 %c = uitofp i16 %a to bfloat
; Scalar signed i8 -> bfloat (sitofp).
; Expected code sign-extends the argument (sxtb), converts straight to f32
; with scvtf, then rounds the f32 bits to bf16 by adding 0x7fff plus bit 16
; and shifting right by 16.
202 define bfloat @stofp_i8_bf16(i8 %a) {
203 ; CHECK-LABEL: stofp_i8_bf16:
204 ; CHECK: // %bb.0: // %entry
205 ; CHECK-NEXT: sxtb w9, w0
206 ; CHECK-NEXT: mov w8, #32767 // =0x7fff
207 ; CHECK-NEXT: scvtf s0, w9
208 ; CHECK-NEXT: fmov w9, s0
209 ; CHECK-NEXT: ubfx w10, w9, #16, #1
210 ; CHECK-NEXT: add w8, w9, w8
211 ; CHECK-NEXT: add w8, w10, w8
212 ; CHECK-NEXT: lsr w8, w8, #16
213 ; CHECK-NEXT: fmov s0, w8
214 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
217 %c = sitofp i8 %a to bfloat
; Scalar unsigned i8 -> bfloat (uitofp).
; Expected code zero-extends the argument (and #0xff), converts straight to
; f32 with ucvtf, then rounds the f32 bits to bf16 by adding 0x7fff plus
; bit 16 and shifting right by 16.
221 define bfloat @utofp_i8_bf16(i8 %a) {
222 ; CHECK-LABEL: utofp_i8_bf16:
223 ; CHECK: // %bb.0: // %entry
224 ; CHECK-NEXT: and w9, w0, #0xff
225 ; CHECK-NEXT: mov w8, #32767 // =0x7fff
226 ; CHECK-NEXT: ucvtf s0, w9
227 ; CHECK-NEXT: fmov w9, s0
228 ; CHECK-NEXT: ubfx w10, w9, #16, #1
229 ; CHECK-NEXT: add w8, w9, w8
230 ; CHECK-NEXT: add w8, w10, w8
231 ; CHECK-NEXT: lsr w8, w8, #16
232 ; CHECK-NEXT: fmov s0, w8
233 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
236 %c = uitofp i8 %a to bfloat
; <2 x i64> -> <2 x bfloat> signed conversion (sitofp).
; Expected code is scalarised: each lane is moved to a general register
; (mov x9, v0.d[1] / fmov x10, d0) and goes through the same steps as the
; scalar signed i64 test (cneg magnitude, lsr #53 / tst #0xfff flag, scvtf,
; sign and flag ORed in, fcvtxn, 0x7fff + bit 16 rounding, lsr #16). The two
; results are recombined with mov v0.h[1], v1.h[0].
240 define <2 x bfloat> @stofp_v2i64_v2bf16(<2 x i64> %a) {
241 ; CHECK-LABEL: stofp_v2i64_v2bf16:
242 ; CHECK: // %bb.0: // %entry
243 ; CHECK-NEXT: mov x9, v0.d[1]
244 ; CHECK-NEXT: mov w8, #32767 // =0x7fff
245 ; CHECK-NEXT: cmp x9, #0
246 ; CHECK-NEXT: cneg x10, x9, mi
247 ; CHECK-NEXT: and x9, x9, #0x8000000000000000
248 ; CHECK-NEXT: lsr x11, x10, #53
249 ; CHECK-NEXT: and x12, x10, #0xfffffffffffff000
250 ; CHECK-NEXT: cmp x11, #0
251 ; CHECK-NEXT: csel x11, x12, x10, ne
252 ; CHECK-NEXT: cset w12, ne
253 ; CHECK-NEXT: tst x10, #0xfff
254 ; CHECK-NEXT: fmov x10, d0
255 ; CHECK-NEXT: csel w12, wzr, w12, eq
256 ; CHECK-NEXT: scvtf d0, x11
257 ; CHECK-NEXT: cmp x10, #0
258 ; CHECK-NEXT: cneg x13, x10, mi
259 ; CHECK-NEXT: and x10, x10, #0x8000000000000000
260 ; CHECK-NEXT: lsr x14, x13, #53
261 ; CHECK-NEXT: cmp x14, #0
262 ; CHECK-NEXT: and x14, x13, #0xfffffffffffff000
263 ; CHECK-NEXT: csel x11, x14, x13, ne
264 ; CHECK-NEXT: cset w14, ne
265 ; CHECK-NEXT: tst x13, #0xfff
266 ; CHECK-NEXT: scvtf d1, x11
267 ; CHECK-NEXT: fmov x11, d0
268 ; CHECK-NEXT: orr x9, x11, x9
269 ; CHECK-NEXT: csel w11, wzr, w14, eq
270 ; CHECK-NEXT: fmov x13, d1
271 ; CHECK-NEXT: orr x9, x9, x12
272 ; CHECK-NEXT: fmov d0, x9
273 ; CHECK-NEXT: orr x10, x13, x10
274 ; CHECK-NEXT: orr x10, x10, x11
275 ; CHECK-NEXT: fcvtxn s0, d0
276 ; CHECK-NEXT: fmov d1, x10
277 ; CHECK-NEXT: fcvtxn s1, d1
278 ; CHECK-NEXT: fmov w9, s0
279 ; CHECK-NEXT: ubfx w11, w9, #16, #1
280 ; CHECK-NEXT: add w9, w9, w8
281 ; CHECK-NEXT: fmov w10, s1
282 ; CHECK-NEXT: add w9, w11, w9
283 ; CHECK-NEXT: lsr w9, w9, #16
284 ; CHECK-NEXT: ubfx w12, w10, #16, #1
285 ; CHECK-NEXT: add w8, w10, w8
286 ; CHECK-NEXT: fmov s1, w9
287 ; CHECK-NEXT: add w8, w12, w8
288 ; CHECK-NEXT: lsr w8, w8, #16
289 ; CHECK-NEXT: fmov s0, w8
290 ; CHECK-NEXT: mov v0.h[1], v1.h[0]
291 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
294 %c = sitofp <2 x i64> %a to <2 x bfloat>
; <2 x i64> -> <2 x bfloat> unsigned conversion (uitofp).
; Expected code is scalarised: each lane is moved to a general register
; (mov x9, v0.d[1] / fmov x11, d0) and goes through the same steps as the
; scalar unsigned i64 test (lsr #53 / tst #0xfff flag, ucvtf, flag ORed in,
; fcvtxn, 0x7fff + bit 16 rounding, lsr #16). The two results are recombined
; with mov v0.h[1], v1.h[0].
298 define <2 x bfloat> @utofp_v2i64_v2bf16(<2 x i64> %a) {
299 ; CHECK-LABEL: utofp_v2i64_v2bf16:
300 ; CHECK: // %bb.0: // %entry
301 ; CHECK-NEXT: mov x9, v0.d[1]
302 ; CHECK-NEXT: fmov x11, d0
303 ; CHECK-NEXT: mov w8, #32767 // =0x7fff
304 ; CHECK-NEXT: lsr x10, x9, #53
305 ; CHECK-NEXT: and x12, x9, #0xfffffffffffff000
306 ; CHECK-NEXT: cmp x10, #0
307 ; CHECK-NEXT: lsr x10, x11, #53
308 ; CHECK-NEXT: csel x12, x12, x9, ne
309 ; CHECK-NEXT: cset w13, ne
310 ; CHECK-NEXT: tst x9, #0xfff
311 ; CHECK-NEXT: csel w9, wzr, w13, eq
312 ; CHECK-NEXT: cmp x10, #0
313 ; CHECK-NEXT: and x10, x11, #0xfffffffffffff000
314 ; CHECK-NEXT: csel x10, x10, x11, ne
315 ; CHECK-NEXT: ucvtf d0, x12
316 ; CHECK-NEXT: ucvtf d1, x10
317 ; CHECK-NEXT: cset w10, ne
318 ; CHECK-NEXT: tst x11, #0xfff
319 ; CHECK-NEXT: csel w10, wzr, w10, eq
320 ; CHECK-NEXT: fmov x11, d0
321 ; CHECK-NEXT: fmov x12, d1
322 ; CHECK-NEXT: orr x9, x11, x9
323 ; CHECK-NEXT: orr x10, x12, x10
324 ; CHECK-NEXT: fmov d0, x9
325 ; CHECK-NEXT: fmov d1, x10
326 ; CHECK-NEXT: fcvtxn s0, d0
327 ; CHECK-NEXT: fcvtxn s1, d1
328 ; CHECK-NEXT: fmov w9, s0
329 ; CHECK-NEXT: fmov w10, s1
330 ; CHECK-NEXT: ubfx w11, w9, #16, #1
331 ; CHECK-NEXT: add w9, w9, w8
332 ; CHECK-NEXT: ubfx w12, w10, #16, #1
333 ; CHECK-NEXT: add w8, w10, w8
334 ; CHECK-NEXT: add w9, w11, w9
335 ; CHECK-NEXT: add w8, w12, w8
336 ; CHECK-NEXT: lsr w9, w9, #16
337 ; CHECK-NEXT: lsr w8, w8, #16
338 ; CHECK-NEXT: fmov s1, w9
339 ; CHECK-NEXT: fmov s0, w8
340 ; CHECK-NEXT: mov v0.h[1], v1.h[0]
341 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
344 %c = uitofp <2 x i64> %a to <2 x bfloat>
; <3 x i64> -> <3 x bfloat> signed conversion (sitofp).
; Expected code packs the first two elements from d0/d1 into one vector
; (mov v0.d[1], v1.d[0]), converts with vector scvtf to .2d, narrows to .4s
; with fcvtn/fcvtn2, then rounds to bf16 per lane: bit 16 (ushr/and #1) plus
; the movi #127, msl #8 constant is added, lanes that fail the fcmeq
; self-compare instead take the input with orr #64, lsl #16 applied, and
; shrn #16 extracts the high halves.
; NOTE(review): unlike the scalar i64 tests in this file, this vector
; sequence expects fcvtn rather than fcvtxn and has no lsr #53 / tst #0xfff
; flag handling - confirm the extra rounding steps are intended.
348 define <3 x bfloat> @stofp_v3i64_v3bf16(<3 x i64> %a) {
349 ; CHECK-LABEL: stofp_v3i64_v3bf16:
350 ; CHECK: // %bb.0: // %entry
351 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
352 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
353 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
354 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
355 ; CHECK-NEXT: scvtf v1.2d, v2.2d
356 ; CHECK-NEXT: movi v2.4s, #127, msl #8
357 ; CHECK-NEXT: scvtf v0.2d, v0.2d
358 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
359 ; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
360 ; CHECK-NEXT: movi v1.4s, #1
361 ; CHECK-NEXT: ushr v3.4s, v0.4s, #16
362 ; CHECK-NEXT: add v2.4s, v0.4s, v2.4s
363 ; CHECK-NEXT: and v1.16b, v3.16b, v1.16b
364 ; CHECK-NEXT: fcmeq v3.4s, v0.4s, v0.4s
365 ; CHECK-NEXT: orr v0.4s, #64, lsl #16
366 ; CHECK-NEXT: add v1.4s, v1.4s, v2.4s
367 ; CHECK-NEXT: bit v0.16b, v1.16b, v3.16b
368 ; CHECK-NEXT: shrn v0.4h, v0.4s, #16
371 %c = sitofp <3 x i64> %a to <3 x bfloat>
; <3 x i64> -> <3 x bfloat> unsigned conversion (uitofp).
; Expected code packs the first two elements from d0/d1 into one vector
; (mov v0.d[1], v1.d[0]), converts with vector ucvtf to .2d, narrows to .4s
; with fcvtn/fcvtn2, then rounds to bf16 per lane: bit 16 (ushr/and #1) plus
; the movi #127, msl #8 constant is added, lanes that fail the fcmeq
; self-compare instead take the input with orr #64, lsl #16 applied, and
; shrn #16 extracts the high halves.
; NOTE(review): unlike the scalar i64 tests in this file, this vector
; sequence expects fcvtn rather than fcvtxn and has no lsr #53 / tst #0xfff
; flag handling - confirm the extra rounding steps are intended.
375 define <3 x bfloat> @utofp_v3i64_v3bf16(<3 x i64> %a) {
376 ; CHECK-LABEL: utofp_v3i64_v3bf16:
377 ; CHECK: // %bb.0: // %entry
378 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
379 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
380 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
381 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
382 ; CHECK-NEXT: ucvtf v1.2d, v2.2d
383 ; CHECK-NEXT: movi v2.4s, #127, msl #8
384 ; CHECK-NEXT: ucvtf v0.2d, v0.2d
385 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
386 ; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
387 ; CHECK-NEXT: movi v1.4s, #1
388 ; CHECK-NEXT: ushr v3.4s, v0.4s, #16
389 ; CHECK-NEXT: add v2.4s, v0.4s, v2.4s
390 ; CHECK-NEXT: and v1.16b, v3.16b, v1.16b
391 ; CHECK-NEXT: fcmeq v3.4s, v0.4s, v0.4s
392 ; CHECK-NEXT: orr v0.4s, #64, lsl #16
393 ; CHECK-NEXT: add v1.4s, v1.4s, v2.4s
394 ; CHECK-NEXT: bit v0.16b, v1.16b, v3.16b
395 ; CHECK-NEXT: shrn v0.4h, v0.4s, #16
398 %c = uitofp <3 x i64> %a to <3 x bfloat>
; <4 x i64> -> <4 x bfloat> signed conversion (sitofp).
; Expected code converts both input registers with vector scvtf to .2d,
; narrows them into one .4s vector with fcvtn/fcvtn2, then rounds to bf16 per
; lane: bit 16 (ushr/and #1) plus the movi #127, msl #8 constant is added,
; lanes that fail the fcmeq self-compare instead take the input with
; orr #64, lsl #16 applied, and shrn #16 extracts the high halves.
; NOTE(review): unlike the scalar i64 tests in this file, this vector
; sequence expects fcvtn rather than fcvtxn and has no lsr #53 / tst #0xfff
; flag handling - confirm the extra rounding steps are intended.
402 define <4 x bfloat> @stofp_v4i64_v4bf16(<4 x i64> %a) {
403 ; CHECK-LABEL: stofp_v4i64_v4bf16:
404 ; CHECK: // %bb.0: // %entry
405 ; CHECK-NEXT: scvtf v0.2d, v0.2d
406 ; CHECK-NEXT: scvtf v1.2d, v1.2d
407 ; CHECK-NEXT: movi v2.4s, #127, msl #8
408 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
409 ; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
410 ; CHECK-NEXT: movi v1.4s, #1
411 ; CHECK-NEXT: ushr v3.4s, v0.4s, #16
412 ; CHECK-NEXT: add v2.4s, v0.4s, v2.4s
413 ; CHECK-NEXT: and v1.16b, v3.16b, v1.16b
414 ; CHECK-NEXT: fcmeq v3.4s, v0.4s, v0.4s
415 ; CHECK-NEXT: orr v0.4s, #64, lsl #16
416 ; CHECK-NEXT: add v1.4s, v1.4s, v2.4s
417 ; CHECK-NEXT: bit v0.16b, v1.16b, v3.16b
418 ; CHECK-NEXT: shrn v0.4h, v0.4s, #16
421 %c = sitofp <4 x i64> %a to <4 x bfloat>
; <4 x i64> -> <4 x bfloat> unsigned conversion (uitofp).
; Expected code converts both input registers with vector ucvtf to .2d,
; narrows them into one .4s vector with fcvtn/fcvtn2, then rounds to bf16 per
; lane: bit 16 (ushr/and #1) plus the movi #127, msl #8 constant is added,
; lanes that fail the fcmeq self-compare instead take the input with
; orr #64, lsl #16 applied, and shrn #16 extracts the high halves.
; NOTE(review): unlike the scalar i64 tests in this file, this vector
; sequence expects fcvtn rather than fcvtxn and has no lsr #53 / tst #0xfff
; flag handling - confirm the extra rounding steps are intended.
425 define <4 x bfloat> @utofp_v4i64_v4bf16(<4 x i64> %a) {
426 ; CHECK-LABEL: utofp_v4i64_v4bf16:
427 ; CHECK: // %bb.0: // %entry
428 ; CHECK-NEXT: ucvtf v0.2d, v0.2d
429 ; CHECK-NEXT: ucvtf v1.2d, v1.2d
430 ; CHECK-NEXT: movi v2.4s, #127, msl #8
431 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
432 ; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
433 ; CHECK-NEXT: movi v1.4s, #1
434 ; CHECK-NEXT: ushr v3.4s, v0.4s, #16
435 ; CHECK-NEXT: add v2.4s, v0.4s, v2.4s
436 ; CHECK-NEXT: and v1.16b, v3.16b, v1.16b
437 ; CHECK-NEXT: fcmeq v3.4s, v0.4s, v0.4s
438 ; CHECK-NEXT: orr v0.4s, #64, lsl #16
439 ; CHECK-NEXT: add v1.4s, v1.4s, v2.4s
440 ; CHECK-NEXT: bit v0.16b, v1.16b, v3.16b
441 ; CHECK-NEXT: shrn v0.4h, v0.4s, #16
444 %c = uitofp <4 x i64> %a to <4 x bfloat>
; <8 x i64> -> <8 x bfloat> signed conversion (sitofp).
; Expected code converts the four input registers with vector scvtf to .2d
; and narrows them into two .4s vectors with fcvtn/fcvtn2. Each .4s vector is
; rounded to bf16 with the bit 16 + movi #127, msl #8 add, with the
; fcmeq/orr #64, lsl #16/bit select for lanes failing the self-compare. A
; single uzp2 on .8h gathers the high halves of both vectors into v0.
448 define <8 x bfloat> @stofp_v8i64_v8bf16(<8 x i64> %a) {
449 ; CHECK-LABEL: stofp_v8i64_v8bf16:
450 ; CHECK: // %bb.0: // %entry
451 ; CHECK-NEXT: scvtf v2.2d, v2.2d
452 ; CHECK-NEXT: scvtf v0.2d, v0.2d
453 ; CHECK-NEXT: scvtf v3.2d, v3.2d
454 ; CHECK-NEXT: scvtf v1.2d, v1.2d
455 ; CHECK-NEXT: fcvtn v2.2s, v2.2d
456 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
457 ; CHECK-NEXT: fcvtn2 v2.4s, v3.2d
458 ; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
459 ; CHECK-NEXT: movi v1.4s, #1
460 ; CHECK-NEXT: movi v3.4s, #127, msl #8
461 ; CHECK-NEXT: ushr v4.4s, v2.4s, #16
462 ; CHECK-NEXT: ushr v5.4s, v0.4s, #16
463 ; CHECK-NEXT: add v6.4s, v2.4s, v3.4s
464 ; CHECK-NEXT: add v3.4s, v0.4s, v3.4s
465 ; CHECK-NEXT: and v4.16b, v4.16b, v1.16b
466 ; CHECK-NEXT: and v1.16b, v5.16b, v1.16b
467 ; CHECK-NEXT: fcmeq v5.4s, v2.4s, v2.4s
468 ; CHECK-NEXT: orr v2.4s, #64, lsl #16
469 ; CHECK-NEXT: add v4.4s, v4.4s, v6.4s
470 ; CHECK-NEXT: fcmeq v6.4s, v0.4s, v0.4s
471 ; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
472 ; CHECK-NEXT: orr v0.4s, #64, lsl #16
473 ; CHECK-NEXT: bit v2.16b, v4.16b, v5.16b
474 ; CHECK-NEXT: bit v0.16b, v1.16b, v6.16b
475 ; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
478 %c = sitofp <8 x i64> %a to <8 x bfloat>
; <8 x i64> -> <8 x bfloat> unsigned conversion (uitofp).
; Expected code converts the four input registers with vector ucvtf to .2d
; and narrows them into two .4s vectors with fcvtn/fcvtn2. Each .4s vector is
; rounded to bf16 with the bit 16 + movi #127, msl #8 add, with the
; fcmeq/orr #64, lsl #16/bit select for lanes failing the self-compare. A
; single uzp2 on .8h gathers the high halves of both vectors into v0.
482 define <8 x bfloat> @utofp_v8i64_v8bf16(<8 x i64> %a) {
483 ; CHECK-LABEL: utofp_v8i64_v8bf16:
484 ; CHECK: // %bb.0: // %entry
485 ; CHECK-NEXT: ucvtf v2.2d, v2.2d
486 ; CHECK-NEXT: ucvtf v0.2d, v0.2d
487 ; CHECK-NEXT: ucvtf v3.2d, v3.2d
488 ; CHECK-NEXT: ucvtf v1.2d, v1.2d
489 ; CHECK-NEXT: fcvtn v2.2s, v2.2d
490 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
491 ; CHECK-NEXT: fcvtn2 v2.4s, v3.2d
492 ; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
493 ; CHECK-NEXT: movi v1.4s, #1
494 ; CHECK-NEXT: movi v3.4s, #127, msl #8
495 ; CHECK-NEXT: ushr v4.4s, v2.4s, #16
496 ; CHECK-NEXT: ushr v5.4s, v0.4s, #16
497 ; CHECK-NEXT: add v6.4s, v2.4s, v3.4s
498 ; CHECK-NEXT: add v3.4s, v0.4s, v3.4s
499 ; CHECK-NEXT: and v4.16b, v4.16b, v1.16b
500 ; CHECK-NEXT: and v1.16b, v5.16b, v1.16b
501 ; CHECK-NEXT: fcmeq v5.4s, v2.4s, v2.4s
502 ; CHECK-NEXT: orr v2.4s, #64, lsl #16
503 ; CHECK-NEXT: add v4.4s, v4.4s, v6.4s
504 ; CHECK-NEXT: fcmeq v6.4s, v0.4s, v0.4s
505 ; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
506 ; CHECK-NEXT: orr v0.4s, #64, lsl #16
507 ; CHECK-NEXT: bit v2.16b, v4.16b, v5.16b
508 ; CHECK-NEXT: bit v0.16b, v1.16b, v6.16b
509 ; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
512 %c = uitofp <8 x i64> %a to <8 x bfloat>
; <16 x i64> -> <16 x bfloat> signed conversion (sitofp).
; Expected code converts the eight input registers v0-v7 with vector scvtf to
; .2d and narrows them into four .4s vectors with fcvtn/fcvtn2. Each .4s
; vector is rounded to bf16 with the bit 16 + movi #127, msl #8 add and a
; fcmeq-controlled select (bit, bif or bsl) against the
; orr #64, lsl #16 variant. Two uzp2 on .8h produce the results in v0 and v1.
516 define <16 x bfloat> @stofp_v16i64_v16bf16(<16 x i64> %a) {
517 ; CHECK-LABEL: stofp_v16i64_v16bf16:
518 ; CHECK: // %bb.0: // %entry
519 ; CHECK-NEXT: scvtf v0.2d, v0.2d
520 ; CHECK-NEXT: scvtf v2.2d, v2.2d
521 ; CHECK-NEXT: scvtf v6.2d, v6.2d
522 ; CHECK-NEXT: scvtf v4.2d, v4.2d
523 ; CHECK-NEXT: scvtf v1.2d, v1.2d
524 ; CHECK-NEXT: scvtf v3.2d, v3.2d
525 ; CHECK-NEXT: scvtf v7.2d, v7.2d
526 ; CHECK-NEXT: scvtf v5.2d, v5.2d
527 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
528 ; CHECK-NEXT: fcvtn v2.2s, v2.2d
529 ; CHECK-NEXT: fcvtn v6.2s, v6.2d
530 ; CHECK-NEXT: fcvtn v4.2s, v4.2d
531 ; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
532 ; CHECK-NEXT: fcvtn2 v2.4s, v3.2d
533 ; CHECK-NEXT: fcvtn2 v6.4s, v7.2d
534 ; CHECK-NEXT: fcvtn2 v4.4s, v5.2d
535 ; CHECK-NEXT: movi v1.4s, #1
536 ; CHECK-NEXT: movi v3.4s, #127, msl #8
537 ; CHECK-NEXT: ushr v7.4s, v0.4s, #16
538 ; CHECK-NEXT: ushr v5.4s, v2.4s, #16
539 ; CHECK-NEXT: ushr v16.4s, v6.4s, #16
540 ; CHECK-NEXT: ushr v17.4s, v4.4s, #16
541 ; CHECK-NEXT: add v19.4s, v0.4s, v3.4s
542 ; CHECK-NEXT: add v18.4s, v2.4s, v3.4s
543 ; CHECK-NEXT: add v20.4s, v6.4s, v3.4s
544 ; CHECK-NEXT: add v3.4s, v4.4s, v3.4s
545 ; CHECK-NEXT: and v7.16b, v7.16b, v1.16b
546 ; CHECK-NEXT: and v5.16b, v5.16b, v1.16b
547 ; CHECK-NEXT: and v16.16b, v16.16b, v1.16b
548 ; CHECK-NEXT: and v1.16b, v17.16b, v1.16b
549 ; CHECK-NEXT: fcmeq v17.4s, v2.4s, v2.4s
550 ; CHECK-NEXT: orr v2.4s, #64, lsl #16
551 ; CHECK-NEXT: add v7.4s, v7.4s, v19.4s
552 ; CHECK-NEXT: fcmeq v19.4s, v6.4s, v6.4s
553 ; CHECK-NEXT: add v5.4s, v5.4s, v18.4s
554 ; CHECK-NEXT: fcmeq v18.4s, v0.4s, v0.4s
555 ; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
556 ; CHECK-NEXT: fcmeq v3.4s, v4.4s, v4.4s
557 ; CHECK-NEXT: add v16.4s, v16.4s, v20.4s
558 ; CHECK-NEXT: orr v0.4s, #64, lsl #16
559 ; CHECK-NEXT: orr v6.4s, #64, lsl #16
560 ; CHECK-NEXT: orr v4.4s, #64, lsl #16
561 ; CHECK-NEXT: bit v2.16b, v5.16b, v17.16b
562 ; CHECK-NEXT: mov v5.16b, v19.16b
563 ; CHECK-NEXT: bit v0.16b, v7.16b, v18.16b
564 ; CHECK-NEXT: bif v1.16b, v4.16b, v3.16b
565 ; CHECK-NEXT: bsl v5.16b, v16.16b, v6.16b
566 ; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
567 ; CHECK-NEXT: uzp2 v1.8h, v1.8h, v5.8h
570 %c = sitofp <16 x i64> %a to <16 x bfloat>
; <16 x i64> -> <16 x bfloat> unsigned conversion (uitofp).
; Expected code converts the eight input registers v0-v7 with vector ucvtf to
; .2d and narrows them into four .4s vectors with fcvtn/fcvtn2. Each .4s
; vector is rounded to bf16 with the bit 16 + movi #127, msl #8 add and a
; fcmeq-controlled select (bit, bif or bsl) against the
; orr #64, lsl #16 variant. Two uzp2 on .8h produce the results in v0 and v1.
574 define <16 x bfloat> @utofp_v16i64_v16bf16(<16 x i64> %a) {
575 ; CHECK-LABEL: utofp_v16i64_v16bf16:
576 ; CHECK: // %bb.0: // %entry
577 ; CHECK-NEXT: ucvtf v0.2d, v0.2d
578 ; CHECK-NEXT: ucvtf v2.2d, v2.2d
579 ; CHECK-NEXT: ucvtf v6.2d, v6.2d
580 ; CHECK-NEXT: ucvtf v4.2d, v4.2d
581 ; CHECK-NEXT: ucvtf v1.2d, v1.2d
582 ; CHECK-NEXT: ucvtf v3.2d, v3.2d
583 ; CHECK-NEXT: ucvtf v7.2d, v7.2d
584 ; CHECK-NEXT: ucvtf v5.2d, v5.2d
585 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
586 ; CHECK-NEXT: fcvtn v2.2s, v2.2d
587 ; CHECK-NEXT: fcvtn v6.2s, v6.2d
588 ; CHECK-NEXT: fcvtn v4.2s, v4.2d
589 ; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
590 ; CHECK-NEXT: fcvtn2 v2.4s, v3.2d
591 ; CHECK-NEXT: fcvtn2 v6.4s, v7.2d
592 ; CHECK-NEXT: fcvtn2 v4.4s, v5.2d
593 ; CHECK-NEXT: movi v1.4s, #1
594 ; CHECK-NEXT: movi v3.4s, #127, msl #8
595 ; CHECK-NEXT: ushr v7.4s, v0.4s, #16
596 ; CHECK-NEXT: ushr v5.4s, v2.4s, #16
597 ; CHECK-NEXT: ushr v16.4s, v6.4s, #16
598 ; CHECK-NEXT: ushr v17.4s, v4.4s, #16
599 ; CHECK-NEXT: add v19.4s, v0.4s, v3.4s
600 ; CHECK-NEXT: add v18.4s, v2.4s, v3.4s
601 ; CHECK-NEXT: add v20.4s, v6.4s, v3.4s
602 ; CHECK-NEXT: add v3.4s, v4.4s, v3.4s
603 ; CHECK-NEXT: and v7.16b, v7.16b, v1.16b
604 ; CHECK-NEXT: and v5.16b, v5.16b, v1.16b
605 ; CHECK-NEXT: and v16.16b, v16.16b, v1.16b
606 ; CHECK-NEXT: and v1.16b, v17.16b, v1.16b
607 ; CHECK-NEXT: fcmeq v17.4s, v2.4s, v2.4s
608 ; CHECK-NEXT: orr v2.4s, #64, lsl #16
609 ; CHECK-NEXT: add v7.4s, v7.4s, v19.4s
610 ; CHECK-NEXT: fcmeq v19.4s, v6.4s, v6.4s
611 ; CHECK-NEXT: add v5.4s, v5.4s, v18.4s
612 ; CHECK-NEXT: fcmeq v18.4s, v0.4s, v0.4s
613 ; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
614 ; CHECK-NEXT: fcmeq v3.4s, v4.4s, v4.4s
615 ; CHECK-NEXT: add v16.4s, v16.4s, v20.4s
616 ; CHECK-NEXT: orr v0.4s, #64, lsl #16
617 ; CHECK-NEXT: orr v6.4s, #64, lsl #16
618 ; CHECK-NEXT: orr v4.4s, #64, lsl #16
619 ; CHECK-NEXT: bit v2.16b, v5.16b, v17.16b
620 ; CHECK-NEXT: mov v5.16b, v19.16b
621 ; CHECK-NEXT: bit v0.16b, v7.16b, v18.16b
622 ; CHECK-NEXT: bif v1.16b, v4.16b, v3.16b
623 ; CHECK-NEXT: bsl v5.16b, v16.16b, v6.16b
624 ; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
625 ; CHECK-NEXT: uzp2 v1.8h, v1.8h, v5.8h
628 %c = uitofp <16 x i64> %a to <16 x bfloat>
; <32 x i64> -> <32 x bfloat> signed conversion (sitofp).
; Expected code takes eight input vectors from v0-v7 and loads eight more
; from the stack with ldp at [sp], [sp, #32], [sp, #64] and [sp, #96]. All
; are converted with vector scvtf to .2d and narrowed into eight .4s vectors
; with fcvtn/fcvtn2. Each .4s vector is rounded to bf16 with the bit 16 +
; movi #127, msl #8 add and a fcmeq-controlled select (bit or bsl) against
; the orr #64, lsl #16 variant. Four uzp2 on .8h produce the results in
; v0-v3.
632 define <32 x bfloat> @stofp_v32i64_v32bf16(<32 x i64> %a) {
633 ; CHECK-LABEL: stofp_v32i64_v32bf16:
634 ; CHECK: // %bb.0: // %entry
635 ; CHECK-NEXT: scvtf v17.2d, v2.2d
636 ; CHECK-NEXT: scvtf v18.2d, v0.2d
637 ; CHECK-NEXT: scvtf v19.2d, v3.2d
638 ; CHECK-NEXT: scvtf v3.2d, v6.2d
639 ; CHECK-NEXT: ldp q21, q20, [sp, #32]
640 ; CHECK-NEXT: scvtf v4.2d, v4.2d
641 ; CHECK-NEXT: scvtf v6.2d, v7.2d
642 ; CHECK-NEXT: scvtf v5.2d, v5.2d
643 ; CHECK-NEXT: ldp q24, q23, [sp, #64]
644 ; CHECK-NEXT: movi v16.4s, #1
645 ; CHECK-NEXT: fcvtn v0.2s, v17.2d
646 ; CHECK-NEXT: scvtf v17.2d, v1.2d
647 ; CHECK-NEXT: fcvtn v1.2s, v18.2d
648 ; CHECK-NEXT: fcvtn v3.2s, v3.2d
649 ; CHECK-NEXT: ldp q18, q7, [sp]
650 ; CHECK-NEXT: scvtf v21.2d, v21.2d
651 ; CHECK-NEXT: fcvtn v4.2s, v4.2d
652 ; CHECK-NEXT: movi v2.4s, #127, msl #8
653 ; CHECK-NEXT: scvtf v20.2d, v20.2d
654 ; CHECK-NEXT: fcvtn2 v0.4s, v19.2d
655 ; CHECK-NEXT: ldp q22, q19, [sp, #96]
656 ; CHECK-NEXT: fcvtn2 v1.4s, v17.2d
657 ; CHECK-NEXT: fcvtn2 v3.4s, v6.2d
658 ; CHECK-NEXT: scvtf v18.2d, v18.2d
659 ; CHECK-NEXT: scvtf v17.2d, v24.2d
660 ; CHECK-NEXT: fcvtn v6.2s, v21.2d
661 ; CHECK-NEXT: fcvtn2 v4.4s, v5.2d
662 ; CHECK-NEXT: scvtf v22.2d, v22.2d
663 ; CHECK-NEXT: scvtf v21.2d, v23.2d
664 ; CHECK-NEXT: scvtf v7.2d, v7.2d
665 ; CHECK-NEXT: ushr v24.4s, v0.4s, #16
666 ; CHECK-NEXT: add v5.4s, v0.4s, v2.4s
667 ; CHECK-NEXT: scvtf v19.2d, v19.2d
668 ; CHECK-NEXT: ushr v23.4s, v1.4s, #16
669 ; CHECK-NEXT: ushr v25.4s, v3.4s, #16
670 ; CHECK-NEXT: fcvtn v18.2s, v18.2d
671 ; CHECK-NEXT: fcvtn2 v6.4s, v20.2d
672 ; CHECK-NEXT: add v26.4s, v1.4s, v2.4s
673 ; CHECK-NEXT: fcvtn v17.2s, v17.2d
674 ; CHECK-NEXT: and v24.16b, v24.16b, v16.16b
675 ; CHECK-NEXT: fcvtn v22.2s, v22.2d
676 ; CHECK-NEXT: fcmeq v20.4s, v0.4s, v0.4s
677 ; CHECK-NEXT: and v23.16b, v23.16b, v16.16b
678 ; CHECK-NEXT: orr v0.4s, #64, lsl #16
679 ; CHECK-NEXT: fcmeq v27.4s, v3.4s, v3.4s
680 ; CHECK-NEXT: fcvtn2 v18.4s, v7.2d
681 ; CHECK-NEXT: add v7.4s, v3.4s, v2.4s
682 ; CHECK-NEXT: orr v3.4s, #64, lsl #16
683 ; CHECK-NEXT: add v5.4s, v24.4s, v5.4s
684 ; CHECK-NEXT: and v24.16b, v25.16b, v16.16b
685 ; CHECK-NEXT: ushr v25.4s, v4.4s, #16
686 ; CHECK-NEXT: fcvtn2 v22.4s, v19.2d
687 ; CHECK-NEXT: add v19.4s, v23.4s, v26.4s
688 ; CHECK-NEXT: ushr v26.4s, v6.4s, #16
689 ; CHECK-NEXT: fcvtn2 v17.4s, v21.2d
690 ; CHECK-NEXT: fcmeq v21.4s, v1.4s, v1.4s
691 ; CHECK-NEXT: orr v1.4s, #64, lsl #16
692 ; CHECK-NEXT: and v23.16b, v25.16b, v16.16b
693 ; CHECK-NEXT: add v25.4s, v4.4s, v2.4s
694 ; CHECK-NEXT: add v7.4s, v24.4s, v7.4s
695 ; CHECK-NEXT: ushr v24.4s, v18.4s, #16
696 ; CHECK-NEXT: add v30.4s, v18.4s, v2.4s
697 ; CHECK-NEXT: bit v0.16b, v5.16b, v20.16b
698 ; CHECK-NEXT: ushr v28.4s, v22.4s, #16
699 ; CHECK-NEXT: add v31.4s, v22.4s, v2.4s
700 ; CHECK-NEXT: add v23.4s, v23.4s, v25.4s
701 ; CHECK-NEXT: and v25.16b, v26.16b, v16.16b
702 ; CHECK-NEXT: add v26.4s, v6.4s, v2.4s
703 ; CHECK-NEXT: ushr v29.4s, v17.4s, #16
704 ; CHECK-NEXT: and v24.16b, v24.16b, v16.16b
705 ; CHECK-NEXT: add v2.4s, v17.4s, v2.4s
706 ; CHECK-NEXT: and v28.16b, v28.16b, v16.16b
707 ; CHECK-NEXT: bit v3.16b, v7.16b, v27.16b
708 ; CHECK-NEXT: bit v1.16b, v19.16b, v21.16b
709 ; CHECK-NEXT: add v25.4s, v25.4s, v26.4s
710 ; CHECK-NEXT: fcmeq v26.4s, v6.4s, v6.4s
711 ; CHECK-NEXT: orr v6.4s, #64, lsl #16
712 ; CHECK-NEXT: and v16.16b, v29.16b, v16.16b
713 ; CHECK-NEXT: add v24.4s, v24.4s, v30.4s
714 ; CHECK-NEXT: fcmeq v30.4s, v18.4s, v18.4s
715 ; CHECK-NEXT: add v28.4s, v28.4s, v31.4s
716 ; CHECK-NEXT: fcmeq v31.4s, v22.4s, v22.4s
717 ; CHECK-NEXT: fcmeq v29.4s, v4.4s, v4.4s
718 ; CHECK-NEXT: orr v4.4s, #64, lsl #16
719 ; CHECK-NEXT: orr v18.4s, #64, lsl #16
720 ; CHECK-NEXT: orr v22.4s, #64, lsl #16
721 ; CHECK-NEXT: mov v5.16b, v26.16b
722 ; CHECK-NEXT: add v2.4s, v16.4s, v2.4s
723 ; CHECK-NEXT: fcmeq v16.4s, v17.4s, v17.4s
724 ; CHECK-NEXT: orr v17.4s, #64, lsl #16
725 ; CHECK-NEXT: uzp2 v0.8h, v1.8h, v0.8h
726 ; CHECK-NEXT: mov v7.16b, v31.16b
727 ; CHECK-NEXT: bit v4.16b, v23.16b, v29.16b
728 ; CHECK-NEXT: bsl v5.16b, v25.16b, v6.16b
729 ; CHECK-NEXT: mov v6.16b, v30.16b
730 ; CHECK-NEXT: bsl v16.16b, v2.16b, v17.16b
731 ; CHECK-NEXT: bsl v7.16b, v28.16b, v22.16b
732 ; CHECK-NEXT: bsl v6.16b, v24.16b, v18.16b
733 ; CHECK-NEXT: uzp2 v1.8h, v4.8h, v3.8h
734 ; CHECK-NEXT: uzp2 v3.8h, v16.8h, v7.8h
735 ; CHECK-NEXT: uzp2 v2.8h, v6.8h, v5.8h
738 %c = sitofp <32 x i64> %a to <32 x bfloat>
; <32 x i64> -> <32 x bfloat> unsigned conversion (uitofp).
; Expected code takes eight input vectors from v0-v7 and loads eight more
; from the stack with ldp at [sp], [sp, #32], [sp, #64] and [sp, #96]. All
; are converted with vector ucvtf to .2d and narrowed into eight .4s vectors
; with fcvtn/fcvtn2. Each .4s vector is rounded to bf16 with the bit 16 +
; movi #127, msl #8 add and a fcmeq-controlled select (bit or bsl) against
; the orr #64, lsl #16 variant. Four uzp2 on .8h produce the results in
; v0-v3.
742 define <32 x bfloat> @utofp_v32i64_v32bf16(<32 x i64> %a) {
743 ; CHECK-LABEL: utofp_v32i64_v32bf16:
744 ; CHECK: // %bb.0: // %entry
745 ; CHECK-NEXT: ucvtf v17.2d, v2.2d
746 ; CHECK-NEXT: ucvtf v18.2d, v0.2d
747 ; CHECK-NEXT: ucvtf v19.2d, v3.2d
748 ; CHECK-NEXT: ucvtf v3.2d, v6.2d
749 ; CHECK-NEXT: ldp q21, q20, [sp, #32]
750 ; CHECK-NEXT: ucvtf v4.2d, v4.2d
751 ; CHECK-NEXT: ucvtf v6.2d, v7.2d
752 ; CHECK-NEXT: ucvtf v5.2d, v5.2d
753 ; CHECK-NEXT: ldp q24, q23, [sp, #64]
754 ; CHECK-NEXT: movi v16.4s, #1
755 ; CHECK-NEXT: fcvtn v0.2s, v17.2d
756 ; CHECK-NEXT: ucvtf v17.2d, v1.2d
757 ; CHECK-NEXT: fcvtn v1.2s, v18.2d
758 ; CHECK-NEXT: fcvtn v3.2s, v3.2d
759 ; CHECK-NEXT: ldp q18, q7, [sp]
760 ; CHECK-NEXT: ucvtf v21.2d, v21.2d
761 ; CHECK-NEXT: fcvtn v4.2s, v4.2d
762 ; CHECK-NEXT: movi v2.4s, #127, msl #8
763 ; CHECK-NEXT: ucvtf v20.2d, v20.2d
764 ; CHECK-NEXT: fcvtn2 v0.4s, v19.2d
765 ; CHECK-NEXT: ldp q22, q19, [sp, #96]
766 ; CHECK-NEXT: fcvtn2 v1.4s, v17.2d
767 ; CHECK-NEXT: fcvtn2 v3.4s, v6.2d
768 ; CHECK-NEXT: ucvtf v18.2d, v18.2d
769 ; CHECK-NEXT: ucvtf v17.2d, v24.2d
770 ; CHECK-NEXT: fcvtn v6.2s, v21.2d
771 ; CHECK-NEXT: fcvtn2 v4.4s, v5.2d
772 ; CHECK-NEXT: ucvtf v22.2d, v22.2d
773 ; CHECK-NEXT: ucvtf v21.2d, v23.2d
774 ; CHECK-NEXT: ucvtf v7.2d, v7.2d
775 ; CHECK-NEXT: ushr v24.4s, v0.4s, #16
776 ; CHECK-NEXT: add v5.4s, v0.4s, v2.4s
777 ; CHECK-NEXT: ucvtf v19.2d, v19.2d
778 ; CHECK-NEXT: ushr v23.4s, v1.4s, #16
779 ; CHECK-NEXT: ushr v25.4s, v3.4s, #16
780 ; CHECK-NEXT: fcvtn v18.2s, v18.2d
781 ; CHECK-NEXT: fcvtn2 v6.4s, v20.2d
782 ; CHECK-NEXT: add v26.4s, v1.4s, v2.4s
783 ; CHECK-NEXT: fcvtn v17.2s, v17.2d
784 ; CHECK-NEXT: and v24.16b, v24.16b, v16.16b
785 ; CHECK-NEXT: fcvtn v22.2s, v22.2d
786 ; CHECK-NEXT: fcmeq v20.4s, v0.4s, v0.4s
787 ; CHECK-NEXT: and v23.16b, v23.16b, v16.16b
788 ; CHECK-NEXT: orr v0.4s, #64, lsl #16
789 ; CHECK-NEXT: fcmeq v27.4s, v3.4s, v3.4s
790 ; CHECK-NEXT: fcvtn2 v18.4s, v7.2d
791 ; CHECK-NEXT: add v7.4s, v3.4s, v2.4s
792 ; CHECK-NEXT: orr v3.4s, #64, lsl #16
793 ; CHECK-NEXT: add v5.4s, v24.4s, v5.4s
794 ; CHECK-NEXT: and v24.16b, v25.16b, v16.16b
795 ; CHECK-NEXT: ushr v25.4s, v4.4s, #16
796 ; CHECK-NEXT: fcvtn2 v22.4s, v19.2d
797 ; CHECK-NEXT: add v19.4s, v23.4s, v26.4s
798 ; CHECK-NEXT: ushr v26.4s, v6.4s, #16
799 ; CHECK-NEXT: fcvtn2 v17.4s, v21.2d
800 ; CHECK-NEXT: fcmeq v21.4s, v1.4s, v1.4s
801 ; CHECK-NEXT: orr v1.4s, #64, lsl #16
802 ; CHECK-NEXT: and v23.16b, v25.16b, v16.16b
803 ; CHECK-NEXT: add v25.4s, v4.4s, v2.4s
804 ; CHECK-NEXT: add v7.4s, v24.4s, v7.4s
805 ; CHECK-NEXT: ushr v24.4s, v18.4s, #16
806 ; CHECK-NEXT: add v30.4s, v18.4s, v2.4s
807 ; CHECK-NEXT: bit v0.16b, v5.16b, v20.16b
808 ; CHECK-NEXT: ushr v28.4s, v22.4s, #16
809 ; CHECK-NEXT: add v31.4s, v22.4s, v2.4s
810 ; CHECK-NEXT: add v23.4s, v23.4s, v25.4s
811 ; CHECK-NEXT: and v25.16b, v26.16b, v16.16b
812 ; CHECK-NEXT: add v26.4s, v6.4s, v2.4s
813 ; CHECK-NEXT: ushr v29.4s, v17.4s, #16
814 ; CHECK-NEXT: and v24.16b, v24.16b, v16.16b
815 ; CHECK-NEXT: add v2.4s, v17.4s, v2.4s
816 ; CHECK-NEXT: and v28.16b, v28.16b, v16.16b
817 ; CHECK-NEXT: bit v3.16b, v7.16b, v27.16b
818 ; CHECK-NEXT: bit v1.16b, v19.16b, v21.16b
819 ; CHECK-NEXT: add v25.4s, v25.4s, v26.4s
820 ; CHECK-NEXT: fcmeq v26.4s, v6.4s, v6.4s
821 ; CHECK-NEXT: orr v6.4s, #64, lsl #16
822 ; CHECK-NEXT: and v16.16b, v29.16b, v16.16b
823 ; CHECK-NEXT: add v24.4s, v24.4s, v30.4s
824 ; CHECK-NEXT: fcmeq v30.4s, v18.4s, v18.4s
825 ; CHECK-NEXT: add v28.4s, v28.4s, v31.4s
826 ; CHECK-NEXT: fcmeq v31.4s, v22.4s, v22.4s
827 ; CHECK-NEXT: fcmeq v29.4s, v4.4s, v4.4s
828 ; CHECK-NEXT: orr v4.4s, #64, lsl #16
829 ; CHECK-NEXT: orr v18.4s, #64, lsl #16
830 ; CHECK-NEXT: orr v22.4s, #64, lsl #16
831 ; CHECK-NEXT: mov v5.16b, v26.16b
832 ; CHECK-NEXT: add v2.4s, v16.4s, v2.4s
833 ; CHECK-NEXT: fcmeq v16.4s, v17.4s, v17.4s
834 ; CHECK-NEXT: orr v17.4s, #64, lsl #16
835 ; CHECK-NEXT: uzp2 v0.8h, v1.8h, v0.8h
836 ; CHECK-NEXT: mov v7.16b, v31.16b
837 ; CHECK-NEXT: bit v4.16b, v23.16b, v29.16b
838 ; CHECK-NEXT: bsl v5.16b, v25.16b, v6.16b
839 ; CHECK-NEXT: mov v6.16b, v30.16b
840 ; CHECK-NEXT: bsl v16.16b, v2.16b, v17.16b
841 ; CHECK-NEXT: bsl v7.16b, v28.16b, v22.16b
842 ; CHECK-NEXT: bsl v6.16b, v24.16b, v18.16b
843 ; CHECK-NEXT: uzp2 v1.8h, v4.8h, v3.8h
844 ; CHECK-NEXT: uzp2 v3.8h, v16.8h, v7.8h
845 ; CHECK-NEXT: uzp2 v2.8h, v6.8h, v5.8h
848 %c = uitofp <32 x i64> %a to <32 x bfloat>
; Test: sitofp <2 x i32> -> <2 x bfloat>; expectations use the shared CHECK prefix.
; Expected code widens the d-register argument to q0, converts with a 4-lane scvtf, then
; narrows each f32 lane: bit 16 of the lane and the "#127, msl #8" constant (0x7fff) are
; added and addhn keeps the upper 16 bits of the sum.
852 define <2 x bfloat> @stofp_v2i32_v2bf16(<2 x i32> %a) {
853 ; CHECK-LABEL: stofp_v2i32_v2bf16:
854 ; CHECK: // %bb.0: // %entry
855 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
856 ; CHECK-NEXT: movi v1.4s, #1
857 ; CHECK-NEXT: scvtf v0.4s, v0.4s
858 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
859 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
860 ; CHECK-NEXT: movi v2.4s, #127, msl #8
861 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
862 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
865 %c = sitofp <2 x i32> %a to <2 x bfloat>
; Test: uitofp <2 x i32> -> <2 x bfloat>; expectations use the shared CHECK prefix.
; Same expected sequence as the signed <2 x i32> case except that the int-to-f32 step is
; ucvtf: lane bit 16 plus 0x7fff is added and addhn keeps the upper 16 bits.
869 define <2 x bfloat> @utofp_v2i32_v2bf16(<2 x i32> %a) {
870 ; CHECK-LABEL: utofp_v2i32_v2bf16:
871 ; CHECK: // %bb.0: // %entry
872 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
873 ; CHECK-NEXT: movi v1.4s, #1
874 ; CHECK-NEXT: ucvtf v0.4s, v0.4s
875 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
876 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
877 ; CHECK-NEXT: movi v2.4s, #127, msl #8
878 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
879 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
882 %c = uitofp <2 x i32> %a to <2 x bfloat>
; Test: sitofp <3 x i32> -> <3 x bfloat>; expectations use the shared CHECK prefix.
; The three-element input is expected to be processed as a full 4-lane vector: scvtf .4s,
; then add lane bit 16 and 0x7fff and keep the upper 16 bits of each lane via addhn.
886 define <3 x bfloat> @stofp_v3i32_v3bf16(<3 x i32> %a) {
887 ; CHECK-LABEL: stofp_v3i32_v3bf16:
888 ; CHECK: // %bb.0: // %entry
889 ; CHECK-NEXT: scvtf v0.4s, v0.4s
890 ; CHECK-NEXT: movi v1.4s, #1
891 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
892 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
893 ; CHECK-NEXT: movi v2.4s, #127, msl #8
894 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
895 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
898 %c = sitofp <3 x i32> %a to <3 x bfloat>
; Test: uitofp <3 x i32> -> <3 x bfloat>; expectations use the shared CHECK prefix.
; Unsigned counterpart of the <3 x i32> case: ucvtf on all four lanes, followed by the
; same bit-16 + 0x7fff add and addhn narrowing to 16-bit lanes.
902 define <3 x bfloat> @utofp_v3i32_v3bf16(<3 x i32> %a) {
903 ; CHECK-LABEL: utofp_v3i32_v3bf16:
904 ; CHECK: // %bb.0: // %entry
905 ; CHECK-NEXT: ucvtf v0.4s, v0.4s
906 ; CHECK-NEXT: movi v1.4s, #1
907 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
908 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
909 ; CHECK-NEXT: movi v2.4s, #127, msl #8
910 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
911 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
914 %c = uitofp <3 x i32> %a to <3 x bfloat>
; Test: sitofp <4 x i32> -> <4 x bfloat>; expectations use the shared CHECK prefix.
; Expected code: one scvtf on the q register, then per lane add bit 16 of the f32 value
; and 0x7fff, with addhn producing the four 16-bit results from the upper halves.
918 define <4 x bfloat> @stofp_v4i32_v4bf16(<4 x i32> %a) {
919 ; CHECK-LABEL: stofp_v4i32_v4bf16:
920 ; CHECK: // %bb.0: // %entry
921 ; CHECK-NEXT: scvtf v0.4s, v0.4s
922 ; CHECK-NEXT: movi v1.4s, #1
923 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
924 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
925 ; CHECK-NEXT: movi v2.4s, #127, msl #8
926 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
927 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
930 %c = sitofp <4 x i32> %a to <4 x bfloat>
; Test: uitofp <4 x i32> -> <4 x bfloat>; expectations use the shared CHECK prefix.
; Expected code: one ucvtf on the q register, then per lane add bit 16 of the f32 value
; and 0x7fff, with addhn producing the four 16-bit results from the upper halves.
934 define <4 x bfloat> @utofp_v4i32_v4bf16(<4 x i32> %a) {
935 ; CHECK-LABEL: utofp_v4i32_v4bf16:
936 ; CHECK: // %bb.0: // %entry
937 ; CHECK-NEXT: ucvtf v0.4s, v0.4s
938 ; CHECK-NEXT: movi v1.4s, #1
939 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
940 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
941 ; CHECK-NEXT: movi v2.4s, #127, msl #8
942 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
943 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
946 %c = uitofp <4 x i32> %a to <4 x bfloat>
; Test: sitofp <8 x i32> -> <8 x bfloat>; expectations use the shared CHECK prefix.
; The input occupies v0 and v1. Each half is converted with scvtf and adjusted by its own
; bit 16; addhn writes the low four results and addhn2 the high four of v0, both adding
; the shared 0x7fff constant held in v5.
950 define <8 x bfloat> @stofp_v8i32_v8bf16(<8 x i32> %a) {
951 ; CHECK-LABEL: stofp_v8i32_v8bf16:
952 ; CHECK: // %bb.0: // %entry
953 ; CHECK-NEXT: scvtf v0.4s, v0.4s
954 ; CHECK-NEXT: movi v2.4s, #1
955 ; CHECK-NEXT: scvtf v1.4s, v1.4s
956 ; CHECK-NEXT: movi v5.4s, #127, msl #8
957 ; CHECK-NEXT: ushr v3.4s, v0.4s, #16
958 ; CHECK-NEXT: ushr v4.4s, v1.4s, #16
959 ; CHECK-NEXT: and v3.16b, v3.16b, v2.16b
960 ; CHECK-NEXT: and v2.16b, v4.16b, v2.16b
961 ; CHECK-NEXT: add v0.4s, v3.4s, v0.4s
962 ; CHECK-NEXT: add v1.4s, v2.4s, v1.4s
963 ; CHECK-NEXT: addhn v0.4h, v0.4s, v5.4s
964 ; CHECK-NEXT: addhn2 v0.8h, v1.4s, v5.4s
967 %c = sitofp <8 x i32> %a to <8 x bfloat>
; Test: uitofp <8 x i32> -> <8 x bfloat>; expectations use the shared CHECK prefix.
; Unsigned counterpart of the <8 x i32> case: ucvtf on v0 and v1, per-lane bit-16
; adjustment, then addhn/addhn2 with the 0x7fff constant pack both halves into v0.
971 define <8 x bfloat> @utofp_v8i32_v8bf16(<8 x i32> %a) {
972 ; CHECK-LABEL: utofp_v8i32_v8bf16:
973 ; CHECK: // %bb.0: // %entry
974 ; CHECK-NEXT: ucvtf v0.4s, v0.4s
975 ; CHECK-NEXT: movi v2.4s, #1
976 ; CHECK-NEXT: ucvtf v1.4s, v1.4s
977 ; CHECK-NEXT: movi v5.4s, #127, msl #8
978 ; CHECK-NEXT: ushr v3.4s, v0.4s, #16
979 ; CHECK-NEXT: ushr v4.4s, v1.4s, #16
980 ; CHECK-NEXT: and v3.16b, v3.16b, v2.16b
981 ; CHECK-NEXT: and v2.16b, v4.16b, v2.16b
982 ; CHECK-NEXT: add v0.4s, v3.4s, v0.4s
983 ; CHECK-NEXT: add v1.4s, v2.4s, v1.4s
984 ; CHECK-NEXT: addhn v0.4h, v0.4s, v5.4s
985 ; CHECK-NEXT: addhn2 v0.8h, v1.4s, v5.4s
988 %c = uitofp <8 x i32> %a to <8 x bfloat>
; Test: sitofp <16 x i32> -> <16 x bfloat>; expectations use the shared CHECK prefix.
; The input occupies v0-v3. All four registers are converted with scvtf and adjusted by
; their own bit 16; addhn/addhn2 with the 0x7fff constant in v17 pack the results from
; v0/v1 into the returned v0 and those from v2/v3 into the returned v1.
992 define <16 x bfloat> @stofp_v16i32_v16bf16(<16 x i32> %a) {
993 ; CHECK-LABEL: stofp_v16i32_v16bf16:
994 ; CHECK: // %bb.0: // %entry
995 ; CHECK-NEXT: scvtf v2.4s, v2.4s
996 ; CHECK-NEXT: scvtf v0.4s, v0.4s
997 ; CHECK-NEXT: scvtf v4.4s, v1.4s
998 ; CHECK-NEXT: movi v1.4s, #1
999 ; CHECK-NEXT: scvtf v3.4s, v3.4s
1000 ; CHECK-NEXT: movi v17.4s, #127, msl #8
1001 ; CHECK-NEXT: ushr v5.4s, v0.4s, #16
1002 ; CHECK-NEXT: ushr v6.4s, v2.4s, #16
1003 ; CHECK-NEXT: ushr v7.4s, v4.4s, #16
1004 ; CHECK-NEXT: ushr v16.4s, v3.4s, #16
1005 ; CHECK-NEXT: and v5.16b, v5.16b, v1.16b
1006 ; CHECK-NEXT: and v6.16b, v6.16b, v1.16b
1007 ; CHECK-NEXT: add v0.4s, v5.4s, v0.4s
1008 ; CHECK-NEXT: add v2.4s, v6.4s, v2.4s
1009 ; CHECK-NEXT: and v5.16b, v7.16b, v1.16b
1010 ; CHECK-NEXT: and v6.16b, v16.16b, v1.16b
1011 ; CHECK-NEXT: addhn v0.4h, v0.4s, v17.4s
1012 ; CHECK-NEXT: addhn v1.4h, v2.4s, v17.4s
1013 ; CHECK-NEXT: add v2.4s, v5.4s, v4.4s
1014 ; CHECK-NEXT: add v3.4s, v6.4s, v3.4s
1015 ; CHECK-NEXT: addhn2 v0.8h, v2.4s, v17.4s
1016 ; CHECK-NEXT: addhn2 v1.8h, v3.4s, v17.4s
1019 %c = sitofp <16 x i32> %a to <16 x bfloat>
1020 ret <16 x bfloat> %c
; Test: uitofp <16 x i32> -> <16 x bfloat>; expectations use the shared CHECK prefix.
; Unsigned counterpart of the <16 x i32> case: ucvtf on v0-v3, per-lane bit-16
; adjustment, then addhn/addhn2 with the 0x7fff constant in v17 pack results into v0/v1.
1023 define <16 x bfloat> @utofp_v16i32_v16bf16(<16 x i32> %a) {
1024 ; CHECK-LABEL: utofp_v16i32_v16bf16:
1025 ; CHECK: // %bb.0: // %entry
1026 ; CHECK-NEXT: ucvtf v2.4s, v2.4s
1027 ; CHECK-NEXT: ucvtf v0.4s, v0.4s
1028 ; CHECK-NEXT: ucvtf v4.4s, v1.4s
1029 ; CHECK-NEXT: movi v1.4s, #1
1030 ; CHECK-NEXT: ucvtf v3.4s, v3.4s
1031 ; CHECK-NEXT: movi v17.4s, #127, msl #8
1032 ; CHECK-NEXT: ushr v5.4s, v0.4s, #16
1033 ; CHECK-NEXT: ushr v6.4s, v2.4s, #16
1034 ; CHECK-NEXT: ushr v7.4s, v4.4s, #16
1035 ; CHECK-NEXT: ushr v16.4s, v3.4s, #16
1036 ; CHECK-NEXT: and v5.16b, v5.16b, v1.16b
1037 ; CHECK-NEXT: and v6.16b, v6.16b, v1.16b
1038 ; CHECK-NEXT: add v0.4s, v5.4s, v0.4s
1039 ; CHECK-NEXT: add v2.4s, v6.4s, v2.4s
1040 ; CHECK-NEXT: and v5.16b, v7.16b, v1.16b
1041 ; CHECK-NEXT: and v6.16b, v16.16b, v1.16b
1042 ; CHECK-NEXT: addhn v0.4h, v0.4s, v17.4s
1043 ; CHECK-NEXT: addhn v1.4h, v2.4s, v17.4s
1044 ; CHECK-NEXT: add v2.4s, v5.4s, v4.4s
1045 ; CHECK-NEXT: add v3.4s, v6.4s, v3.4s
1046 ; CHECK-NEXT: addhn2 v0.8h, v2.4s, v17.4s
1047 ; CHECK-NEXT: addhn2 v1.8h, v3.4s, v17.4s
1050 %c = uitofp <16 x i32> %a to <16 x bfloat>
1051 ret <16 x bfloat> %c
; Test: sitofp <32 x i32> -> <32 x bfloat>; expectations use the shared CHECK prefix.
; The input occupies v0-v7. Each register is converted with scvtf and adjusted by its own
; bit 16 (mask in v16); addhn fills the low halves and addhn2 the high halves of the
; returned v0-v3, all adding the 0x7fff constant held in v21.
1054 define <32 x bfloat> @stofp_v32i32_v32bf16(<32 x i32> %a) {
1055 ; CHECK-LABEL: stofp_v32i32_v32bf16:
1056 ; CHECK: // %bb.0: // %entry
1057 ; CHECK-NEXT: scvtf v0.4s, v0.4s
1058 ; CHECK-NEXT: scvtf v2.4s, v2.4s
1059 ; CHECK-NEXT: scvtf v4.4s, v4.4s
1060 ; CHECK-NEXT: scvtf v6.4s, v6.4s
1061 ; CHECK-NEXT: movi v16.4s, #1
1062 ; CHECK-NEXT: scvtf v1.4s, v1.4s
1063 ; CHECK-NEXT: scvtf v17.4s, v3.4s
1064 ; CHECK-NEXT: scvtf v5.4s, v5.4s
1065 ; CHECK-NEXT: scvtf v7.4s, v7.4s
1066 ; CHECK-NEXT: movi v21.4s, #127, msl #8
1067 ; CHECK-NEXT: ushr v3.4s, v0.4s, #16
1068 ; CHECK-NEXT: ushr v18.4s, v2.4s, #16
1069 ; CHECK-NEXT: ushr v19.4s, v4.4s, #16
1070 ; CHECK-NEXT: ushr v20.4s, v6.4s, #16
1071 ; CHECK-NEXT: ushr v22.4s, v1.4s, #16
1072 ; CHECK-NEXT: ushr v23.4s, v17.4s, #16
1073 ; CHECK-NEXT: ushr v24.4s, v5.4s, #16
1074 ; CHECK-NEXT: ushr v25.4s, v7.4s, #16
1075 ; CHECK-NEXT: and v3.16b, v3.16b, v16.16b
1076 ; CHECK-NEXT: and v18.16b, v18.16b, v16.16b
1077 ; CHECK-NEXT: and v19.16b, v19.16b, v16.16b
1078 ; CHECK-NEXT: and v20.16b, v20.16b, v16.16b
1079 ; CHECK-NEXT: add v0.4s, v3.4s, v0.4s
1080 ; CHECK-NEXT: and v3.16b, v22.16b, v16.16b
1081 ; CHECK-NEXT: add v2.4s, v18.4s, v2.4s
1082 ; CHECK-NEXT: add v4.4s, v19.4s, v4.4s
1083 ; CHECK-NEXT: add v6.4s, v20.4s, v6.4s
1084 ; CHECK-NEXT: and v18.16b, v23.16b, v16.16b
1085 ; CHECK-NEXT: and v19.16b, v24.16b, v16.16b
1086 ; CHECK-NEXT: and v16.16b, v25.16b, v16.16b
1087 ; CHECK-NEXT: add v20.4s, v3.4s, v1.4s
1088 ; CHECK-NEXT: addhn v0.4h, v0.4s, v21.4s
1089 ; CHECK-NEXT: addhn v1.4h, v2.4s, v21.4s
1090 ; CHECK-NEXT: addhn v2.4h, v4.4s, v21.4s
1091 ; CHECK-NEXT: addhn v3.4h, v6.4s, v21.4s
1092 ; CHECK-NEXT: add v4.4s, v18.4s, v17.4s
1093 ; CHECK-NEXT: add v5.4s, v19.4s, v5.4s
1094 ; CHECK-NEXT: add v6.4s, v16.4s, v7.4s
1095 ; CHECK-NEXT: addhn2 v0.8h, v20.4s, v21.4s
1096 ; CHECK-NEXT: addhn2 v1.8h, v4.4s, v21.4s
1097 ; CHECK-NEXT: addhn2 v2.8h, v5.4s, v21.4s
1098 ; CHECK-NEXT: addhn2 v3.8h, v6.4s, v21.4s
1101 %c = sitofp <32 x i32> %a to <32 x bfloat>
1102 ret <32 x bfloat> %c
; Test: uitofp <32 x i32> -> <32 x bfloat>; expectations use the shared CHECK prefix.
; Unsigned counterpart of the <32 x i32> case: ucvtf on v0-v7, per-lane bit-16
; adjustment, then addhn/addhn2 with the 0x7fff constant in v21 pack results into v0-v3.
1105 define <32 x bfloat> @utofp_v32i32_v32bf16(<32 x i32> %a) {
1106 ; CHECK-LABEL: utofp_v32i32_v32bf16:
1107 ; CHECK: // %bb.0: // %entry
1108 ; CHECK-NEXT: ucvtf v0.4s, v0.4s
1109 ; CHECK-NEXT: ucvtf v2.4s, v2.4s
1110 ; CHECK-NEXT: ucvtf v4.4s, v4.4s
1111 ; CHECK-NEXT: ucvtf v6.4s, v6.4s
1112 ; CHECK-NEXT: movi v16.4s, #1
1113 ; CHECK-NEXT: ucvtf v1.4s, v1.4s
1114 ; CHECK-NEXT: ucvtf v17.4s, v3.4s
1115 ; CHECK-NEXT: ucvtf v5.4s, v5.4s
1116 ; CHECK-NEXT: ucvtf v7.4s, v7.4s
1117 ; CHECK-NEXT: movi v21.4s, #127, msl #8
1118 ; CHECK-NEXT: ushr v3.4s, v0.4s, #16
1119 ; CHECK-NEXT: ushr v18.4s, v2.4s, #16
1120 ; CHECK-NEXT: ushr v19.4s, v4.4s, #16
1121 ; CHECK-NEXT: ushr v20.4s, v6.4s, #16
1122 ; CHECK-NEXT: ushr v22.4s, v1.4s, #16
1123 ; CHECK-NEXT: ushr v23.4s, v17.4s, #16
1124 ; CHECK-NEXT: ushr v24.4s, v5.4s, #16
1125 ; CHECK-NEXT: ushr v25.4s, v7.4s, #16
1126 ; CHECK-NEXT: and v3.16b, v3.16b, v16.16b
1127 ; CHECK-NEXT: and v18.16b, v18.16b, v16.16b
1128 ; CHECK-NEXT: and v19.16b, v19.16b, v16.16b
1129 ; CHECK-NEXT: and v20.16b, v20.16b, v16.16b
1130 ; CHECK-NEXT: add v0.4s, v3.4s, v0.4s
1131 ; CHECK-NEXT: and v3.16b, v22.16b, v16.16b
1132 ; CHECK-NEXT: add v2.4s, v18.4s, v2.4s
1133 ; CHECK-NEXT: add v4.4s, v19.4s, v4.4s
1134 ; CHECK-NEXT: add v6.4s, v20.4s, v6.4s
1135 ; CHECK-NEXT: and v18.16b, v23.16b, v16.16b
1136 ; CHECK-NEXT: and v19.16b, v24.16b, v16.16b
1137 ; CHECK-NEXT: and v16.16b, v25.16b, v16.16b
1138 ; CHECK-NEXT: add v20.4s, v3.4s, v1.4s
1139 ; CHECK-NEXT: addhn v0.4h, v0.4s, v21.4s
1140 ; CHECK-NEXT: addhn v1.4h, v2.4s, v21.4s
1141 ; CHECK-NEXT: addhn v2.4h, v4.4s, v21.4s
1142 ; CHECK-NEXT: addhn v3.4h, v6.4s, v21.4s
1143 ; CHECK-NEXT: add v4.4s, v18.4s, v17.4s
1144 ; CHECK-NEXT: add v5.4s, v19.4s, v5.4s
1145 ; CHECK-NEXT: add v6.4s, v16.4s, v7.4s
1146 ; CHECK-NEXT: addhn2 v0.8h, v20.4s, v21.4s
1147 ; CHECK-NEXT: addhn2 v1.8h, v4.4s, v21.4s
1148 ; CHECK-NEXT: addhn2 v2.8h, v5.4s, v21.4s
1149 ; CHECK-NEXT: addhn2 v3.8h, v6.4s, v21.4s
1152 %c = uitofp <32 x i32> %a to <32 x bfloat>
1153 ret <32 x bfloat> %c
; Test: sitofp <2 x i16> -> <2 x bfloat>; expectations use the shared CHECK prefix.
; Expected code first gathers the even 16-bit elements of the argument register (uzp1),
; sign-extends them to 32-bit lanes (sshll #0) and converts with scvtf; the f32 lanes are
; then narrowed by adding bit 16 and 0x7fff and keeping the upper 16 bits (addhn).
; NOTE(review): the uzp1 presumably undoes a promotion of the <2 x i16> argument to
; 32-bit lanes - confirm against the AArch64 calling-convention lowering.
1156 define <2 x bfloat> @stofp_v2i16_v2bf16(<2 x i16> %a) {
1157 ; CHECK-LABEL: stofp_v2i16_v2bf16:
1158 ; CHECK: // %bb.0: // %entry
1159 ; CHECK-NEXT: uzp1 v0.4h, v0.4h, v0.4h
1160 ; CHECK-NEXT: movi v1.4s, #1
1161 ; CHECK-NEXT: sshll v0.4s, v0.4h, #0
1162 ; CHECK-NEXT: scvtf v0.4s, v0.4s
1163 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
1164 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
1165 ; CHECK-NEXT: movi v2.4s, #127, msl #8
1166 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
1167 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
1170 %c = sitofp <2 x i16> %a to <2 x bfloat>
; Test: uitofp <2 x i16> -> <2 x bfloat>; expectations use the shared CHECK prefix.
; Expected code first gathers the even 16-bit elements of the argument register (uzp1),
; zero-extends them to 32-bit lanes (ushll #0) and converts with ucvtf; the f32 lanes are
; then narrowed by adding bit 16 and 0x7fff and keeping the upper 16 bits (addhn).
; NOTE(review): the uzp1 presumably undoes a promotion of the <2 x i16> argument to
; 32-bit lanes - confirm against the AArch64 calling-convention lowering.
1174 define <2 x bfloat> @utofp_v2i16_v2bf16(<2 x i16> %a) {
1175 ; CHECK-LABEL: utofp_v2i16_v2bf16:
1176 ; CHECK: // %bb.0: // %entry
1177 ; CHECK-NEXT: uzp1 v0.4h, v0.4h, v0.4h
1178 ; CHECK-NEXT: movi v1.4s, #1
1179 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
1180 ; CHECK-NEXT: ucvtf v0.4s, v0.4s
1181 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
1182 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
1183 ; CHECK-NEXT: movi v2.4s, #127, msl #8
1184 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
1185 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
1188 %c = uitofp <2 x i16> %a to <2 x bfloat>
; Test: sitofp <3 x i16> -> <3 x bfloat>; expectations use the shared CHECK prefix.
; Expected code sign-extends the four 16-bit lanes to 32 bits (sshll #0), converts with
; scvtf, then adds bit 16 and 0x7fff per lane and keeps the upper 16 bits (addhn).
1192 define <3 x bfloat> @stofp_v3i16_v3bf16(<3 x i16> %a) {
1193 ; CHECK-LABEL: stofp_v3i16_v3bf16:
1194 ; CHECK: // %bb.0: // %entry
1195 ; CHECK-NEXT: sshll v0.4s, v0.4h, #0
1196 ; CHECK-NEXT: movi v1.4s, #1
1197 ; CHECK-NEXT: scvtf v0.4s, v0.4s
1198 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
1199 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
1200 ; CHECK-NEXT: movi v2.4s, #127, msl #8
1201 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
1202 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
1205 %c = sitofp <3 x i16> %a to <3 x bfloat>
; Test: uitofp <3 x i16> -> <3 x bfloat>; expectations use the shared CHECK prefix.
; Expected code zero-extends the four 16-bit lanes to 32 bits (ushll #0), converts with
; ucvtf, then adds bit 16 and 0x7fff per lane and keeps the upper 16 bits (addhn).
1209 define <3 x bfloat> @utofp_v3i16_v3bf16(<3 x i16> %a) {
1210 ; CHECK-LABEL: utofp_v3i16_v3bf16:
1211 ; CHECK: // %bb.0: // %entry
1212 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
1213 ; CHECK-NEXT: movi v1.4s, #1
1214 ; CHECK-NEXT: ucvtf v0.4s, v0.4s
1215 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
1216 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
1217 ; CHECK-NEXT: movi v2.4s, #127, msl #8
1218 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
1219 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
1222 %c = uitofp <3 x i16> %a to <3 x bfloat>
; Test: sitofp <4 x i16> -> <4 x bfloat>; expectations use the shared CHECK prefix.
; Expected code sign-extends the 16-bit lanes to 32 bits (sshll #0), converts with scvtf,
; then adds bit 16 and 0x7fff per lane and keeps the upper 16 bits (addhn).
1226 define <4 x bfloat> @stofp_v4i16_v4bf16(<4 x i16> %a) {
1227 ; CHECK-LABEL: stofp_v4i16_v4bf16:
1228 ; CHECK: // %bb.0: // %entry
1229 ; CHECK-NEXT: sshll v0.4s, v0.4h, #0
1230 ; CHECK-NEXT: movi v1.4s, #1
1231 ; CHECK-NEXT: scvtf v0.4s, v0.4s
1232 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
1233 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
1234 ; CHECK-NEXT: movi v2.4s, #127, msl #8
1235 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
1236 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
1239 %c = sitofp <4 x i16> %a to <4 x bfloat>
; Test: uitofp <4 x i16> -> <4 x bfloat>; expectations use the shared CHECK prefix.
; Expected code zero-extends the 16-bit lanes to 32 bits (ushll #0), converts with ucvtf,
; then adds bit 16 and 0x7fff per lane and keeps the upper 16 bits (addhn).
1243 define <4 x bfloat> @utofp_v4i16_v4bf16(<4 x i16> %a) {
1244 ; CHECK-LABEL: utofp_v4i16_v4bf16:
1245 ; CHECK: // %bb.0: // %entry
1246 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
1247 ; CHECK-NEXT: movi v1.4s, #1
1248 ; CHECK-NEXT: ucvtf v0.4s, v0.4s
1249 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
1250 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
1251 ; CHECK-NEXT: movi v2.4s, #127, msl #8
1252 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
1253 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
1256 %c = uitofp <4 x i16> %a to <4 x bfloat>
; Test: sitofp <8 x i16> -> <8 x bfloat>; expectations use the shared CHECK prefix.
; Expected code sign-extends the low and high halves (sshll/sshll2) and converts both
; with scvtf. Here the 0x7fff constant is added to the extracted bit 16 first, and
; addhn/addhn2 then add that to the f32 value and keep the upper 16 bits, filling v0.
1260 define <8 x bfloat> @stofp_v8i16_v8bf16(<8 x i16> %a) {
1261 ; CHECK-LABEL: stofp_v8i16_v8bf16:
1262 ; CHECK: // %bb.0: // %entry
1263 ; CHECK-NEXT: sshll v2.4s, v0.4h, #0
1264 ; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
1265 ; CHECK-NEXT: movi v1.4s, #1
1266 ; CHECK-NEXT: movi v4.4s, #127, msl #8
1267 ; CHECK-NEXT: scvtf v2.4s, v2.4s
1268 ; CHECK-NEXT: scvtf v3.4s, v0.4s
1269 ; CHECK-NEXT: ushr v0.4s, v2.4s, #16
1270 ; CHECK-NEXT: ushr v5.4s, v3.4s, #16
1271 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
1272 ; CHECK-NEXT: and v1.16b, v5.16b, v1.16b
1273 ; CHECK-NEXT: add v0.4s, v0.4s, v4.4s
1274 ; CHECK-NEXT: add v1.4s, v1.4s, v4.4s
1275 ; CHECK-NEXT: addhn v0.4h, v2.4s, v0.4s
1276 ; CHECK-NEXT: addhn2 v0.8h, v3.4s, v1.4s
1279 %c = sitofp <8 x i16> %a to <8 x bfloat>
; Test: uitofp <8 x i16> -> <8 x bfloat>; expectations use the shared CHECK prefix.
; Expected code zero-extends the low and high halves (ushll/ushll2) and converts both
; with ucvtf. The 0x7fff constant is added to the extracted bit 16 first, and
; addhn/addhn2 then add that to the f32 value and keep the upper 16 bits, filling v0.
1283 define <8 x bfloat> @utofp_v8i16_v8bf16(<8 x i16> %a) {
1284 ; CHECK-LABEL: utofp_v8i16_v8bf16:
1285 ; CHECK: // %bb.0: // %entry
1286 ; CHECK-NEXT: ushll v2.4s, v0.4h, #0
1287 ; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
1288 ; CHECK-NEXT: movi v1.4s, #1
1289 ; CHECK-NEXT: movi v4.4s, #127, msl #8
1290 ; CHECK-NEXT: ucvtf v2.4s, v2.4s
1291 ; CHECK-NEXT: ucvtf v3.4s, v0.4s
1292 ; CHECK-NEXT: ushr v0.4s, v2.4s, #16
1293 ; CHECK-NEXT: ushr v5.4s, v3.4s, #16
1294 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
1295 ; CHECK-NEXT: and v1.16b, v5.16b, v1.16b
1296 ; CHECK-NEXT: add v0.4s, v0.4s, v4.4s
1297 ; CHECK-NEXT: add v1.4s, v1.4s, v4.4s
1298 ; CHECK-NEXT: addhn v0.4h, v2.4s, v0.4s
1299 ; CHECK-NEXT: addhn2 v0.8h, v3.4s, v1.4s
1302 %c = uitofp <8 x i16> %a to <8 x bfloat>
; Test: sitofp <16 x i16> -> <16 x bfloat>; expectations use the shared CHECK prefix.
; The input occupies v0 and v1. Each register is sign-extended into two 4 x i32 halves
; (sshll/sshll2) and converted with scvtf; bit 16 of every f32 lane plus the 0x7fff
; constant in v7 is then added to the lane by addhn/addhn2, which keep the upper 16 bits
; and repack the results into v0 and v1.
1306 define <16 x bfloat> @stofp_v16i16_v16bf16(<16 x i16> %a) {
1307 ; CHECK-LABEL: stofp_v16i16_v16bf16:
1308 ; CHECK: // %bb.0: // %entry
1309 ; CHECK-NEXT: sshll v3.4s, v0.4h, #0
1310 ; CHECK-NEXT: sshll v4.4s, v1.4h, #0
1311 ; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
1312 ; CHECK-NEXT: sshll2 v1.4s, v1.8h, #0
1313 ; CHECK-NEXT: movi v2.4s, #1
1314 ; CHECK-NEXT: movi v7.4s, #127, msl #8
1315 ; CHECK-NEXT: scvtf v3.4s, v3.4s
1316 ; CHECK-NEXT: scvtf v4.4s, v4.4s
1317 ; CHECK-NEXT: scvtf v5.4s, v0.4s
1318 ; CHECK-NEXT: scvtf v6.4s, v1.4s
1319 ; CHECK-NEXT: ushr v0.4s, v3.4s, #16
1320 ; CHECK-NEXT: ushr v1.4s, v4.4s, #16
1321 ; CHECK-NEXT: ushr v16.4s, v5.4s, #16
1322 ; CHECK-NEXT: ushr v17.4s, v6.4s, #16
1323 ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
1324 ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
1325 ; CHECK-NEXT: and v16.16b, v16.16b, v2.16b
1326 ; CHECK-NEXT: and v2.16b, v17.16b, v2.16b
1327 ; CHECK-NEXT: add v0.4s, v0.4s, v7.4s
1328 ; CHECK-NEXT: add v1.4s, v1.4s, v7.4s
1329 ; CHECK-NEXT: add v2.4s, v2.4s, v7.4s
1330 ; CHECK-NEXT: addhn v0.4h, v3.4s, v0.4s
1331 ; CHECK-NEXT: addhn v1.4h, v4.4s, v1.4s
1332 ; CHECK-NEXT: add v3.4s, v16.4s, v7.4s
1333 ; CHECK-NEXT: addhn2 v0.8h, v5.4s, v3.4s
1334 ; CHECK-NEXT: addhn2 v1.8h, v6.4s, v2.4s
1337 %c = sitofp <16 x i16> %a to <16 x bfloat>
1338 ret <16 x bfloat> %c
; Test: uitofp <16 x i16> -> <16 x bfloat>; expectations use the shared CHECK prefix.
; Unsigned counterpart of the <16 x i16> case: ushll/ushll2 zero-extend v0 and v1 into
; four 4 x i32 registers, ucvtf converts them, and addhn/addhn2 add the lane to
; (bit 16 + 0x7fff) and keep the upper 16 bits, repacking the results into v0 and v1.
1341 define <16 x bfloat> @utofp_v16i16_v16bf16(<16 x i16> %a) {
1342 ; CHECK-LABEL: utofp_v16i16_v16bf16:
1343 ; CHECK: // %bb.0: // %entry
1344 ; CHECK-NEXT: ushll v3.4s, v0.4h, #0
1345 ; CHECK-NEXT: ushll v4.4s, v1.4h, #0
1346 ; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
1347 ; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0
1348 ; CHECK-NEXT: movi v2.4s, #1
1349 ; CHECK-NEXT: movi v7.4s, #127, msl #8
1350 ; CHECK-NEXT: ucvtf v3.4s, v3.4s
1351 ; CHECK-NEXT: ucvtf v4.4s, v4.4s
1352 ; CHECK-NEXT: ucvtf v5.4s, v0.4s
1353 ; CHECK-NEXT: ucvtf v6.4s, v1.4s
1354 ; CHECK-NEXT: ushr v0.4s, v3.4s, #16
1355 ; CHECK-NEXT: ushr v1.4s, v4.4s, #16
1356 ; CHECK-NEXT: ushr v16.4s, v5.4s, #16
1357 ; CHECK-NEXT: ushr v17.4s, v6.4s, #16
1358 ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
1359 ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
1360 ; CHECK-NEXT: and v16.16b, v16.16b, v2.16b
1361 ; CHECK-NEXT: and v2.16b, v17.16b, v2.16b
1362 ; CHECK-NEXT: add v0.4s, v0.4s, v7.4s
1363 ; CHECK-NEXT: add v1.4s, v1.4s, v7.4s
1364 ; CHECK-NEXT: add v2.4s, v2.4s, v7.4s
1365 ; CHECK-NEXT: addhn v0.4h, v3.4s, v0.4s
1366 ; CHECK-NEXT: addhn v1.4h, v4.4s, v1.4s
1367 ; CHECK-NEXT: add v3.4s, v16.4s, v7.4s
1368 ; CHECK-NEXT: addhn2 v0.8h, v5.4s, v3.4s
1369 ; CHECK-NEXT: addhn2 v1.8h, v6.4s, v2.4s
1372 %c = uitofp <16 x i16> %a to <16 x bfloat>
1373 ret <16 x bfloat> %c
; Test: sitofp <32 x i16> -> <32 x bfloat>; expectations use the shared CHECK prefix.
; The input occupies v0-v3. Each register is sign-extended into two 4 x i32 halves
; (sshll/sshll2) and converted with scvtf, giving eight f32 registers. For each, bit 16
; (mask in v16) plus the 0x7fff constant in v21 is added to the lane by addhn/addhn2,
; which keep the upper 16 bits and repack the results into v0-v3.
1376 define <32 x bfloat> @stofp_v32i16_v32bf16(<32 x i16> %a) {
1377 ; CHECK-LABEL: stofp_v32i16_v32bf16:
1378 ; CHECK: // %bb.0: // %entry
1379 ; CHECK-NEXT: sshll v4.4s, v1.4h, #0
1380 ; CHECK-NEXT: sshll v5.4s, v0.4h, #0
1381 ; CHECK-NEXT: sshll v6.4s, v2.4h, #0
1382 ; CHECK-NEXT: sshll v7.4s, v3.4h, #0
1383 ; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
1384 ; CHECK-NEXT: sshll2 v1.4s, v1.8h, #0
1385 ; CHECK-NEXT: sshll2 v2.4s, v2.8h, #0
1386 ; CHECK-NEXT: sshll2 v3.4s, v3.8h, #0
1387 ; CHECK-NEXT: movi v16.4s, #1
1388 ; CHECK-NEXT: scvtf v5.4s, v5.4s
1389 ; CHECK-NEXT: scvtf v4.4s, v4.4s
1390 ; CHECK-NEXT: scvtf v6.4s, v6.4s
1391 ; CHECK-NEXT: scvtf v7.4s, v7.4s
1392 ; CHECK-NEXT: scvtf v17.4s, v0.4s
1393 ; CHECK-NEXT: scvtf v18.4s, v1.4s
1394 ; CHECK-NEXT: scvtf v19.4s, v2.4s
1395 ; CHECK-NEXT: scvtf v20.4s, v3.4s
1396 ; CHECK-NEXT: movi v21.4s, #127, msl #8
1397 ; CHECK-NEXT: ushr v0.4s, v5.4s, #16
1398 ; CHECK-NEXT: ushr v1.4s, v4.4s, #16
1399 ; CHECK-NEXT: ushr v2.4s, v6.4s, #16
1400 ; CHECK-NEXT: ushr v3.4s, v7.4s, #16
1401 ; CHECK-NEXT: ushr v22.4s, v17.4s, #16
1402 ; CHECK-NEXT: ushr v23.4s, v18.4s, #16
1403 ; CHECK-NEXT: ushr v24.4s, v19.4s, #16
1404 ; CHECK-NEXT: ushr v25.4s, v20.4s, #16
1405 ; CHECK-NEXT: and v0.16b, v0.16b, v16.16b
1406 ; CHECK-NEXT: and v1.16b, v1.16b, v16.16b
1407 ; CHECK-NEXT: and v2.16b, v2.16b, v16.16b
1408 ; CHECK-NEXT: and v3.16b, v3.16b, v16.16b
1409 ; CHECK-NEXT: and v22.16b, v22.16b, v16.16b
1410 ; CHECK-NEXT: and v23.16b, v23.16b, v16.16b
1411 ; CHECK-NEXT: and v24.16b, v24.16b, v16.16b
1412 ; CHECK-NEXT: and v16.16b, v25.16b, v16.16b
1413 ; CHECK-NEXT: add v0.4s, v0.4s, v21.4s
1414 ; CHECK-NEXT: add v1.4s, v1.4s, v21.4s
1415 ; CHECK-NEXT: add v2.4s, v2.4s, v21.4s
1416 ; CHECK-NEXT: add v3.4s, v3.4s, v21.4s
1417 ; CHECK-NEXT: addhn v0.4h, v5.4s, v0.4s
1418 ; CHECK-NEXT: addhn v1.4h, v4.4s, v1.4s
1419 ; CHECK-NEXT: addhn v2.4h, v6.4s, v2.4s
1420 ; CHECK-NEXT: addhn v3.4h, v7.4s, v3.4s
1421 ; CHECK-NEXT: add v4.4s, v22.4s, v21.4s
1422 ; CHECK-NEXT: add v5.4s, v23.4s, v21.4s
1423 ; CHECK-NEXT: add v6.4s, v24.4s, v21.4s
1424 ; CHECK-NEXT: add v7.4s, v16.4s, v21.4s
1425 ; CHECK-NEXT: addhn2 v0.8h, v17.4s, v4.4s
1426 ; CHECK-NEXT: addhn2 v1.8h, v18.4s, v5.4s
1427 ; CHECK-NEXT: addhn2 v2.8h, v19.4s, v6.4s
1428 ; CHECK-NEXT: addhn2 v3.8h, v20.4s, v7.4s
1431 %c = sitofp <32 x i16> %a to <32 x bfloat>
1432 ret <32 x bfloat> %c
; Test: uitofp <32 x i16> -> <32 x bfloat>; expectations use the shared CHECK prefix.
; Unsigned counterpart of the <32 x i16> case: ushll/ushll2 zero-extend v0-v3 into eight
; 4 x i32 registers, ucvtf converts them, and addhn/addhn2 add the lane to
; (bit 16 + 0x7fff) and keep the upper 16 bits, repacking the results into v0-v3.
1435 define <32 x bfloat> @utofp_v32i16_v32bf16(<32 x i16> %a) {
1436 ; CHECK-LABEL: utofp_v32i16_v32bf16:
1437 ; CHECK: // %bb.0: // %entry
1438 ; CHECK-NEXT: ushll v4.4s, v1.4h, #0
1439 ; CHECK-NEXT: ushll v5.4s, v0.4h, #0
1440 ; CHECK-NEXT: ushll v6.4s, v2.4h, #0
1441 ; CHECK-NEXT: ushll v7.4s, v3.4h, #0
1442 ; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
1443 ; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0
1444 ; CHECK-NEXT: ushll2 v2.4s, v2.8h, #0
1445 ; CHECK-NEXT: ushll2 v3.4s, v3.8h, #0
1446 ; CHECK-NEXT: movi v16.4s, #1
1447 ; CHECK-NEXT: ucvtf v5.4s, v5.4s
1448 ; CHECK-NEXT: ucvtf v4.4s, v4.4s
1449 ; CHECK-NEXT: ucvtf v6.4s, v6.4s
1450 ; CHECK-NEXT: ucvtf v7.4s, v7.4s
1451 ; CHECK-NEXT: ucvtf v17.4s, v0.4s
1452 ; CHECK-NEXT: ucvtf v18.4s, v1.4s
1453 ; CHECK-NEXT: ucvtf v19.4s, v2.4s
1454 ; CHECK-NEXT: ucvtf v20.4s, v3.4s
1455 ; CHECK-NEXT: movi v21.4s, #127, msl #8
1456 ; CHECK-NEXT: ushr v0.4s, v5.4s, #16
1457 ; CHECK-NEXT: ushr v1.4s, v4.4s, #16
1458 ; CHECK-NEXT: ushr v2.4s, v6.4s, #16
1459 ; CHECK-NEXT: ushr v3.4s, v7.4s, #16
1460 ; CHECK-NEXT: ushr v22.4s, v17.4s, #16
1461 ; CHECK-NEXT: ushr v23.4s, v18.4s, #16
1462 ; CHECK-NEXT: ushr v24.4s, v19.4s, #16
1463 ; CHECK-NEXT: ushr v25.4s, v20.4s, #16
1464 ; CHECK-NEXT: and v0.16b, v0.16b, v16.16b
1465 ; CHECK-NEXT: and v1.16b, v1.16b, v16.16b
1466 ; CHECK-NEXT: and v2.16b, v2.16b, v16.16b
1467 ; CHECK-NEXT: and v3.16b, v3.16b, v16.16b
1468 ; CHECK-NEXT: and v22.16b, v22.16b, v16.16b
1469 ; CHECK-NEXT: and v23.16b, v23.16b, v16.16b
1470 ; CHECK-NEXT: and v24.16b, v24.16b, v16.16b
1471 ; CHECK-NEXT: and v16.16b, v25.16b, v16.16b
1472 ; CHECK-NEXT: add v0.4s, v0.4s, v21.4s
1473 ; CHECK-NEXT: add v1.4s, v1.4s, v21.4s
1474 ; CHECK-NEXT: add v2.4s, v2.4s, v21.4s
1475 ; CHECK-NEXT: add v3.4s, v3.4s, v21.4s
1476 ; CHECK-NEXT: addhn v0.4h, v5.4s, v0.4s
1477 ; CHECK-NEXT: addhn v1.4h, v4.4s, v1.4s
1478 ; CHECK-NEXT: addhn v2.4h, v6.4s, v2.4s
1479 ; CHECK-NEXT: addhn v3.4h, v7.4s, v3.4s
1480 ; CHECK-NEXT: add v4.4s, v22.4s, v21.4s
1481 ; CHECK-NEXT: add v5.4s, v23.4s, v21.4s
1482 ; CHECK-NEXT: add v6.4s, v24.4s, v21.4s
1483 ; CHECK-NEXT: add v7.4s, v16.4s, v21.4s
1484 ; CHECK-NEXT: addhn2 v0.8h, v17.4s, v4.4s
1485 ; CHECK-NEXT: addhn2 v1.8h, v18.4s, v5.4s
1486 ; CHECK-NEXT: addhn2 v2.8h, v19.4s, v6.4s
1487 ; CHECK-NEXT: addhn2 v3.8h, v20.4s, v7.4s
1490 %c = uitofp <32 x i16> %a to <32 x bfloat>
1491 ret <32 x bfloat> %c
; Test: sitofp <2 x i8> -> <2 x bfloat>; expectations use the shared CHECK prefix.
; Unlike the wider element types, the expected code is scalarized: both 32-bit lanes are
; moved to GPRs, sign-extended from 8 bits (sxtb) and converted with scalar scvtf. Each
; f32 bit pattern then gets bit 16 (ubfx) and 0x7fff added and is shifted right by 16;
; the two 16-bit results are reassembled with a lane insert into v0.h[1].
1494 define <2 x bfloat> @stofp_v2i8_v2bf16(<2 x i8> %a) {
1495 ; CHECK-LABEL: stofp_v2i8_v2bf16:
1496 ; CHECK: // %bb.0: // %entry
1497 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1498 ; CHECK-NEXT: mov w9, v0.s[1]
1499 ; CHECK-NEXT: fmov w10, s0
1500 ; CHECK-NEXT: mov w8, #32767 // =0x7fff
1501 ; CHECK-NEXT: sxtb w10, w10
1502 ; CHECK-NEXT: sxtb w9, w9
1503 ; CHECK-NEXT: scvtf s1, w10
1504 ; CHECK-NEXT: scvtf s0, w9
1505 ; CHECK-NEXT: fmov w10, s1
1506 ; CHECK-NEXT: fmov w9, s0
1507 ; CHECK-NEXT: ubfx w12, w10, #16, #1
1508 ; CHECK-NEXT: ubfx w11, w9, #16, #1
1509 ; CHECK-NEXT: add w9, w9, w8
1510 ; CHECK-NEXT: add w8, w10, w8
1511 ; CHECK-NEXT: add w8, w12, w8
1512 ; CHECK-NEXT: add w9, w11, w9
1513 ; CHECK-NEXT: lsr w8, w8, #16
1514 ; CHECK-NEXT: lsr w9, w9, #16
1515 ; CHECK-NEXT: fmov s0, w8
1516 ; CHECK-NEXT: fmov s1, w9
1517 ; CHECK-NEXT: mov v0.h[1], v1.h[0]
1518 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
1521 %c = sitofp <2 x i8> %a to <2 x bfloat>
; Test: uitofp <2 x i8> -> <2 x bfloat>; expectations use the shared CHECK prefix.
; Scalarized like the signed <2 x i8> case: both lanes are moved to GPRs, masked to
; 8 bits (and #0xff) and converted with scalar ucvtf. Each f32 bit pattern then gets
; bit 16 (ubfx) and 0x7fff added and is shifted right by 16; the two 16-bit results are
; reassembled with a lane insert into v0.h[1].
1525 define <2 x bfloat> @utofp_v2i8_v2bf16(<2 x i8> %a) {
1526 ; CHECK-LABEL: utofp_v2i8_v2bf16:
1527 ; CHECK: // %bb.0: // %entry
1528 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1529 ; CHECK-NEXT: mov w9, v0.s[1]
1530 ; CHECK-NEXT: fmov w10, s0
1531 ; CHECK-NEXT: mov w8, #32767 // =0x7fff
1532 ; CHECK-NEXT: and w10, w10, #0xff
1533 ; CHECK-NEXT: and w9, w9, #0xff
1534 ; CHECK-NEXT: ucvtf s1, w10
1535 ; CHECK-NEXT: ucvtf s0, w9
1536 ; CHECK-NEXT: fmov w10, s1
1537 ; CHECK-NEXT: fmov w9, s0
1538 ; CHECK-NEXT: ubfx w12, w10, #16, #1
1539 ; CHECK-NEXT: ubfx w11, w9, #16, #1
1540 ; CHECK-NEXT: add w9, w9, w8
1541 ; CHECK-NEXT: add w8, w10, w8
1542 ; CHECK-NEXT: add w8, w12, w8
1543 ; CHECK-NEXT: add w9, w11, w9
1544 ; CHECK-NEXT: lsr w8, w8, #16
1545 ; CHECK-NEXT: lsr w9, w9, #16
1546 ; CHECK-NEXT: fmov s0, w8
1547 ; CHECK-NEXT: fmov s1, w9
1548 ; CHECK-NEXT: mov v0.h[1], v1.h[0]
1549 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
1552 %c = uitofp <2 x i8> %a to <2 x bfloat>
; Test: sitofp <3 x i8> -> <3 x bfloat>; expectations use the shared CHECK prefix.
; The three elements are expected to arrive in w0-w2 and be inserted into 16-bit lanes of
; v0. A shl/sshr #8 pair sign-extends each lane from 8 bits, sshll widens to 32 bits, and
; scvtf converts; bit 16 and 0x7fff are then added and addhn keeps the upper 16 bits.
1556 define <3 x bfloat> @stofp_v3i8_v3bf16(<3 x i8> %a) {
1557 ; CHECK-LABEL: stofp_v3i8_v3bf16:
1558 ; CHECK: // %bb.0: // %entry
1559 ; CHECK-NEXT: fmov s0, w0
1560 ; CHECK-NEXT: movi v1.4s, #1
1561 ; CHECK-NEXT: mov v0.h[1], w1
1562 ; CHECK-NEXT: mov v0.h[2], w2
1563 ; CHECK-NEXT: shl v0.4h, v0.4h, #8
1564 ; CHECK-NEXT: sshr v0.4h, v0.4h, #8
1565 ; CHECK-NEXT: sshll v0.4s, v0.4h, #0
1566 ; CHECK-NEXT: scvtf v0.4s, v0.4s
1567 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
1568 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
1569 ; CHECK-NEXT: movi v2.4s, #127, msl #8
1570 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
1571 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
1574 %c = sitofp <3 x i8> %a to <3 x bfloat>
; Test: uitofp <3 x i8> -> <3 x bfloat>; expectations use the shared CHECK prefix.
; The three elements are expected to arrive in w0-w2 and be inserted into 16-bit lanes of
; v0. bic #255, lsl #8 clears the upper byte of each lane, ushll widens to 32 bits, and
; ucvtf converts; bit 16 and 0x7fff are then added and addhn keeps the upper 16 bits.
1578 define <3 x bfloat> @utofp_v3i8_v3bf16(<3 x i8> %a) {
1579 ; CHECK-LABEL: utofp_v3i8_v3bf16:
1580 ; CHECK: // %bb.0: // %entry
1581 ; CHECK-NEXT: fmov s0, w0
1582 ; CHECK-NEXT: movi v1.4s, #1
1583 ; CHECK-NEXT: mov v0.h[1], w1
1584 ; CHECK-NEXT: mov v0.h[2], w2
1585 ; CHECK-NEXT: bic v0.4h, #255, lsl #8
1586 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
1587 ; CHECK-NEXT: ucvtf v0.4s, v0.4s
1588 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
1589 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
1590 ; CHECK-NEXT: movi v2.4s, #127, msl #8
1591 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
1592 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
1595 %c = uitofp <3 x i8> %a to <3 x bfloat>
; Test: sitofp <4 x i8> -> <4 x bfloat>; expectations use the shared CHECK prefix.
; The argument is handled in 16-bit lanes: shl/sshr #8 sign-extends each lane from
; 8 bits, sshll widens to 32 bits and scvtf converts; bit 16 and 0x7fff are then added
; and addhn keeps the upper 16 bits of each lane.
1599 define <4 x bfloat> @stofp_v4i8_v4bf16(<4 x i8> %a) {
1600 ; CHECK-LABEL: stofp_v4i8_v4bf16:
1601 ; CHECK: // %bb.0: // %entry
1602 ; CHECK-NEXT: shl v0.4h, v0.4h, #8
1603 ; CHECK-NEXT: movi v1.4s, #1
1604 ; CHECK-NEXT: sshr v0.4h, v0.4h, #8
1605 ; CHECK-NEXT: sshll v0.4s, v0.4h, #0
1606 ; CHECK-NEXT: scvtf v0.4s, v0.4s
1607 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
1608 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
1609 ; CHECK-NEXT: movi v2.4s, #127, msl #8
1610 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
1611 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
1614 %c = sitofp <4 x i8> %a to <4 x bfloat>
; Test: uitofp <4 x i8> -> <4 x bfloat>; expectations use the shared CHECK prefix.
; The argument is handled in 16-bit lanes: bic #255, lsl #8 clears the upper byte of each
; lane, ushll widens to 32 bits and ucvtf converts; bit 16 and 0x7fff are then added and
; addhn keeps the upper 16 bits of each lane.
1618 define <4 x bfloat> @utofp_v4i8_v4bf16(<4 x i8> %a) {
1619 ; CHECK-LABEL: utofp_v4i8_v4bf16:
1620 ; CHECK: // %bb.0: // %entry
1621 ; CHECK-NEXT: bic v0.4h, #255, lsl #8
1622 ; CHECK-NEXT: movi v1.4s, #1
1623 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
1624 ; CHECK-NEXT: ucvtf v0.4s, v0.4s
1625 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
1626 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
1627 ; CHECK-NEXT: movi v2.4s, #127, msl #8
1628 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
1629 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
1632 %c = uitofp <4 x i8> %a to <4 x bfloat>
; Test: sitofp <8 x i8> -> <8 x bfloat>; expectations use the shared CHECK prefix.
; Expected code sign-extends in two steps (8 -> 16 bits with sshll .8b, then 16 -> 32
; bits with sshll/sshll2) and converts both halves with scvtf. The 0x7fff constant is
; added to the extracted bit 16, and addhn/addhn2 add that to the f32 value and keep the
; upper 16 bits, filling v0.
1636 define <8 x bfloat> @stofp_v8i8_v8bf16(<8 x i8> %a) {
1637 ; CHECK-LABEL: stofp_v8i8_v8bf16:
1638 ; CHECK: // %bb.0: // %entry
1639 ; CHECK-NEXT: sshll v0.8h, v0.8b, #0
1640 ; CHECK-NEXT: movi v1.4s, #1
1641 ; CHECK-NEXT: movi v4.4s, #127, msl #8
1642 ; CHECK-NEXT: sshll v2.4s, v0.4h, #0
1643 ; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
1644 ; CHECK-NEXT: scvtf v2.4s, v2.4s
1645 ; CHECK-NEXT: scvtf v3.4s, v0.4s
1646 ; CHECK-NEXT: ushr v0.4s, v2.4s, #16
1647 ; CHECK-NEXT: ushr v5.4s, v3.4s, #16
1648 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
1649 ; CHECK-NEXT: and v1.16b, v5.16b, v1.16b
1650 ; CHECK-NEXT: add v0.4s, v0.4s, v4.4s
1651 ; CHECK-NEXT: add v1.4s, v1.4s, v4.4s
1652 ; CHECK-NEXT: addhn v0.4h, v2.4s, v0.4s
1653 ; CHECK-NEXT: addhn2 v0.8h, v3.4s, v1.4s
1656 %c = sitofp <8 x i8> %a to <8 x bfloat>
; Test: uitofp <8 x i8> -> <8 x bfloat>; expectations use the shared CHECK prefix.
; Expected code zero-extends in two steps (8 -> 16 bits with ushll .8b, then 16 -> 32
; bits with ushll/ushll2) and converts both halves with ucvtf. The 0x7fff constant is
; added to the extracted bit 16, and addhn/addhn2 add that to the f32 value and keep the
; upper 16 bits, filling v0.
1660 define <8 x bfloat> @utofp_v8i8_v8bf16(<8 x i8> %a) {
1661 ; CHECK-LABEL: utofp_v8i8_v8bf16:
1662 ; CHECK: // %bb.0: // %entry
1663 ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
1664 ; CHECK-NEXT: movi v1.4s, #1
1665 ; CHECK-NEXT: movi v4.4s, #127, msl #8
1666 ; CHECK-NEXT: ushll v2.4s, v0.4h, #0
1667 ; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
1668 ; CHECK-NEXT: ucvtf v2.4s, v2.4s
1669 ; CHECK-NEXT: ucvtf v3.4s, v0.4s
1670 ; CHECK-NEXT: ushr v0.4s, v2.4s, #16
1671 ; CHECK-NEXT: ushr v5.4s, v3.4s, #16
1672 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
1673 ; CHECK-NEXT: and v1.16b, v5.16b, v1.16b
1674 ; CHECK-NEXT: add v0.4s, v0.4s, v4.4s
1675 ; CHECK-NEXT: add v1.4s, v1.4s, v4.4s
1676 ; CHECK-NEXT: addhn v0.4h, v2.4s, v0.4s
1677 ; CHECK-NEXT: addhn2 v0.8h, v3.4s, v1.4s
1680 %c = uitofp <8 x i8> %a to <8 x bfloat>
; Test: sitofp <16 x i8> -> <16 x bfloat>; expectations use the shared CHECK prefix.
; Expected code sign-extends the low and high eight bytes to 16 bits (sshll/sshll2), each
; of those to two 4 x i32 registers, and converts all four with scvtf. Bit 16 of each f32
; lane plus the 0x7fff constant in v7 is added to the lane by addhn/addhn2; results from
; the low eight bytes land in v0 and those from the high eight bytes in v1.
1684 define <16 x bfloat> @stofp_v16i8_v16bf16(<16 x i8> %a) {
1685 ; CHECK-LABEL: stofp_v16i8_v16bf16:
1686 ; CHECK: // %bb.0: // %entry
1687 ; CHECK-NEXT: sshll2 v2.8h, v0.16b, #0
1688 ; CHECK-NEXT: sshll v0.8h, v0.8b, #0
1689 ; CHECK-NEXT: movi v1.4s, #1
1690 ; CHECK-NEXT: movi v7.4s, #127, msl #8
1691 ; CHECK-NEXT: sshll v3.4s, v2.4h, #0
1692 ; CHECK-NEXT: sshll v4.4s, v0.4h, #0
1693 ; CHECK-NEXT: sshll2 v2.4s, v2.8h, #0
1694 ; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
1695 ; CHECK-NEXT: scvtf v3.4s, v3.4s
1696 ; CHECK-NEXT: scvtf v4.4s, v4.4s
1697 ; CHECK-NEXT: scvtf v2.4s, v2.4s
1698 ; CHECK-NEXT: scvtf v6.4s, v0.4s
1699 ; CHECK-NEXT: ushr v5.4s, v3.4s, #16
1700 ; CHECK-NEXT: ushr v0.4s, v4.4s, #16
1701 ; CHECK-NEXT: ushr v16.4s, v2.4s, #16
1702 ; CHECK-NEXT: ushr v17.4s, v6.4s, #16
1703 ; CHECK-NEXT: and v5.16b, v5.16b, v1.16b
1704 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
1705 ; CHECK-NEXT: and v16.16b, v16.16b, v1.16b
1706 ; CHECK-NEXT: and v17.16b, v17.16b, v1.16b
1707 ; CHECK-NEXT: add v5.4s, v5.4s, v7.4s
1708 ; CHECK-NEXT: add v0.4s, v0.4s, v7.4s
1709 ; CHECK-NEXT: addhn v1.4h, v3.4s, v5.4s
1710 ; CHECK-NEXT: addhn v0.4h, v4.4s, v0.4s
1711 ; CHECK-NEXT: add v3.4s, v16.4s, v7.4s
1712 ; CHECK-NEXT: add v4.4s, v17.4s, v7.4s
1713 ; CHECK-NEXT: addhn2 v1.8h, v2.4s, v3.4s
1714 ; CHECK-NEXT: addhn2 v0.8h, v6.4s, v4.4s
1717 %c = sitofp <16 x i8> %a to <16 x bfloat>
1718 ret <16 x bfloat> %c
; Test: uitofp <16 x i8> -> <16 x bfloat>; expectations use the shared CHECK prefix.
; Unsigned counterpart of the <16 x i8> case: ushll/ushll2 zero-extend 8 -> 16 -> 32 bits,
; ucvtf converts the four 4 x i32 registers, and addhn/addhn2 add each lane to
; (bit 16 + 0x7fff) and keep the upper 16 bits; low-byte results land in v0, high-byte
; results in v1.
1721 define <16 x bfloat> @utofp_v16i8_v16bf16(<16 x i8> %a) {
1722 ; CHECK-LABEL: utofp_v16i8_v16bf16:
1723 ; CHECK: // %bb.0: // %entry
1724 ; CHECK-NEXT: ushll2 v2.8h, v0.16b, #0
1725 ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
1726 ; CHECK-NEXT: movi v1.4s, #1
1727 ; CHECK-NEXT: movi v7.4s, #127, msl #8
1728 ; CHECK-NEXT: ushll v3.4s, v2.4h, #0
1729 ; CHECK-NEXT: ushll v4.4s, v0.4h, #0
1730 ; CHECK-NEXT: ushll2 v2.4s, v2.8h, #0
1731 ; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
1732 ; CHECK-NEXT: ucvtf v3.4s, v3.4s
1733 ; CHECK-NEXT: ucvtf v4.4s, v4.4s
1734 ; CHECK-NEXT: ucvtf v2.4s, v2.4s
1735 ; CHECK-NEXT: ucvtf v6.4s, v0.4s
1736 ; CHECK-NEXT: ushr v5.4s, v3.4s, #16
1737 ; CHECK-NEXT: ushr v0.4s, v4.4s, #16
1738 ; CHECK-NEXT: ushr v16.4s, v2.4s, #16
1739 ; CHECK-NEXT: ushr v17.4s, v6.4s, #16
1740 ; CHECK-NEXT: and v5.16b, v5.16b, v1.16b
1741 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
1742 ; CHECK-NEXT: and v16.16b, v16.16b, v1.16b
1743 ; CHECK-NEXT: and v17.16b, v17.16b, v1.16b
1744 ; CHECK-NEXT: add v5.4s, v5.4s, v7.4s
1745 ; CHECK-NEXT: add v0.4s, v0.4s, v7.4s
1746 ; CHECK-NEXT: addhn v1.4h, v3.4s, v5.4s
1747 ; CHECK-NEXT: addhn v0.4h, v4.4s, v0.4s
1748 ; CHECK-NEXT: add v3.4s, v16.4s, v7.4s
1749 ; CHECK-NEXT: add v4.4s, v17.4s, v7.4s
1750 ; CHECK-NEXT: addhn2 v1.8h, v2.4s, v3.4s
1751 ; CHECK-NEXT: addhn2 v0.8h, v6.4s, v4.4s
1754 %c = uitofp <16 x i8> %a to <16 x bfloat>
1755 ret <16 x bfloat> %c
; Signed <32 x i8> -> <32 x bfloat> conversion (sitofp).
; The assertions read the input from v0 and v1 and follow the same three
; stages for each of the eight resulting 4-lane vectors:
;   1. sshll/sshll2 sign-extend the bytes to i16 and then to 4 x i32.
;   2. scvtf converts each i32 vector to f32.
;   3. Every f32 lane is narrowed to its upper 16 bits after adding a bias of
;      ((bits >> 16) & 1) + 0x7fff. `movi #1` (v2) supplies the mask,
;      `movi #127, msl #8` (v21) materialises 0x7fff, and addhn/addhn2 perform
;      the add and keep the high half of each lane.
; The narrowed halves are written to v0-v3. Note that v2 holds the #1 mask
; until the final `and`, and is only afterwards reused as a result register.
; NOTE(review): the assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
1758 define <32 x bfloat> @stofp_v32i8_v32bf16(<32 x i8> %a) {
1759 ; CHECK-LABEL: stofp_v32i8_v32bf16:
1760 ; CHECK: // %bb.0: // %entry
1761 ; CHECK-NEXT: sshll2 v3.8h, v0.16b, #0
1762 ; CHECK-NEXT: sshll v0.8h, v0.8b, #0
1763 ; CHECK-NEXT: sshll2 v4.8h, v1.16b, #0
1764 ; CHECK-NEXT: sshll v1.8h, v1.8b, #0
1765 ; CHECK-NEXT: movi v2.4s, #1
1766 ; CHECK-NEXT: movi v21.4s, #127, msl #8
1767 ; CHECK-NEXT: sshll v5.4s, v3.4h, #0
1768 ; CHECK-NEXT: sshll v6.4s, v0.4h, #0
1769 ; CHECK-NEXT: sshll v7.4s, v4.4h, #0
1770 ; CHECK-NEXT: sshll v16.4s, v1.4h, #0
1771 ; CHECK-NEXT: sshll2 v3.4s, v3.8h, #0
1772 ; CHECK-NEXT: sshll2 v4.4s, v4.8h, #0
1773 ; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
1774 ; CHECK-NEXT: sshll2 v1.4s, v1.8h, #0
1775 ; CHECK-NEXT: scvtf v5.4s, v5.4s
1776 ; CHECK-NEXT: scvtf v6.4s, v6.4s
1777 ; CHECK-NEXT: scvtf v7.4s, v7.4s
1778 ; CHECK-NEXT: scvtf v16.4s, v16.4s
1779 ; CHECK-NEXT: scvtf v17.4s, v3.4s
1780 ; CHECK-NEXT: scvtf v4.4s, v4.4s
1781 ; CHECK-NEXT: scvtf v18.4s, v0.4s
1782 ; CHECK-NEXT: scvtf v19.4s, v1.4s
1783 ; CHECK-NEXT: ushr v0.4s, v5.4s, #16
1784 ; CHECK-NEXT: ushr v3.4s, v6.4s, #16
1785 ; CHECK-NEXT: ushr v1.4s, v7.4s, #16
1786 ; CHECK-NEXT: ushr v20.4s, v16.4s, #16
1787 ; CHECK-NEXT: ushr v23.4s, v17.4s, #16
1788 ; CHECK-NEXT: ushr v24.4s, v4.4s, #16
1789 ; CHECK-NEXT: ushr v22.4s, v18.4s, #16
1790 ; CHECK-NEXT: ushr v25.4s, v19.4s, #16
1791 ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
1792 ; CHECK-NEXT: and v3.16b, v3.16b, v2.16b
1793 ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
1794 ; CHECK-NEXT: and v20.16b, v20.16b, v2.16b
1795 ; CHECK-NEXT: and v23.16b, v23.16b, v2.16b
1796 ; CHECK-NEXT: and v24.16b, v24.16b, v2.16b
1797 ; CHECK-NEXT: and v22.16b, v22.16b, v2.16b
1798 ; CHECK-NEXT: and v25.16b, v25.16b, v2.16b
1799 ; CHECK-NEXT: add v0.4s, v0.4s, v21.4s
1800 ; CHECK-NEXT: add v3.4s, v3.4s, v21.4s
1801 ; CHECK-NEXT: add v26.4s, v1.4s, v21.4s
1802 ; CHECK-NEXT: add v20.4s, v20.4s, v21.4s
1803 ; CHECK-NEXT: addhn v1.4h, v5.4s, v0.4s
1804 ; CHECK-NEXT: addhn v0.4h, v6.4s, v3.4s
1805 ; CHECK-NEXT: addhn v3.4h, v7.4s, v26.4s
1806 ; CHECK-NEXT: addhn v2.4h, v16.4s, v20.4s
1807 ; CHECK-NEXT: add v5.4s, v22.4s, v21.4s
1808 ; CHECK-NEXT: add v6.4s, v23.4s, v21.4s
1809 ; CHECK-NEXT: add v7.4s, v24.4s, v21.4s
1810 ; CHECK-NEXT: add v16.4s, v25.4s, v21.4s
1811 ; CHECK-NEXT: addhn2 v0.8h, v18.4s, v5.4s
1812 ; CHECK-NEXT: addhn2 v1.8h, v17.4s, v6.4s
1813 ; CHECK-NEXT: addhn2 v3.8h, v4.4s, v7.4s
1814 ; CHECK-NEXT: addhn2 v2.8h, v19.4s, v16.4s
1817 %c = sitofp <32 x i8> %a to <32 x bfloat>
1818 ret <32 x bfloat> %c
; Unsigned <32 x i8> -> <32 x bfloat> conversion (uitofp).
; The assertions read the input from v0 and v1 and follow the same three
; stages for each of the eight resulting 4-lane vectors:
;   1. ushll/ushll2 zero-extend the bytes to i16 and then to 4 x i32.
;   2. ucvtf converts each i32 vector to f32.
;   3. Every f32 lane is narrowed to its upper 16 bits after adding a bias of
;      ((bits >> 16) & 1) + 0x7fff. `movi #1` (v2) supplies the mask,
;      `movi #127, msl #8` (v21) materialises 0x7fff, and addhn/addhn2 perform
;      the add and keep the high half of each lane.
; The narrowed halves are written to v0-v3. Note that v2 holds the #1 mask
; until the final `and`, and is only afterwards reused as a result register.
; NOTE(review): the assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
1821 define <32 x bfloat> @utofp_v32i8_v32bf16(<32 x i8> %a) {
1822 ; CHECK-LABEL: utofp_v32i8_v32bf16:
1823 ; CHECK: // %bb.0: // %entry
1824 ; CHECK-NEXT: ushll2 v3.8h, v0.16b, #0
1825 ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
1826 ; CHECK-NEXT: ushll2 v4.8h, v1.16b, #0
1827 ; CHECK-NEXT: ushll v1.8h, v1.8b, #0
1828 ; CHECK-NEXT: movi v2.4s, #1
1829 ; CHECK-NEXT: movi v21.4s, #127, msl #8
1830 ; CHECK-NEXT: ushll v5.4s, v3.4h, #0
1831 ; CHECK-NEXT: ushll v6.4s, v0.4h, #0
1832 ; CHECK-NEXT: ushll v7.4s, v4.4h, #0
1833 ; CHECK-NEXT: ushll v16.4s, v1.4h, #0
1834 ; CHECK-NEXT: ushll2 v3.4s, v3.8h, #0
1835 ; CHECK-NEXT: ushll2 v4.4s, v4.8h, #0
1836 ; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
1837 ; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0
1838 ; CHECK-NEXT: ucvtf v5.4s, v5.4s
1839 ; CHECK-NEXT: ucvtf v6.4s, v6.4s
1840 ; CHECK-NEXT: ucvtf v7.4s, v7.4s
1841 ; CHECK-NEXT: ucvtf v16.4s, v16.4s
1842 ; CHECK-NEXT: ucvtf v17.4s, v3.4s
1843 ; CHECK-NEXT: ucvtf v4.4s, v4.4s
1844 ; CHECK-NEXT: ucvtf v18.4s, v0.4s
1845 ; CHECK-NEXT: ucvtf v19.4s, v1.4s
1846 ; CHECK-NEXT: ushr v0.4s, v5.4s, #16
1847 ; CHECK-NEXT: ushr v3.4s, v6.4s, #16
1848 ; CHECK-NEXT: ushr v1.4s, v7.4s, #16
1849 ; CHECK-NEXT: ushr v20.4s, v16.4s, #16
1850 ; CHECK-NEXT: ushr v23.4s, v17.4s, #16
1851 ; CHECK-NEXT: ushr v24.4s, v4.4s, #16
1852 ; CHECK-NEXT: ushr v22.4s, v18.4s, #16
1853 ; CHECK-NEXT: ushr v25.4s, v19.4s, #16
1854 ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
1855 ; CHECK-NEXT: and v3.16b, v3.16b, v2.16b
1856 ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
1857 ; CHECK-NEXT: and v20.16b, v20.16b, v2.16b
1858 ; CHECK-NEXT: and v23.16b, v23.16b, v2.16b
1859 ; CHECK-NEXT: and v24.16b, v24.16b, v2.16b
1860 ; CHECK-NEXT: and v22.16b, v22.16b, v2.16b
1861 ; CHECK-NEXT: and v25.16b, v25.16b, v2.16b
1862 ; CHECK-NEXT: add v0.4s, v0.4s, v21.4s
1863 ; CHECK-NEXT: add v3.4s, v3.4s, v21.4s
1864 ; CHECK-NEXT: add v26.4s, v1.4s, v21.4s
1865 ; CHECK-NEXT: add v20.4s, v20.4s, v21.4s
1866 ; CHECK-NEXT: addhn v1.4h, v5.4s, v0.4s
1867 ; CHECK-NEXT: addhn v0.4h, v6.4s, v3.4s
1868 ; CHECK-NEXT: addhn v3.4h, v7.4s, v26.4s
1869 ; CHECK-NEXT: addhn v2.4h, v16.4s, v20.4s
1870 ; CHECK-NEXT: add v5.4s, v22.4s, v21.4s
1871 ; CHECK-NEXT: add v6.4s, v23.4s, v21.4s
1872 ; CHECK-NEXT: add v7.4s, v24.4s, v21.4s
1873 ; CHECK-NEXT: add v16.4s, v25.4s, v21.4s
1874 ; CHECK-NEXT: addhn2 v0.8h, v18.4s, v5.4s
1875 ; CHECK-NEXT: addhn2 v1.8h, v17.4s, v6.4s
1876 ; CHECK-NEXT: addhn2 v3.8h, v4.4s, v7.4s
1877 ; CHECK-NEXT: addhn2 v2.8h, v19.4s, v16.4s
1880 %c = uitofp <32 x i8> %a to <32 x bfloat>
1881 ret <32 x bfloat> %c
1883 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
1885 ; CHECK-GI-FP16: {{.*}}
1886 ; CHECK-GI-NOFP16: {{.*}}
1888 ; CHECK-SD-FP16: {{.*}}
1889 ; CHECK-SD-NOFP16: {{.*}}