1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2 ; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16
3 ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
4 ; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
5 ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
; Scalar signed i64 -> bfloat conversion (sitofp).
; Expected assembly, in order:
;   * magnitude of x0 via cmp/cneg, with the sign bit kept aside in x11;
;   * if any bit at position 53 or above is set, the low 12 bits are masked
;     off before scvtf, and a 1 is ORed into the double's bit pattern when
;     those masked bits were non-zero (sticky bit);
;   * the saved sign bit is ORed back in, then fcvtxn narrows to single;
;   * the final bf16 value is (w + 0x7fff + bit 16 of w) >> 16.
7 define bfloat @stofp_i64_bf16(i64 %a) {
8 ; CHECK-LABEL: stofp_i64_bf16:
9 ; CHECK: // %bb.0: // %entry
10 ; CHECK-NEXT: cmp x0, #0
11 ; CHECK-NEXT: and x11, x0, #0x8000000000000000
12 ; CHECK-NEXT: mov w8, #32767 // =0x7fff
13 ; CHECK-NEXT: cneg x9, x0, mi
14 ; CHECK-NEXT: lsr x10, x9, #53
15 ; CHECK-NEXT: cmp x10, #0
16 ; CHECK-NEXT: and x10, x9, #0xfffffffffffff000
17 ; CHECK-NEXT: csel x10, x10, x9, ne
18 ; CHECK-NEXT: scvtf d0, x10
19 ; CHECK-NEXT: cset w10, ne
20 ; CHECK-NEXT: tst x9, #0xfff
21 ; CHECK-NEXT: csel w10, wzr, w10, eq
22 ; CHECK-NEXT: fmov x9, d0
23 ; CHECK-NEXT: orr x9, x9, x11
24 ; CHECK-NEXT: orr x9, x9, x10
25 ; CHECK-NEXT: fmov d0, x9
26 ; CHECK-NEXT: fcvtxn s0, d0
27 ; CHECK-NEXT: fmov w9, s0
28 ; CHECK-NEXT: ubfx w10, w9, #16, #1
29 ; CHECK-NEXT: add w8, w9, w8
30 ; CHECK-NEXT: add w8, w10, w8
31 ; CHECK-NEXT: lsr w8, w8, #16
32 ; CHECK-NEXT: fmov s0, w8
33 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
36 %c = sitofp i64 %a to bfloat
; Scalar unsigned i64 -> bfloat conversion (uitofp).
; Expected assembly: no sign handling is needed, so x0 is used directly.
; If any bit at position 53 or above is set, the low 12 bits are masked off
; before ucvtf and a sticky 1 is ORed into the double's bit pattern when the
; masked bits were non-zero. fcvtxn then narrows to single and the result is
; (w + 0x7fff + bit 16 of w) >> 16.
40 define bfloat @utofp_i64_bf16(i64 %a) {
41 ; CHECK-LABEL: utofp_i64_bf16:
42 ; CHECK: // %bb.0: // %entry
43 ; CHECK-NEXT: lsr x9, x0, #53
44 ; CHECK-NEXT: mov w8, #32767 // =0x7fff
45 ; CHECK-NEXT: cmp x9, #0
46 ; CHECK-NEXT: and x9, x0, #0xfffffffffffff000
47 ; CHECK-NEXT: csel x9, x9, x0, ne
48 ; CHECK-NEXT: ucvtf d0, x9
49 ; CHECK-NEXT: cset w9, ne
50 ; CHECK-NEXT: tst x0, #0xfff
51 ; CHECK-NEXT: csel w9, wzr, w9, eq
52 ; CHECK-NEXT: fmov x10, d0
53 ; CHECK-NEXT: orr x9, x10, x9
54 ; CHECK-NEXT: fmov d0, x9
55 ; CHECK-NEXT: fcvtxn s0, d0
56 ; CHECK-NEXT: fmov w9, s0
57 ; CHECK-NEXT: ubfx w10, w9, #16, #1
58 ; CHECK-NEXT: add w8, w9, w8
59 ; CHECK-NEXT: add w8, w10, w8
60 ; CHECK-NEXT: lsr w8, w8, #16
61 ; CHECK-NEXT: fmov s0, w8
62 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
65 %c = uitofp i64 %a to bfloat
; Scalar signed i32 -> bfloat conversion (sitofp).
; Expected assembly: w0 is converted to double with scvtf, narrowed to single
; with fcvtxn, and the bf16 result is (w + 0x7fff + bit 16 of w) >> 16.
69 define bfloat @stofp_i32_bf16(i32 %a) {
70 ; CHECK-LABEL: stofp_i32_bf16:
71 ; CHECK: // %bb.0: // %entry
72 ; CHECK-NEXT: scvtf d0, w0
73 ; CHECK-NEXT: mov w8, #32767 // =0x7fff
74 ; CHECK-NEXT: fcvtxn s0, d0
75 ; CHECK-NEXT: fmov w9, s0
76 ; CHECK-NEXT: ubfx w10, w9, #16, #1
77 ; CHECK-NEXT: add w8, w9, w8
78 ; CHECK-NEXT: add w8, w10, w8
79 ; CHECK-NEXT: lsr w8, w8, #16
80 ; CHECK-NEXT: fmov s0, w8
81 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
84 %c = sitofp i32 %a to bfloat
; Scalar unsigned i32 -> bfloat conversion (uitofp).
; Expected assembly: w0 is converted to double with ucvtf, narrowed to single
; with fcvtxn, and the bf16 result is (w + 0x7fff + bit 16 of w) >> 16.
88 define bfloat @utofp_i32_bf16(i32 %a) {
89 ; CHECK-LABEL: utofp_i32_bf16:
90 ; CHECK: // %bb.0: // %entry
91 ; CHECK-NEXT: ucvtf d0, w0
92 ; CHECK-NEXT: mov w8, #32767 // =0x7fff
93 ; CHECK-NEXT: fcvtxn s0, d0
94 ; CHECK-NEXT: fmov w9, s0
95 ; CHECK-NEXT: ubfx w10, w9, #16, #1
96 ; CHECK-NEXT: add w8, w9, w8
97 ; CHECK-NEXT: add w8, w10, w8
98 ; CHECK-NEXT: lsr w8, w8, #16
99 ; CHECK-NEXT: fmov s0, w8
100 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
103 %c = uitofp i32 %a to bfloat
; Scalar signed i16 -> bfloat conversion (sitofp).
; Expected assembly: the argument is sign-extended with sxth and converted
; straight to single with scvtf (no double/fcvtxn step). The bf16 result is
; (w + 0x7fff + bit 16 of w) >> 16.
107 define bfloat @stofp_i16_bf16(i16 %a) {
108 ; CHECK-LABEL: stofp_i16_bf16:
109 ; CHECK: // %bb.0: // %entry
110 ; CHECK-NEXT: sxth w9, w0
111 ; CHECK-NEXT: mov w8, #32767 // =0x7fff
112 ; CHECK-NEXT: scvtf s0, w9
113 ; CHECK-NEXT: fmov w9, s0
114 ; CHECK-NEXT: ubfx w10, w9, #16, #1
115 ; CHECK-NEXT: add w8, w9, w8
116 ; CHECK-NEXT: add w8, w10, w8
117 ; CHECK-NEXT: lsr w8, w8, #16
118 ; CHECK-NEXT: fmov s0, w8
119 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
122 %c = sitofp i16 %a to bfloat
; Scalar unsigned i16 -> bfloat conversion (uitofp).
; Expected assembly: the argument is zero-extended by masking with 0xffff and
; converted straight to single with ucvtf. The bf16 result is
; (w + 0x7fff + bit 16 of w) >> 16.
126 define bfloat @utofp_i16_bf16(i16 %a) {
127 ; CHECK-LABEL: utofp_i16_bf16:
128 ; CHECK: // %bb.0: // %entry
129 ; CHECK-NEXT: and w9, w0, #0xffff
130 ; CHECK-NEXT: mov w8, #32767 // =0x7fff
131 ; CHECK-NEXT: ucvtf s0, w9
132 ; CHECK-NEXT: fmov w9, s0
133 ; CHECK-NEXT: ubfx w10, w9, #16, #1
134 ; CHECK-NEXT: add w8, w9, w8
135 ; CHECK-NEXT: add w8, w10, w8
136 ; CHECK-NEXT: lsr w8, w8, #16
137 ; CHECK-NEXT: fmov s0, w8
138 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
141 %c = uitofp i16 %a to bfloat
; Scalar signed i8 -> bfloat conversion (sitofp).
; Expected assembly: the argument is sign-extended with sxtb and converted
; straight to single with scvtf. The bf16 result is
; (w + 0x7fff + bit 16 of w) >> 16.
145 define bfloat @stofp_i8_bf16(i8 %a) {
146 ; CHECK-LABEL: stofp_i8_bf16:
147 ; CHECK: // %bb.0: // %entry
148 ; CHECK-NEXT: sxtb w9, w0
149 ; CHECK-NEXT: mov w8, #32767 // =0x7fff
150 ; CHECK-NEXT: scvtf s0, w9
151 ; CHECK-NEXT: fmov w9, s0
152 ; CHECK-NEXT: ubfx w10, w9, #16, #1
153 ; CHECK-NEXT: add w8, w9, w8
154 ; CHECK-NEXT: add w8, w10, w8
155 ; CHECK-NEXT: lsr w8, w8, #16
156 ; CHECK-NEXT: fmov s0, w8
157 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
160 %c = sitofp i8 %a to bfloat
; Scalar unsigned i8 -> bfloat conversion (uitofp).
; Expected assembly: the argument is zero-extended by masking with 0xff and
; converted straight to single with ucvtf. The bf16 result is
; (w + 0x7fff + bit 16 of w) >> 16.
164 define bfloat @utofp_i8_bf16(i8 %a) {
165 ; CHECK-LABEL: utofp_i8_bf16:
166 ; CHECK: // %bb.0: // %entry
167 ; CHECK-NEXT: and w9, w0, #0xff
168 ; CHECK-NEXT: mov w8, #32767 // =0x7fff
169 ; CHECK-NEXT: ucvtf s0, w9
170 ; CHECK-NEXT: fmov w9, s0
171 ; CHECK-NEXT: ubfx w10, w9, #16, #1
172 ; CHECK-NEXT: add w8, w9, w8
173 ; CHECK-NEXT: add w8, w10, w8
174 ; CHECK-NEXT: lsr w8, w8, #16
175 ; CHECK-NEXT: fmov s0, w8
176 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
179 %c = uitofp i8 %a to bfloat
; <2 x i64> -> <2 x bfloat> signed conversion (sitofp).
; Expected assembly is scalarised: lane 1 is extracted with mov x9, v0.d[1]
; and lane 0 with fmov x10, d0. Each lane then goes through a scalar sequence:
;   * cneg for the magnitude, with the sign bit kept aside;
;   * if bits at position 53 or above are set, the low 12 bits are masked off
;     before scvtf and a sticky 1 is ORed in when they were non-zero;
;   * the sign bit is ORed back, fcvtxn narrows to single, and the result is
;     (w + 0x7fff + bit 16 of w) >> 16.
; The two halfword results are recombined with mov v0.h[1], v1.h[0].
183 define <2 x bfloat> @stofp_v2i64_v2bf16(<2 x i64> %a) {
184 ; CHECK-LABEL: stofp_v2i64_v2bf16:
185 ; CHECK: // %bb.0: // %entry
186 ; CHECK-NEXT: mov x9, v0.d[1]
187 ; CHECK-NEXT: mov w8, #32767 // =0x7fff
188 ; CHECK-NEXT: cmp x9, #0
189 ; CHECK-NEXT: cneg x10, x9, mi
190 ; CHECK-NEXT: and x9, x9, #0x8000000000000000
191 ; CHECK-NEXT: lsr x11, x10, #53
192 ; CHECK-NEXT: and x12, x10, #0xfffffffffffff000
193 ; CHECK-NEXT: cmp x11, #0
194 ; CHECK-NEXT: csel x11, x12, x10, ne
195 ; CHECK-NEXT: cset w12, ne
196 ; CHECK-NEXT: tst x10, #0xfff
197 ; CHECK-NEXT: fmov x10, d0
198 ; CHECK-NEXT: csel w12, wzr, w12, eq
199 ; CHECK-NEXT: scvtf d0, x11
200 ; CHECK-NEXT: cmp x10, #0
201 ; CHECK-NEXT: cneg x13, x10, mi
202 ; CHECK-NEXT: and x10, x10, #0x8000000000000000
203 ; CHECK-NEXT: lsr x14, x13, #53
204 ; CHECK-NEXT: cmp x14, #0
205 ; CHECK-NEXT: and x14, x13, #0xfffffffffffff000
206 ; CHECK-NEXT: csel x11, x14, x13, ne
207 ; CHECK-NEXT: cset w14, ne
208 ; CHECK-NEXT: tst x13, #0xfff
209 ; CHECK-NEXT: scvtf d1, x11
210 ; CHECK-NEXT: fmov x11, d0
211 ; CHECK-NEXT: orr x9, x11, x9
212 ; CHECK-NEXT: csel w11, wzr, w14, eq
213 ; CHECK-NEXT: fmov x13, d1
214 ; CHECK-NEXT: orr x9, x9, x12
215 ; CHECK-NEXT: fmov d0, x9
216 ; CHECK-NEXT: orr x10, x13, x10
217 ; CHECK-NEXT: orr x10, x10, x11
218 ; CHECK-NEXT: fcvtxn s0, d0
219 ; CHECK-NEXT: fmov d1, x10
220 ; CHECK-NEXT: fcvtxn s1, d1
221 ; CHECK-NEXT: fmov w9, s0
222 ; CHECK-NEXT: ubfx w11, w9, #16, #1
223 ; CHECK-NEXT: add w9, w9, w8
224 ; CHECK-NEXT: fmov w10, s1
225 ; CHECK-NEXT: add w9, w11, w9
226 ; CHECK-NEXT: lsr w9, w9, #16
227 ; CHECK-NEXT: ubfx w12, w10, #16, #1
228 ; CHECK-NEXT: add w8, w10, w8
229 ; CHECK-NEXT: fmov s1, w9
230 ; CHECK-NEXT: add w8, w12, w8
231 ; CHECK-NEXT: lsr w8, w8, #16
232 ; CHECK-NEXT: fmov s0, w8
233 ; CHECK-NEXT: mov v0.h[1], v1.h[0]
234 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
237 %c = sitofp <2 x i64> %a to <2 x bfloat>
; <2 x i64> -> <2 x bfloat> unsigned conversion (uitofp).
; Expected assembly is scalarised: lane 1 is extracted with mov x9, v0.d[1]
; and lane 0 with fmov x11, d0. Each lane then goes through a scalar sequence:
;   * if bits at position 53 or above are set, the low 12 bits are masked off
;     before ucvtf and a sticky 1 is ORed in when they were non-zero;
;   * fcvtxn narrows to single, and the result is
;     (w + 0x7fff + bit 16 of w) >> 16.
; The two halfword results are recombined with mov v0.h[1], v1.h[0].
241 define <2 x bfloat> @utofp_v2i64_v2bf16(<2 x i64> %a) {
242 ; CHECK-LABEL: utofp_v2i64_v2bf16:
243 ; CHECK: // %bb.0: // %entry
244 ; CHECK-NEXT: mov x9, v0.d[1]
245 ; CHECK-NEXT: fmov x11, d0
246 ; CHECK-NEXT: mov w8, #32767 // =0x7fff
247 ; CHECK-NEXT: lsr x10, x9, #53
248 ; CHECK-NEXT: and x12, x9, #0xfffffffffffff000
249 ; CHECK-NEXT: cmp x10, #0
250 ; CHECK-NEXT: lsr x10, x11, #53
251 ; CHECK-NEXT: csel x12, x12, x9, ne
252 ; CHECK-NEXT: cset w13, ne
253 ; CHECK-NEXT: tst x9, #0xfff
254 ; CHECK-NEXT: csel w9, wzr, w13, eq
255 ; CHECK-NEXT: cmp x10, #0
256 ; CHECK-NEXT: and x10, x11, #0xfffffffffffff000
257 ; CHECK-NEXT: csel x10, x10, x11, ne
258 ; CHECK-NEXT: ucvtf d0, x12
259 ; CHECK-NEXT: ucvtf d1, x10
260 ; CHECK-NEXT: cset w10, ne
261 ; CHECK-NEXT: tst x11, #0xfff
262 ; CHECK-NEXT: csel w10, wzr, w10, eq
263 ; CHECK-NEXT: fmov x11, d0
264 ; CHECK-NEXT: fmov x12, d1
265 ; CHECK-NEXT: orr x9, x11, x9
266 ; CHECK-NEXT: orr x10, x12, x10
267 ; CHECK-NEXT: fmov d0, x9
268 ; CHECK-NEXT: fmov d1, x10
269 ; CHECK-NEXT: fcvtxn s0, d0
270 ; CHECK-NEXT: fcvtxn s1, d1
271 ; CHECK-NEXT: fmov w9, s0
272 ; CHECK-NEXT: fmov w10, s1
273 ; CHECK-NEXT: ubfx w11, w9, #16, #1
274 ; CHECK-NEXT: add w9, w9, w8
275 ; CHECK-NEXT: ubfx w12, w10, #16, #1
276 ; CHECK-NEXT: add w8, w10, w8
277 ; CHECK-NEXT: add w9, w11, w9
278 ; CHECK-NEXT: add w8, w12, w8
279 ; CHECK-NEXT: lsr w9, w9, #16
280 ; CHECK-NEXT: lsr w8, w8, #16
281 ; CHECK-NEXT: fmov s1, w9
282 ; CHECK-NEXT: fmov s0, w8
283 ; CHECK-NEXT: mov v0.h[1], v1.h[0]
284 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
287 %c = uitofp <2 x i64> %a to <2 x bfloat>
; <3 x i64> -> <3 x bfloat> signed conversion (sitofp).
; Expected assembly: the three elements arrive in d0, d1 and d2. d0 and d1 are
; packed into one vector with mov v0.d[1], v1.d[0], and the third is converted
; in v2. After scvtf (.2d) the values are narrowed to one .4s vector with
; fcvtn/fcvtn2. Rounding is x + 0x7fff + ((x >> 16) & 1). Lanes that fail the
; fcmeq self-comparison keep the input with bit 22 set (orr #64, lsl #16)
; instead of the rounded value. shrn #16 extracts the upper halfwords.
291 define <3 x bfloat> @stofp_v3i64_v3bf16(<3 x i64> %a) {
292 ; CHECK-LABEL: stofp_v3i64_v3bf16:
293 ; CHECK: // %bb.0: // %entry
294 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
295 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
296 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
297 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
298 ; CHECK-NEXT: scvtf v1.2d, v2.2d
299 ; CHECK-NEXT: movi v2.4s, #127, msl #8
300 ; CHECK-NEXT: scvtf v0.2d, v0.2d
301 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
302 ; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
303 ; CHECK-NEXT: movi v1.4s, #1
304 ; CHECK-NEXT: ushr v3.4s, v0.4s, #16
305 ; CHECK-NEXT: add v2.4s, v0.4s, v2.4s
306 ; CHECK-NEXT: and v1.16b, v3.16b, v1.16b
307 ; CHECK-NEXT: fcmeq v3.4s, v0.4s, v0.4s
308 ; CHECK-NEXT: orr v0.4s, #64, lsl #16
309 ; CHECK-NEXT: add v1.4s, v1.4s, v2.4s
310 ; CHECK-NEXT: bit v0.16b, v1.16b, v3.16b
311 ; CHECK-NEXT: shrn v0.4h, v0.4s, #16
314 %c = sitofp <3 x i64> %a to <3 x bfloat>
; <3 x i64> -> <3 x bfloat> unsigned conversion (uitofp).
; Expected assembly: the three elements arrive in d0, d1 and d2. d0 and d1 are
; packed into one vector with mov v0.d[1], v1.d[0], and the third is converted
; in v2. After ucvtf (.2d) the values are narrowed to one .4s vector with
; fcvtn/fcvtn2. Rounding is x + 0x7fff + ((x >> 16) & 1). Lanes that fail the
; fcmeq self-comparison keep the input with bit 22 set (orr #64, lsl #16)
; instead of the rounded value. shrn #16 extracts the upper halfwords.
318 define <3 x bfloat> @utofp_v3i64_v3bf16(<3 x i64> %a) {
319 ; CHECK-LABEL: utofp_v3i64_v3bf16:
320 ; CHECK: // %bb.0: // %entry
321 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
322 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
323 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
324 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
325 ; CHECK-NEXT: ucvtf v1.2d, v2.2d
326 ; CHECK-NEXT: movi v2.4s, #127, msl #8
327 ; CHECK-NEXT: ucvtf v0.2d, v0.2d
328 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
329 ; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
330 ; CHECK-NEXT: movi v1.4s, #1
331 ; CHECK-NEXT: ushr v3.4s, v0.4s, #16
332 ; CHECK-NEXT: add v2.4s, v0.4s, v2.4s
333 ; CHECK-NEXT: and v1.16b, v3.16b, v1.16b
334 ; CHECK-NEXT: fcmeq v3.4s, v0.4s, v0.4s
335 ; CHECK-NEXT: orr v0.4s, #64, lsl #16
336 ; CHECK-NEXT: add v1.4s, v1.4s, v2.4s
337 ; CHECK-NEXT: bit v0.16b, v1.16b, v3.16b
338 ; CHECK-NEXT: shrn v0.4h, v0.4s, #16
341 %c = uitofp <3 x i64> %a to <3 x bfloat>
; <4 x i64> -> <4 x bfloat> signed conversion (sitofp).
; Expected assembly: both .2d inputs are converted with scvtf and narrowed
; into one .4s vector with fcvtn/fcvtn2. Rounding is
; x + 0x7fff + ((x >> 16) & 1). Lanes that fail the fcmeq self-comparison keep
; the input with bit 22 set (orr #64, lsl #16) instead of the rounded value.
; shrn #16 extracts the upper halfwords.
345 define <4 x bfloat> @stofp_v4i64_v4bf16(<4 x i64> %a) {
346 ; CHECK-LABEL: stofp_v4i64_v4bf16:
347 ; CHECK: // %bb.0: // %entry
348 ; CHECK-NEXT: scvtf v0.2d, v0.2d
349 ; CHECK-NEXT: scvtf v1.2d, v1.2d
350 ; CHECK-NEXT: movi v2.4s, #127, msl #8
351 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
352 ; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
353 ; CHECK-NEXT: movi v1.4s, #1
354 ; CHECK-NEXT: ushr v3.4s, v0.4s, #16
355 ; CHECK-NEXT: add v2.4s, v0.4s, v2.4s
356 ; CHECK-NEXT: and v1.16b, v3.16b, v1.16b
357 ; CHECK-NEXT: fcmeq v3.4s, v0.4s, v0.4s
358 ; CHECK-NEXT: orr v0.4s, #64, lsl #16
359 ; CHECK-NEXT: add v1.4s, v1.4s, v2.4s
360 ; CHECK-NEXT: bit v0.16b, v1.16b, v3.16b
361 ; CHECK-NEXT: shrn v0.4h, v0.4s, #16
364 %c = sitofp <4 x i64> %a to <4 x bfloat>
; <4 x i64> -> <4 x bfloat> unsigned conversion (uitofp).
; Expected assembly: both .2d inputs are converted with ucvtf and narrowed
; into one .4s vector with fcvtn/fcvtn2. Rounding is
; x + 0x7fff + ((x >> 16) & 1). Lanes that fail the fcmeq self-comparison keep
; the input with bit 22 set (orr #64, lsl #16) instead of the rounded value.
; shrn #16 extracts the upper halfwords.
368 define <4 x bfloat> @utofp_v4i64_v4bf16(<4 x i64> %a) {
369 ; CHECK-LABEL: utofp_v4i64_v4bf16:
370 ; CHECK: // %bb.0: // %entry
371 ; CHECK-NEXT: ucvtf v0.2d, v0.2d
372 ; CHECK-NEXT: ucvtf v1.2d, v1.2d
373 ; CHECK-NEXT: movi v2.4s, #127, msl #8
374 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
375 ; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
376 ; CHECK-NEXT: movi v1.4s, #1
377 ; CHECK-NEXT: ushr v3.4s, v0.4s, #16
378 ; CHECK-NEXT: add v2.4s, v0.4s, v2.4s
379 ; CHECK-NEXT: and v1.16b, v3.16b, v1.16b
380 ; CHECK-NEXT: fcmeq v3.4s, v0.4s, v0.4s
381 ; CHECK-NEXT: orr v0.4s, #64, lsl #16
382 ; CHECK-NEXT: add v1.4s, v1.4s, v2.4s
383 ; CHECK-NEXT: bit v0.16b, v1.16b, v3.16b
384 ; CHECK-NEXT: shrn v0.4h, v0.4s, #16
387 %c = uitofp <4 x i64> %a to <4 x bfloat>
; <8 x i64> -> <8 x bfloat> signed conversion (sitofp).
; Expected assembly: the four .2d inputs are converted with scvtf and narrowed
; pairwise into two .4s vectors (v0 and v2) with fcvtn/fcvtn2. Each vector is
; rounded as x + 0x7fff + ((x >> 16) & 1). Lanes that fail the fcmeq
; self-comparison keep the input with bit 22 set (orr #64, lsl #16). A single
; uzp2 (.8h) gathers the upper halfwords of both vectors into the result.
391 define <8 x bfloat> @stofp_v8i64_v8bf16(<8 x i64> %a) {
392 ; CHECK-LABEL: stofp_v8i64_v8bf16:
393 ; CHECK: // %bb.0: // %entry
394 ; CHECK-NEXT: scvtf v2.2d, v2.2d
395 ; CHECK-NEXT: scvtf v0.2d, v0.2d
396 ; CHECK-NEXT: scvtf v3.2d, v3.2d
397 ; CHECK-NEXT: scvtf v1.2d, v1.2d
398 ; CHECK-NEXT: fcvtn v2.2s, v2.2d
399 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
400 ; CHECK-NEXT: fcvtn2 v2.4s, v3.2d
401 ; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
402 ; CHECK-NEXT: movi v1.4s, #1
403 ; CHECK-NEXT: movi v3.4s, #127, msl #8
404 ; CHECK-NEXT: ushr v4.4s, v2.4s, #16
405 ; CHECK-NEXT: ushr v5.4s, v0.4s, #16
406 ; CHECK-NEXT: add v6.4s, v2.4s, v3.4s
407 ; CHECK-NEXT: add v3.4s, v0.4s, v3.4s
408 ; CHECK-NEXT: and v4.16b, v4.16b, v1.16b
409 ; CHECK-NEXT: and v1.16b, v5.16b, v1.16b
410 ; CHECK-NEXT: fcmeq v5.4s, v2.4s, v2.4s
411 ; CHECK-NEXT: orr v2.4s, #64, lsl #16
412 ; CHECK-NEXT: add v4.4s, v4.4s, v6.4s
413 ; CHECK-NEXT: fcmeq v6.4s, v0.4s, v0.4s
414 ; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
415 ; CHECK-NEXT: orr v0.4s, #64, lsl #16
416 ; CHECK-NEXT: bit v2.16b, v4.16b, v5.16b
417 ; CHECK-NEXT: bit v0.16b, v1.16b, v6.16b
418 ; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
421 %c = sitofp <8 x i64> %a to <8 x bfloat>
; <8 x i64> -> <8 x bfloat> unsigned conversion (uitofp).
; Expected assembly: the four .2d inputs are converted with ucvtf and narrowed
; pairwise into two .4s vectors (v0 and v2) with fcvtn/fcvtn2. Each vector is
; rounded as x + 0x7fff + ((x >> 16) & 1). Lanes that fail the fcmeq
; self-comparison keep the input with bit 22 set (orr #64, lsl #16). A single
; uzp2 (.8h) gathers the upper halfwords of both vectors into the result.
425 define <8 x bfloat> @utofp_v8i64_v8bf16(<8 x i64> %a) {
426 ; CHECK-LABEL: utofp_v8i64_v8bf16:
427 ; CHECK: // %bb.0: // %entry
428 ; CHECK-NEXT: ucvtf v2.2d, v2.2d
429 ; CHECK-NEXT: ucvtf v0.2d, v0.2d
430 ; CHECK-NEXT: ucvtf v3.2d, v3.2d
431 ; CHECK-NEXT: ucvtf v1.2d, v1.2d
432 ; CHECK-NEXT: fcvtn v2.2s, v2.2d
433 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
434 ; CHECK-NEXT: fcvtn2 v2.4s, v3.2d
435 ; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
436 ; CHECK-NEXT: movi v1.4s, #1
437 ; CHECK-NEXT: movi v3.4s, #127, msl #8
438 ; CHECK-NEXT: ushr v4.4s, v2.4s, #16
439 ; CHECK-NEXT: ushr v5.4s, v0.4s, #16
440 ; CHECK-NEXT: add v6.4s, v2.4s, v3.4s
441 ; CHECK-NEXT: add v3.4s, v0.4s, v3.4s
442 ; CHECK-NEXT: and v4.16b, v4.16b, v1.16b
443 ; CHECK-NEXT: and v1.16b, v5.16b, v1.16b
444 ; CHECK-NEXT: fcmeq v5.4s, v2.4s, v2.4s
445 ; CHECK-NEXT: orr v2.4s, #64, lsl #16
446 ; CHECK-NEXT: add v4.4s, v4.4s, v6.4s
447 ; CHECK-NEXT: fcmeq v6.4s, v0.4s, v0.4s
448 ; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
449 ; CHECK-NEXT: orr v0.4s, #64, lsl #16
450 ; CHECK-NEXT: bit v2.16b, v4.16b, v5.16b
451 ; CHECK-NEXT: bit v0.16b, v1.16b, v6.16b
452 ; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
455 %c = uitofp <8 x i64> %a to <8 x bfloat>
; <16 x i64> -> <16 x bfloat> signed conversion (sitofp).
; Expected assembly: the eight .2d inputs (v0-v7) are converted with scvtf and
; narrowed pairwise into four .4s vectors with fcvtn/fcvtn2. Each vector is
; rounded as x + 0x7fff + ((x >> 16) & 1). The fcmeq self-comparison masks
; choose per lane between the rounded value and the input with bit 22 set
; (orr #64, lsl #16); the selects appear as a mix of bit, bif and bsl.
; Two uzp2 (.8h) instructions gather the upper halfwords into v0 and v1.
459 define <16 x bfloat> @stofp_v16i64_v16bf16(<16 x i64> %a) {
460 ; CHECK-LABEL: stofp_v16i64_v16bf16:
461 ; CHECK: // %bb.0: // %entry
462 ; CHECK-NEXT: scvtf v0.2d, v0.2d
463 ; CHECK-NEXT: scvtf v2.2d, v2.2d
464 ; CHECK-NEXT: scvtf v6.2d, v6.2d
465 ; CHECK-NEXT: scvtf v4.2d, v4.2d
466 ; CHECK-NEXT: scvtf v1.2d, v1.2d
467 ; CHECK-NEXT: scvtf v3.2d, v3.2d
468 ; CHECK-NEXT: scvtf v7.2d, v7.2d
469 ; CHECK-NEXT: scvtf v5.2d, v5.2d
470 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
471 ; CHECK-NEXT: fcvtn v2.2s, v2.2d
472 ; CHECK-NEXT: fcvtn v6.2s, v6.2d
473 ; CHECK-NEXT: fcvtn v4.2s, v4.2d
474 ; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
475 ; CHECK-NEXT: fcvtn2 v2.4s, v3.2d
476 ; CHECK-NEXT: fcvtn2 v6.4s, v7.2d
477 ; CHECK-NEXT: fcvtn2 v4.4s, v5.2d
478 ; CHECK-NEXT: movi v1.4s, #1
479 ; CHECK-NEXT: movi v3.4s, #127, msl #8
480 ; CHECK-NEXT: ushr v7.4s, v0.4s, #16
481 ; CHECK-NEXT: ushr v5.4s, v2.4s, #16
482 ; CHECK-NEXT: ushr v16.4s, v6.4s, #16
483 ; CHECK-NEXT: ushr v17.4s, v4.4s, #16
484 ; CHECK-NEXT: add v19.4s, v0.4s, v3.4s
485 ; CHECK-NEXT: add v18.4s, v2.4s, v3.4s
486 ; CHECK-NEXT: add v20.4s, v6.4s, v3.4s
487 ; CHECK-NEXT: add v3.4s, v4.4s, v3.4s
488 ; CHECK-NEXT: and v7.16b, v7.16b, v1.16b
489 ; CHECK-NEXT: and v5.16b, v5.16b, v1.16b
490 ; CHECK-NEXT: and v16.16b, v16.16b, v1.16b
491 ; CHECK-NEXT: and v1.16b, v17.16b, v1.16b
492 ; CHECK-NEXT: fcmeq v17.4s, v2.4s, v2.4s
493 ; CHECK-NEXT: orr v2.4s, #64, lsl #16
494 ; CHECK-NEXT: add v7.4s, v7.4s, v19.4s
495 ; CHECK-NEXT: fcmeq v19.4s, v6.4s, v6.4s
496 ; CHECK-NEXT: add v5.4s, v5.4s, v18.4s
497 ; CHECK-NEXT: fcmeq v18.4s, v0.4s, v0.4s
498 ; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
499 ; CHECK-NEXT: fcmeq v3.4s, v4.4s, v4.4s
500 ; CHECK-NEXT: add v16.4s, v16.4s, v20.4s
501 ; CHECK-NEXT: orr v0.4s, #64, lsl #16
502 ; CHECK-NEXT: orr v6.4s, #64, lsl #16
503 ; CHECK-NEXT: orr v4.4s, #64, lsl #16
504 ; CHECK-NEXT: bit v2.16b, v5.16b, v17.16b
505 ; CHECK-NEXT: mov v5.16b, v19.16b
506 ; CHECK-NEXT: bit v0.16b, v7.16b, v18.16b
507 ; CHECK-NEXT: bif v1.16b, v4.16b, v3.16b
508 ; CHECK-NEXT: bsl v5.16b, v16.16b, v6.16b
509 ; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
510 ; CHECK-NEXT: uzp2 v1.8h, v1.8h, v5.8h
513 %c = sitofp <16 x i64> %a to <16 x bfloat>
; <16 x i64> -> <16 x bfloat> unsigned conversion (uitofp).
; Expected assembly: the eight .2d inputs (v0-v7) are converted with ucvtf and
; narrowed pairwise into four .4s vectors with fcvtn/fcvtn2. Each vector is
; rounded as x + 0x7fff + ((x >> 16) & 1). The fcmeq self-comparison masks
; choose per lane between the rounded value and the input with bit 22 set
; (orr #64, lsl #16); the selects appear as a mix of bit, bif and bsl.
; Two uzp2 (.8h) instructions gather the upper halfwords into v0 and v1.
517 define <16 x bfloat> @utofp_v16i64_v16bf16(<16 x i64> %a) {
518 ; CHECK-LABEL: utofp_v16i64_v16bf16:
519 ; CHECK: // %bb.0: // %entry
520 ; CHECK-NEXT: ucvtf v0.2d, v0.2d
521 ; CHECK-NEXT: ucvtf v2.2d, v2.2d
522 ; CHECK-NEXT: ucvtf v6.2d, v6.2d
523 ; CHECK-NEXT: ucvtf v4.2d, v4.2d
524 ; CHECK-NEXT: ucvtf v1.2d, v1.2d
525 ; CHECK-NEXT: ucvtf v3.2d, v3.2d
526 ; CHECK-NEXT: ucvtf v7.2d, v7.2d
527 ; CHECK-NEXT: ucvtf v5.2d, v5.2d
528 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
529 ; CHECK-NEXT: fcvtn v2.2s, v2.2d
530 ; CHECK-NEXT: fcvtn v6.2s, v6.2d
531 ; CHECK-NEXT: fcvtn v4.2s, v4.2d
532 ; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
533 ; CHECK-NEXT: fcvtn2 v2.4s, v3.2d
534 ; CHECK-NEXT: fcvtn2 v6.4s, v7.2d
535 ; CHECK-NEXT: fcvtn2 v4.4s, v5.2d
536 ; CHECK-NEXT: movi v1.4s, #1
537 ; CHECK-NEXT: movi v3.4s, #127, msl #8
538 ; CHECK-NEXT: ushr v7.4s, v0.4s, #16
539 ; CHECK-NEXT: ushr v5.4s, v2.4s, #16
540 ; CHECK-NEXT: ushr v16.4s, v6.4s, #16
541 ; CHECK-NEXT: ushr v17.4s, v4.4s, #16
542 ; CHECK-NEXT: add v19.4s, v0.4s, v3.4s
543 ; CHECK-NEXT: add v18.4s, v2.4s, v3.4s
544 ; CHECK-NEXT: add v20.4s, v6.4s, v3.4s
545 ; CHECK-NEXT: add v3.4s, v4.4s, v3.4s
546 ; CHECK-NEXT: and v7.16b, v7.16b, v1.16b
547 ; CHECK-NEXT: and v5.16b, v5.16b, v1.16b
548 ; CHECK-NEXT: and v16.16b, v16.16b, v1.16b
549 ; CHECK-NEXT: and v1.16b, v17.16b, v1.16b
550 ; CHECK-NEXT: fcmeq v17.4s, v2.4s, v2.4s
551 ; CHECK-NEXT: orr v2.4s, #64, lsl #16
552 ; CHECK-NEXT: add v7.4s, v7.4s, v19.4s
553 ; CHECK-NEXT: fcmeq v19.4s, v6.4s, v6.4s
554 ; CHECK-NEXT: add v5.4s, v5.4s, v18.4s
555 ; CHECK-NEXT: fcmeq v18.4s, v0.4s, v0.4s
556 ; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
557 ; CHECK-NEXT: fcmeq v3.4s, v4.4s, v4.4s
558 ; CHECK-NEXT: add v16.4s, v16.4s, v20.4s
559 ; CHECK-NEXT: orr v0.4s, #64, lsl #16
560 ; CHECK-NEXT: orr v6.4s, #64, lsl #16
561 ; CHECK-NEXT: orr v4.4s, #64, lsl #16
562 ; CHECK-NEXT: bit v2.16b, v5.16b, v17.16b
563 ; CHECK-NEXT: mov v5.16b, v19.16b
564 ; CHECK-NEXT: bit v0.16b, v7.16b, v18.16b
565 ; CHECK-NEXT: bif v1.16b, v4.16b, v3.16b
566 ; CHECK-NEXT: bsl v5.16b, v16.16b, v6.16b
567 ; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
568 ; CHECK-NEXT: uzp2 v1.8h, v1.8h, v5.8h
571 %c = uitofp <16 x i64> %a to <16 x bfloat>
; <32 x i64> -> <32 x bfloat> signed conversion (sitofp).
; Expected assembly: the first eight .2d inputs are read from v0-v7. The other
; eight are loaded from the stack with ldp at [sp], [sp, #32], [sp, #64] and
; [sp, #96]. All sixteen are converted with scvtf and narrowed pairwise into
; eight .4s vectors with fcvtn/fcvtn2. Each vector is rounded as
; x + 0x7fff + ((x >> 16) & 1). The fcmeq self-comparison masks choose per lane
; between the rounded value and the input with bit 22 set (orr #64, lsl #16);
; the selects appear as a mix of bit and bsl. Four uzp2 (.8h) instructions
; gather the upper halfwords into v0-v3.
575 define <32 x bfloat> @stofp_v32i64_v32bf16(<32 x i64> %a) {
576 ; CHECK-LABEL: stofp_v32i64_v32bf16:
577 ; CHECK: // %bb.0: // %entry
578 ; CHECK-NEXT: scvtf v17.2d, v2.2d
579 ; CHECK-NEXT: scvtf v18.2d, v0.2d
580 ; CHECK-NEXT: scvtf v19.2d, v3.2d
581 ; CHECK-NEXT: scvtf v3.2d, v6.2d
582 ; CHECK-NEXT: ldp q21, q20, [sp, #32]
583 ; CHECK-NEXT: scvtf v4.2d, v4.2d
584 ; CHECK-NEXT: scvtf v6.2d, v7.2d
585 ; CHECK-NEXT: scvtf v5.2d, v5.2d
586 ; CHECK-NEXT: ldp q24, q23, [sp, #64]
587 ; CHECK-NEXT: movi v16.4s, #1
588 ; CHECK-NEXT: fcvtn v0.2s, v17.2d
589 ; CHECK-NEXT: scvtf v17.2d, v1.2d
590 ; CHECK-NEXT: fcvtn v1.2s, v18.2d
591 ; CHECK-NEXT: fcvtn v3.2s, v3.2d
592 ; CHECK-NEXT: ldp q18, q7, [sp]
593 ; CHECK-NEXT: scvtf v21.2d, v21.2d
594 ; CHECK-NEXT: fcvtn v4.2s, v4.2d
595 ; CHECK-NEXT: movi v2.4s, #127, msl #8
596 ; CHECK-NEXT: scvtf v20.2d, v20.2d
597 ; CHECK-NEXT: fcvtn2 v0.4s, v19.2d
598 ; CHECK-NEXT: ldp q22, q19, [sp, #96]
599 ; CHECK-NEXT: fcvtn2 v1.4s, v17.2d
600 ; CHECK-NEXT: fcvtn2 v3.4s, v6.2d
601 ; CHECK-NEXT: scvtf v18.2d, v18.2d
602 ; CHECK-NEXT: scvtf v17.2d, v24.2d
603 ; CHECK-NEXT: fcvtn v6.2s, v21.2d
604 ; CHECK-NEXT: fcvtn2 v4.4s, v5.2d
605 ; CHECK-NEXT: scvtf v22.2d, v22.2d
606 ; CHECK-NEXT: scvtf v21.2d, v23.2d
607 ; CHECK-NEXT: scvtf v7.2d, v7.2d
608 ; CHECK-NEXT: ushr v24.4s, v0.4s, #16
609 ; CHECK-NEXT: add v5.4s, v0.4s, v2.4s
610 ; CHECK-NEXT: scvtf v19.2d, v19.2d
611 ; CHECK-NEXT: ushr v23.4s, v1.4s, #16
612 ; CHECK-NEXT: ushr v25.4s, v3.4s, #16
613 ; CHECK-NEXT: fcvtn v18.2s, v18.2d
614 ; CHECK-NEXT: fcvtn2 v6.4s, v20.2d
615 ; CHECK-NEXT: add v26.4s, v1.4s, v2.4s
616 ; CHECK-NEXT: fcvtn v17.2s, v17.2d
617 ; CHECK-NEXT: and v24.16b, v24.16b, v16.16b
618 ; CHECK-NEXT: fcvtn v22.2s, v22.2d
619 ; CHECK-NEXT: fcmeq v20.4s, v0.4s, v0.4s
620 ; CHECK-NEXT: and v23.16b, v23.16b, v16.16b
621 ; CHECK-NEXT: orr v0.4s, #64, lsl #16
622 ; CHECK-NEXT: fcmeq v27.4s, v3.4s, v3.4s
623 ; CHECK-NEXT: fcvtn2 v18.4s, v7.2d
624 ; CHECK-NEXT: add v7.4s, v3.4s, v2.4s
625 ; CHECK-NEXT: orr v3.4s, #64, lsl #16
626 ; CHECK-NEXT: add v5.4s, v24.4s, v5.4s
627 ; CHECK-NEXT: and v24.16b, v25.16b, v16.16b
628 ; CHECK-NEXT: ushr v25.4s, v4.4s, #16
629 ; CHECK-NEXT: fcvtn2 v22.4s, v19.2d
630 ; CHECK-NEXT: add v19.4s, v23.4s, v26.4s
631 ; CHECK-NEXT: ushr v26.4s, v6.4s, #16
632 ; CHECK-NEXT: fcvtn2 v17.4s, v21.2d
633 ; CHECK-NEXT: fcmeq v21.4s, v1.4s, v1.4s
634 ; CHECK-NEXT: orr v1.4s, #64, lsl #16
635 ; CHECK-NEXT: and v23.16b, v25.16b, v16.16b
636 ; CHECK-NEXT: add v25.4s, v4.4s, v2.4s
637 ; CHECK-NEXT: add v7.4s, v24.4s, v7.4s
638 ; CHECK-NEXT: ushr v24.4s, v18.4s, #16
639 ; CHECK-NEXT: add v30.4s, v18.4s, v2.4s
640 ; CHECK-NEXT: bit v0.16b, v5.16b, v20.16b
641 ; CHECK-NEXT: ushr v28.4s, v22.4s, #16
642 ; CHECK-NEXT: add v31.4s, v22.4s, v2.4s
643 ; CHECK-NEXT: add v23.4s, v23.4s, v25.4s
644 ; CHECK-NEXT: and v25.16b, v26.16b, v16.16b
645 ; CHECK-NEXT: add v26.4s, v6.4s, v2.4s
646 ; CHECK-NEXT: ushr v29.4s, v17.4s, #16
647 ; CHECK-NEXT: and v24.16b, v24.16b, v16.16b
648 ; CHECK-NEXT: add v2.4s, v17.4s, v2.4s
649 ; CHECK-NEXT: and v28.16b, v28.16b, v16.16b
650 ; CHECK-NEXT: bit v3.16b, v7.16b, v27.16b
651 ; CHECK-NEXT: bit v1.16b, v19.16b, v21.16b
652 ; CHECK-NEXT: add v25.4s, v25.4s, v26.4s
653 ; CHECK-NEXT: fcmeq v26.4s, v6.4s, v6.4s
654 ; CHECK-NEXT: orr v6.4s, #64, lsl #16
655 ; CHECK-NEXT: and v16.16b, v29.16b, v16.16b
656 ; CHECK-NEXT: add v24.4s, v24.4s, v30.4s
657 ; CHECK-NEXT: fcmeq v30.4s, v18.4s, v18.4s
658 ; CHECK-NEXT: add v28.4s, v28.4s, v31.4s
659 ; CHECK-NEXT: fcmeq v31.4s, v22.4s, v22.4s
660 ; CHECK-NEXT: fcmeq v29.4s, v4.4s, v4.4s
661 ; CHECK-NEXT: orr v4.4s, #64, lsl #16
662 ; CHECK-NEXT: orr v18.4s, #64, lsl #16
663 ; CHECK-NEXT: orr v22.4s, #64, lsl #16
664 ; CHECK-NEXT: mov v5.16b, v26.16b
665 ; CHECK-NEXT: add v2.4s, v16.4s, v2.4s
666 ; CHECK-NEXT: fcmeq v16.4s, v17.4s, v17.4s
667 ; CHECK-NEXT: orr v17.4s, #64, lsl #16
668 ; CHECK-NEXT: uzp2 v0.8h, v1.8h, v0.8h
669 ; CHECK-NEXT: mov v7.16b, v31.16b
670 ; CHECK-NEXT: bit v4.16b, v23.16b, v29.16b
671 ; CHECK-NEXT: bsl v5.16b, v25.16b, v6.16b
672 ; CHECK-NEXT: mov v6.16b, v30.16b
673 ; CHECK-NEXT: bsl v16.16b, v2.16b, v17.16b
674 ; CHECK-NEXT: bsl v7.16b, v28.16b, v22.16b
675 ; CHECK-NEXT: bsl v6.16b, v24.16b, v18.16b
676 ; CHECK-NEXT: uzp2 v1.8h, v4.8h, v3.8h
677 ; CHECK-NEXT: uzp2 v3.8h, v16.8h, v7.8h
678 ; CHECK-NEXT: uzp2 v2.8h, v6.8h, v5.8h
681 %c = sitofp <32 x i64> %a to <32 x bfloat>
; <32 x i64> -> <32 x bfloat> unsigned conversion (uitofp).
; Expected assembly: the first eight .2d inputs are read from v0-v7. The other
; eight are loaded from the stack with ldp at [sp], [sp, #32], [sp, #64] and
; [sp, #96]. All sixteen are converted with ucvtf and narrowed pairwise into
; eight .4s vectors with fcvtn/fcvtn2. Each vector is rounded as
; x + 0x7fff + ((x >> 16) & 1). The fcmeq self-comparison masks choose per lane
; between the rounded value and the input with bit 22 set (orr #64, lsl #16);
; the selects appear as a mix of bit and bsl. Four uzp2 (.8h) instructions
; gather the upper halfwords into v0-v3.
685 define <32 x bfloat> @utofp_v32i64_v32bf16(<32 x i64> %a) {
686 ; CHECK-LABEL: utofp_v32i64_v32bf16:
687 ; CHECK: // %bb.0: // %entry
688 ; CHECK-NEXT: ucvtf v17.2d, v2.2d
689 ; CHECK-NEXT: ucvtf v18.2d, v0.2d
690 ; CHECK-NEXT: ucvtf v19.2d, v3.2d
691 ; CHECK-NEXT: ucvtf v3.2d, v6.2d
692 ; CHECK-NEXT: ldp q21, q20, [sp, #32]
693 ; CHECK-NEXT: ucvtf v4.2d, v4.2d
694 ; CHECK-NEXT: ucvtf v6.2d, v7.2d
695 ; CHECK-NEXT: ucvtf v5.2d, v5.2d
696 ; CHECK-NEXT: ldp q24, q23, [sp, #64]
697 ; CHECK-NEXT: movi v16.4s, #1
698 ; CHECK-NEXT: fcvtn v0.2s, v17.2d
699 ; CHECK-NEXT: ucvtf v17.2d, v1.2d
700 ; CHECK-NEXT: fcvtn v1.2s, v18.2d
701 ; CHECK-NEXT: fcvtn v3.2s, v3.2d
702 ; CHECK-NEXT: ldp q18, q7, [sp]
703 ; CHECK-NEXT: ucvtf v21.2d, v21.2d
704 ; CHECK-NEXT: fcvtn v4.2s, v4.2d
705 ; CHECK-NEXT: movi v2.4s, #127, msl #8
706 ; CHECK-NEXT: ucvtf v20.2d, v20.2d
707 ; CHECK-NEXT: fcvtn2 v0.4s, v19.2d
708 ; CHECK-NEXT: ldp q22, q19, [sp, #96]
709 ; CHECK-NEXT: fcvtn2 v1.4s, v17.2d
710 ; CHECK-NEXT: fcvtn2 v3.4s, v6.2d
711 ; CHECK-NEXT: ucvtf v18.2d, v18.2d
712 ; CHECK-NEXT: ucvtf v17.2d, v24.2d
713 ; CHECK-NEXT: fcvtn v6.2s, v21.2d
714 ; CHECK-NEXT: fcvtn2 v4.4s, v5.2d
715 ; CHECK-NEXT: ucvtf v22.2d, v22.2d
716 ; CHECK-NEXT: ucvtf v21.2d, v23.2d
717 ; CHECK-NEXT: ucvtf v7.2d, v7.2d
718 ; CHECK-NEXT: ushr v24.4s, v0.4s, #16
719 ; CHECK-NEXT: add v5.4s, v0.4s, v2.4s
720 ; CHECK-NEXT: ucvtf v19.2d, v19.2d
721 ; CHECK-NEXT: ushr v23.4s, v1.4s, #16
722 ; CHECK-NEXT: ushr v25.4s, v3.4s, #16
723 ; CHECK-NEXT: fcvtn v18.2s, v18.2d
724 ; CHECK-NEXT: fcvtn2 v6.4s, v20.2d
725 ; CHECK-NEXT: add v26.4s, v1.4s, v2.4s
726 ; CHECK-NEXT: fcvtn v17.2s, v17.2d
727 ; CHECK-NEXT: and v24.16b, v24.16b, v16.16b
728 ; CHECK-NEXT: fcvtn v22.2s, v22.2d
729 ; CHECK-NEXT: fcmeq v20.4s, v0.4s, v0.4s
730 ; CHECK-NEXT: and v23.16b, v23.16b, v16.16b
731 ; CHECK-NEXT: orr v0.4s, #64, lsl #16
732 ; CHECK-NEXT: fcmeq v27.4s, v3.4s, v3.4s
733 ; CHECK-NEXT: fcvtn2 v18.4s, v7.2d
734 ; CHECK-NEXT: add v7.4s, v3.4s, v2.4s
735 ; CHECK-NEXT: orr v3.4s, #64, lsl #16
736 ; CHECK-NEXT: add v5.4s, v24.4s, v5.4s
737 ; CHECK-NEXT: and v24.16b, v25.16b, v16.16b
738 ; CHECK-NEXT: ushr v25.4s, v4.4s, #16
739 ; CHECK-NEXT: fcvtn2 v22.4s, v19.2d
740 ; CHECK-NEXT: add v19.4s, v23.4s, v26.4s
741 ; CHECK-NEXT: ushr v26.4s, v6.4s, #16
742 ; CHECK-NEXT: fcvtn2 v17.4s, v21.2d
743 ; CHECK-NEXT: fcmeq v21.4s, v1.4s, v1.4s
744 ; CHECK-NEXT: orr v1.4s, #64, lsl #16
745 ; CHECK-NEXT: and v23.16b, v25.16b, v16.16b
746 ; CHECK-NEXT: add v25.4s, v4.4s, v2.4s
747 ; CHECK-NEXT: add v7.4s, v24.4s, v7.4s
748 ; CHECK-NEXT: ushr v24.4s, v18.4s, #16
749 ; CHECK-NEXT: add v30.4s, v18.4s, v2.4s
750 ; CHECK-NEXT: bit v0.16b, v5.16b, v20.16b
751 ; CHECK-NEXT: ushr v28.4s, v22.4s, #16
752 ; CHECK-NEXT: add v31.4s, v22.4s, v2.4s
753 ; CHECK-NEXT: add v23.4s, v23.4s, v25.4s
754 ; CHECK-NEXT: and v25.16b, v26.16b, v16.16b
755 ; CHECK-NEXT: add v26.4s, v6.4s, v2.4s
756 ; CHECK-NEXT: ushr v29.4s, v17.4s, #16
757 ; CHECK-NEXT: and v24.16b, v24.16b, v16.16b
758 ; CHECK-NEXT: add v2.4s, v17.4s, v2.4s
759 ; CHECK-NEXT: and v28.16b, v28.16b, v16.16b
760 ; CHECK-NEXT: bit v3.16b, v7.16b, v27.16b
761 ; CHECK-NEXT: bit v1.16b, v19.16b, v21.16b
762 ; CHECK-NEXT: add v25.4s, v25.4s, v26.4s
763 ; CHECK-NEXT: fcmeq v26.4s, v6.4s, v6.4s
764 ; CHECK-NEXT: orr v6.4s, #64, lsl #16
765 ; CHECK-NEXT: and v16.16b, v29.16b, v16.16b
766 ; CHECK-NEXT: add v24.4s, v24.4s, v30.4s
767 ; CHECK-NEXT: fcmeq v30.4s, v18.4s, v18.4s
768 ; CHECK-NEXT: add v28.4s, v28.4s, v31.4s
769 ; CHECK-NEXT: fcmeq v31.4s, v22.4s, v22.4s
770 ; CHECK-NEXT: fcmeq v29.4s, v4.4s, v4.4s
771 ; CHECK-NEXT: orr v4.4s, #64, lsl #16
772 ; CHECK-NEXT: orr v18.4s, #64, lsl #16
773 ; CHECK-NEXT: orr v22.4s, #64, lsl #16
774 ; CHECK-NEXT: mov v5.16b, v26.16b
775 ; CHECK-NEXT: add v2.4s, v16.4s, v2.4s
776 ; CHECK-NEXT: fcmeq v16.4s, v17.4s, v17.4s
777 ; CHECK-NEXT: orr v17.4s, #64, lsl #16
778 ; CHECK-NEXT: uzp2 v0.8h, v1.8h, v0.8h
779 ; CHECK-NEXT: mov v7.16b, v31.16b
780 ; CHECK-NEXT: bit v4.16b, v23.16b, v29.16b
781 ; CHECK-NEXT: bsl v5.16b, v25.16b, v6.16b
782 ; CHECK-NEXT: mov v6.16b, v30.16b
783 ; CHECK-NEXT: bsl v16.16b, v2.16b, v17.16b
784 ; CHECK-NEXT: bsl v7.16b, v28.16b, v22.16b
785 ; CHECK-NEXT: bsl v6.16b, v24.16b, v18.16b
786 ; CHECK-NEXT: uzp2 v1.8h, v4.8h, v3.8h
787 ; CHECK-NEXT: uzp2 v3.8h, v16.8h, v7.8h
788 ; CHECK-NEXT: uzp2 v2.8h, v6.8h, v5.8h
791 %c = uitofp <32 x i64> %a to <32 x bfloat>
; <2 x i32> -> <2 x bfloat> signed conversion (sitofp).
; Expected assembly: the d0 argument is treated as q0 (see the kill note) and
; converted as a full .4s vector with scvtf. Bit 16 of each lane is added to
; the lane, then addhn adds the 0x7fff constant and keeps the upper halfword
; of each sum. There is no fcmeq/select step in this sequence.
795 define <2 x bfloat> @stofp_v2i32_v2bf16(<2 x i32> %a) {
796 ; CHECK-LABEL: stofp_v2i32_v2bf16:
797 ; CHECK: // %bb.0: // %entry
798 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
799 ; CHECK-NEXT: movi v1.4s, #1
800 ; CHECK-NEXT: scvtf v0.4s, v0.4s
801 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
802 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
803 ; CHECK-NEXT: movi v2.4s, #127, msl #8
804 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
805 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
808 %c = sitofp <2 x i32> %a to <2 x bfloat>
; <2 x i32> -> <2 x bfloat> unsigned conversion (uitofp).
; Expected assembly: the d0 argument is treated as q0 (see the kill note) and
; converted as a full .4s vector with ucvtf. Bit 16 of each lane is added to
; the lane, then addhn adds the 0x7fff constant and keeps the upper halfword
; of each sum. There is no fcmeq/select step in this sequence.
812 define <2 x bfloat> @utofp_v2i32_v2bf16(<2 x i32> %a) {
813 ; CHECK-LABEL: utofp_v2i32_v2bf16:
814 ; CHECK: // %bb.0: // %entry
815 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
816 ; CHECK-NEXT: movi v1.4s, #1
817 ; CHECK-NEXT: ucvtf v0.4s, v0.4s
818 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
819 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
820 ; CHECK-NEXT: movi v2.4s, #127, msl #8
821 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
822 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
825 %c = uitofp <2 x i32> %a to <2 x bfloat>
; <3 x i32> -> <3 x bfloat> signed conversion (sitofp).
; Expected assembly: the input is converted as a full .4s vector with scvtf.
; Bit 16 of each lane is added to the lane, then addhn adds the 0x7fff
; constant and keeps the upper halfword of each sum.
829 define <3 x bfloat> @stofp_v3i32_v3bf16(<3 x i32> %a) {
830 ; CHECK-LABEL: stofp_v3i32_v3bf16:
831 ; CHECK: // %bb.0: // %entry
832 ; CHECK-NEXT: scvtf v0.4s, v0.4s
833 ; CHECK-NEXT: movi v1.4s, #1
834 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
835 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
836 ; CHECK-NEXT: movi v2.4s, #127, msl #8
837 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
838 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
841 %c = sitofp <3 x i32> %a to <3 x bfloat>
; <3 x i32> -> <3 x bfloat> unsigned conversion (uitofp).
; Expected assembly: the input is converted as a full .4s vector with ucvtf.
; Bit 16 of each lane is added to the lane, then addhn adds the 0x7fff
; constant and keeps the upper halfword of each sum.
845 define <3 x bfloat> @utofp_v3i32_v3bf16(<3 x i32> %a) {
846 ; CHECK-LABEL: utofp_v3i32_v3bf16:
847 ; CHECK: // %bb.0: // %entry
848 ; CHECK-NEXT: ucvtf v0.4s, v0.4s
849 ; CHECK-NEXT: movi v1.4s, #1
850 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
851 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
852 ; CHECK-NEXT: movi v2.4s, #127, msl #8
853 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
854 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
857 %c = uitofp <3 x i32> %a to <3 x bfloat>
; <4 x i32> -> <4 x bfloat> signed conversion (sitofp).
; Expected assembly: scvtf on the .4s input. Bit 16 of each lane is added to
; the lane, then addhn adds the 0x7fff constant and keeps the upper halfword
; of each sum.
861 define <4 x bfloat> @stofp_v4i32_v4bf16(<4 x i32> %a) {
862 ; CHECK-LABEL: stofp_v4i32_v4bf16:
863 ; CHECK: // %bb.0: // %entry
864 ; CHECK-NEXT: scvtf v0.4s, v0.4s
865 ; CHECK-NEXT: movi v1.4s, #1
866 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
867 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
868 ; CHECK-NEXT: movi v2.4s, #127, msl #8
869 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
870 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
873 %c = sitofp <4 x i32> %a to <4 x bfloat>
; <4 x i32> -> <4 x bfloat> unsigned conversion (uitofp).
; Expected assembly: ucvtf on the .4s input. Bit 16 of each lane is added to
; the lane, then addhn adds the 0x7fff constant and keeps the upper halfword
; of each sum.
877 define <4 x bfloat> @utofp_v4i32_v4bf16(<4 x i32> %a) {
878 ; CHECK-LABEL: utofp_v4i32_v4bf16:
879 ; CHECK: // %bb.0: // %entry
880 ; CHECK-NEXT: ucvtf v0.4s, v0.4s
881 ; CHECK-NEXT: movi v1.4s, #1
882 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
883 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
884 ; CHECK-NEXT: movi v2.4s, #127, msl #8
885 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
886 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
889 %c = uitofp <4 x i32> %a to <4 x bfloat>
; <8 x i32> -> <8 x bfloat> signed conversion (sitofp).
; Expected assembly: both .4s inputs are converted with scvtf and bit 16 of
; each lane is added to the lane. addhn (low half) and addhn2 (high half) add
; the 0x7fff constant and pack the upper halfwords of both vectors into v0.
893 define <8 x bfloat> @stofp_v8i32_v8bf16(<8 x i32> %a) {
894 ; CHECK-LABEL: stofp_v8i32_v8bf16:
895 ; CHECK: // %bb.0: // %entry
896 ; CHECK-NEXT: scvtf v0.4s, v0.4s
897 ; CHECK-NEXT: movi v2.4s, #1
898 ; CHECK-NEXT: scvtf v1.4s, v1.4s
899 ; CHECK-NEXT: movi v5.4s, #127, msl #8
900 ; CHECK-NEXT: ushr v3.4s, v0.4s, #16
901 ; CHECK-NEXT: ushr v4.4s, v1.4s, #16
902 ; CHECK-NEXT: and v3.16b, v3.16b, v2.16b
903 ; CHECK-NEXT: and v2.16b, v4.16b, v2.16b
904 ; CHECK-NEXT: add v0.4s, v3.4s, v0.4s
905 ; CHECK-NEXT: add v1.4s, v2.4s, v1.4s
906 ; CHECK-NEXT: addhn v0.4h, v0.4s, v5.4s
907 ; CHECK-NEXT: addhn2 v0.8h, v1.4s, v5.4s
910 %c = sitofp <8 x i32> %a to <8 x bfloat>
; <8 x i32> -> <8 x bfloat> unsigned conversion (uitofp).
; Expected assembly: both .4s inputs are converted with ucvtf and bit 16 of
; each lane is added to the lane. addhn (low half) and addhn2 (high half) add
; the 0x7fff constant and pack the upper halfwords of both vectors into v0.
914 define <8 x bfloat> @utofp_v8i32_v8bf16(<8 x i32> %a) {
915 ; CHECK-LABEL: utofp_v8i32_v8bf16:
916 ; CHECK: // %bb.0: // %entry
917 ; CHECK-NEXT: ucvtf v0.4s, v0.4s
918 ; CHECK-NEXT: movi v2.4s, #1
919 ; CHECK-NEXT: ucvtf v1.4s, v1.4s
920 ; CHECK-NEXT: movi v5.4s, #127, msl #8
921 ; CHECK-NEXT: ushr v3.4s, v0.4s, #16
922 ; CHECK-NEXT: ushr v4.4s, v1.4s, #16
923 ; CHECK-NEXT: and v3.16b, v3.16b, v2.16b
924 ; CHECK-NEXT: and v2.16b, v4.16b, v2.16b
925 ; CHECK-NEXT: add v0.4s, v3.4s, v0.4s
926 ; CHECK-NEXT: add v1.4s, v2.4s, v1.4s
927 ; CHECK-NEXT: addhn v0.4h, v0.4s, v5.4s
928 ; CHECK-NEXT: addhn2 v0.8h, v1.4s, v5.4s
931 %c = uitofp <8 x i32> %a to <8 x bfloat>
; sitofp <16 x i32> -> <16 x bfloat>. The input occupies v0-v3; every register
; is converted with scvtf, bit 16 of each lane is added back, and addhn/addhn2
; add the 0x7fff bias (v17) while packing the results into v0.8h and v1.8h.
935 define <16 x bfloat> @stofp_v16i32_v16bf16(<16 x i32> %a) {
936 ; CHECK-LABEL: stofp_v16i32_v16bf16:
937 ; CHECK: // %bb.0: // %entry
938 ; CHECK-NEXT: scvtf v2.4s, v2.4s
939 ; CHECK-NEXT: scvtf v0.4s, v0.4s
940 ; CHECK-NEXT: scvtf v4.4s, v1.4s
941 ; CHECK-NEXT: movi v1.4s, #1
942 ; CHECK-NEXT: scvtf v3.4s, v3.4s
943 ; CHECK-NEXT: movi v17.4s, #127, msl #8
944 ; CHECK-NEXT: ushr v5.4s, v0.4s, #16
945 ; CHECK-NEXT: ushr v6.4s, v2.4s, #16
946 ; CHECK-NEXT: ushr v7.4s, v4.4s, #16
947 ; CHECK-NEXT: ushr v16.4s, v3.4s, #16
948 ; CHECK-NEXT: and v5.16b, v5.16b, v1.16b
949 ; CHECK-NEXT: and v6.16b, v6.16b, v1.16b
950 ; CHECK-NEXT: add v0.4s, v5.4s, v0.4s
951 ; CHECK-NEXT: add v2.4s, v6.4s, v2.4s
952 ; CHECK-NEXT: and v5.16b, v7.16b, v1.16b
953 ; CHECK-NEXT: and v6.16b, v16.16b, v1.16b
954 ; CHECK-NEXT: addhn v0.4h, v0.4s, v17.4s
955 ; CHECK-NEXT: addhn v1.4h, v2.4s, v17.4s
956 ; CHECK-NEXT: add v2.4s, v5.4s, v4.4s
957 ; CHECK-NEXT: add v3.4s, v6.4s, v3.4s
958 ; CHECK-NEXT: addhn2 v0.8h, v2.4s, v17.4s
959 ; CHECK-NEXT: addhn2 v1.8h, v3.4s, v17.4s
962 %c = sitofp <16 x i32> %a to <16 x bfloat>
; uitofp <16 x i32> -> <16 x bfloat>. The input occupies v0-v3; every register
; is converted with ucvtf, bit 16 of each lane is added back, and addhn/addhn2
; add the 0x7fff bias (v17) while packing the results into v0.8h and v1.8h.
966 define <16 x bfloat> @utofp_v16i32_v16bf16(<16 x i32> %a) {
967 ; CHECK-LABEL: utofp_v16i32_v16bf16:
968 ; CHECK: // %bb.0: // %entry
969 ; CHECK-NEXT: ucvtf v2.4s, v2.4s
970 ; CHECK-NEXT: ucvtf v0.4s, v0.4s
971 ; CHECK-NEXT: ucvtf v4.4s, v1.4s
972 ; CHECK-NEXT: movi v1.4s, #1
973 ; CHECK-NEXT: ucvtf v3.4s, v3.4s
974 ; CHECK-NEXT: movi v17.4s, #127, msl #8
975 ; CHECK-NEXT: ushr v5.4s, v0.4s, #16
976 ; CHECK-NEXT: ushr v6.4s, v2.4s, #16
977 ; CHECK-NEXT: ushr v7.4s, v4.4s, #16
978 ; CHECK-NEXT: ushr v16.4s, v3.4s, #16
979 ; CHECK-NEXT: and v5.16b, v5.16b, v1.16b
980 ; CHECK-NEXT: and v6.16b, v6.16b, v1.16b
981 ; CHECK-NEXT: add v0.4s, v5.4s, v0.4s
982 ; CHECK-NEXT: add v2.4s, v6.4s, v2.4s
983 ; CHECK-NEXT: and v5.16b, v7.16b, v1.16b
984 ; CHECK-NEXT: and v6.16b, v16.16b, v1.16b
985 ; CHECK-NEXT: addhn v0.4h, v0.4s, v17.4s
986 ; CHECK-NEXT: addhn v1.4h, v2.4s, v17.4s
987 ; CHECK-NEXT: add v2.4s, v5.4s, v4.4s
988 ; CHECK-NEXT: add v3.4s, v6.4s, v3.4s
989 ; CHECK-NEXT: addhn2 v0.8h, v2.4s, v17.4s
990 ; CHECK-NEXT: addhn2 v1.8h, v3.4s, v17.4s
993 %c = uitofp <16 x i32> %a to <16 x bfloat>
; sitofp <32 x i32> -> <32 x bfloat>. The input occupies v0-v7; every register
; is converted with scvtf, bit 16 of each lane is added back, and addhn/addhn2
; add the 0x7fff bias (v21) while packing the results into v0.8h-v3.8h.
997 define <32 x bfloat> @stofp_v32i32_v32bf16(<32 x i32> %a) {
998 ; CHECK-LABEL: stofp_v32i32_v32bf16:
999 ; CHECK: // %bb.0: // %entry
1000 ; CHECK-NEXT: scvtf v0.4s, v0.4s
1001 ; CHECK-NEXT: scvtf v2.4s, v2.4s
1002 ; CHECK-NEXT: scvtf v4.4s, v4.4s
1003 ; CHECK-NEXT: scvtf v6.4s, v6.4s
1004 ; CHECK-NEXT: movi v16.4s, #1
1005 ; CHECK-NEXT: scvtf v1.4s, v1.4s
1006 ; CHECK-NEXT: scvtf v17.4s, v3.4s
1007 ; CHECK-NEXT: scvtf v5.4s, v5.4s
1008 ; CHECK-NEXT: scvtf v7.4s, v7.4s
1009 ; CHECK-NEXT: movi v21.4s, #127, msl #8
1010 ; CHECK-NEXT: ushr v3.4s, v0.4s, #16
1011 ; CHECK-NEXT: ushr v18.4s, v2.4s, #16
1012 ; CHECK-NEXT: ushr v19.4s, v4.4s, #16
1013 ; CHECK-NEXT: ushr v20.4s, v6.4s, #16
1014 ; CHECK-NEXT: ushr v22.4s, v1.4s, #16
1015 ; CHECK-NEXT: ushr v23.4s, v17.4s, #16
1016 ; CHECK-NEXT: ushr v24.4s, v5.4s, #16
1017 ; CHECK-NEXT: ushr v25.4s, v7.4s, #16
1018 ; CHECK-NEXT: and v3.16b, v3.16b, v16.16b
1019 ; CHECK-NEXT: and v18.16b, v18.16b, v16.16b
1020 ; CHECK-NEXT: and v19.16b, v19.16b, v16.16b
1021 ; CHECK-NEXT: and v20.16b, v20.16b, v16.16b
1022 ; CHECK-NEXT: add v0.4s, v3.4s, v0.4s
1023 ; CHECK-NEXT: and v3.16b, v22.16b, v16.16b
1024 ; CHECK-NEXT: add v2.4s, v18.4s, v2.4s
1025 ; CHECK-NEXT: add v4.4s, v19.4s, v4.4s
1026 ; CHECK-NEXT: add v6.4s, v20.4s, v6.4s
1027 ; CHECK-NEXT: and v18.16b, v23.16b, v16.16b
1028 ; CHECK-NEXT: and v19.16b, v24.16b, v16.16b
1029 ; CHECK-NEXT: and v16.16b, v25.16b, v16.16b
1030 ; CHECK-NEXT: add v20.4s, v3.4s, v1.4s
1031 ; CHECK-NEXT: addhn v0.4h, v0.4s, v21.4s
1032 ; CHECK-NEXT: addhn v1.4h, v2.4s, v21.4s
1033 ; CHECK-NEXT: addhn v2.4h, v4.4s, v21.4s
1034 ; CHECK-NEXT: addhn v3.4h, v6.4s, v21.4s
1035 ; CHECK-NEXT: add v4.4s, v18.4s, v17.4s
1036 ; CHECK-NEXT: add v5.4s, v19.4s, v5.4s
1037 ; CHECK-NEXT: add v6.4s, v16.4s, v7.4s
1038 ; CHECK-NEXT: addhn2 v0.8h, v20.4s, v21.4s
1039 ; CHECK-NEXT: addhn2 v1.8h, v4.4s, v21.4s
1040 ; CHECK-NEXT: addhn2 v2.8h, v5.4s, v21.4s
1041 ; CHECK-NEXT: addhn2 v3.8h, v6.4s, v21.4s
1044 %c = sitofp <32 x i32> %a to <32 x bfloat>
1045 ret <32 x bfloat> %c
; uitofp <32 x i32> -> <32 x bfloat>. The input occupies v0-v7; every register
; is converted with ucvtf, bit 16 of each lane is added back, and addhn/addhn2
; add the 0x7fff bias (v21) while packing the results into v0.8h-v3.8h.
1048 define <32 x bfloat> @utofp_v32i32_v32bf16(<32 x i32> %a) {
1049 ; CHECK-LABEL: utofp_v32i32_v32bf16:
1050 ; CHECK: // %bb.0: // %entry
1051 ; CHECK-NEXT: ucvtf v0.4s, v0.4s
1052 ; CHECK-NEXT: ucvtf v2.4s, v2.4s
1053 ; CHECK-NEXT: ucvtf v4.4s, v4.4s
1054 ; CHECK-NEXT: ucvtf v6.4s, v6.4s
1055 ; CHECK-NEXT: movi v16.4s, #1
1056 ; CHECK-NEXT: ucvtf v1.4s, v1.4s
1057 ; CHECK-NEXT: ucvtf v17.4s, v3.4s
1058 ; CHECK-NEXT: ucvtf v5.4s, v5.4s
1059 ; CHECK-NEXT: ucvtf v7.4s, v7.4s
1060 ; CHECK-NEXT: movi v21.4s, #127, msl #8
1061 ; CHECK-NEXT: ushr v3.4s, v0.4s, #16
1062 ; CHECK-NEXT: ushr v18.4s, v2.4s, #16
1063 ; CHECK-NEXT: ushr v19.4s, v4.4s, #16
1064 ; CHECK-NEXT: ushr v20.4s, v6.4s, #16
1065 ; CHECK-NEXT: ushr v22.4s, v1.4s, #16
1066 ; CHECK-NEXT: ushr v23.4s, v17.4s, #16
1067 ; CHECK-NEXT: ushr v24.4s, v5.4s, #16
1068 ; CHECK-NEXT: ushr v25.4s, v7.4s, #16
1069 ; CHECK-NEXT: and v3.16b, v3.16b, v16.16b
1070 ; CHECK-NEXT: and v18.16b, v18.16b, v16.16b
1071 ; CHECK-NEXT: and v19.16b, v19.16b, v16.16b
1072 ; CHECK-NEXT: and v20.16b, v20.16b, v16.16b
1073 ; CHECK-NEXT: add v0.4s, v3.4s, v0.4s
1074 ; CHECK-NEXT: and v3.16b, v22.16b, v16.16b
1075 ; CHECK-NEXT: add v2.4s, v18.4s, v2.4s
1076 ; CHECK-NEXT: add v4.4s, v19.4s, v4.4s
1077 ; CHECK-NEXT: add v6.4s, v20.4s, v6.4s
1078 ; CHECK-NEXT: and v18.16b, v23.16b, v16.16b
1079 ; CHECK-NEXT: and v19.16b, v24.16b, v16.16b
1080 ; CHECK-NEXT: and v16.16b, v25.16b, v16.16b
1081 ; CHECK-NEXT: add v20.4s, v3.4s, v1.4s
1082 ; CHECK-NEXT: addhn v0.4h, v0.4s, v21.4s
1083 ; CHECK-NEXT: addhn v1.4h, v2.4s, v21.4s
1084 ; CHECK-NEXT: addhn v2.4h, v4.4s, v21.4s
1085 ; CHECK-NEXT: addhn v3.4h, v6.4s, v21.4s
1086 ; CHECK-NEXT: add v4.4s, v18.4s, v17.4s
1087 ; CHECK-NEXT: add v5.4s, v19.4s, v5.4s
1088 ; CHECK-NEXT: add v6.4s, v16.4s, v7.4s
1089 ; CHECK-NEXT: addhn2 v0.8h, v20.4s, v21.4s
1090 ; CHECK-NEXT: addhn2 v1.8h, v4.4s, v21.4s
1091 ; CHECK-NEXT: addhn2 v2.8h, v5.4s, v21.4s
1092 ; CHECK-NEXT: addhn2 v3.8h, v6.4s, v21.4s
1095 %c = uitofp <32 x i32> %a to <32 x bfloat>
1096 ret <32 x bfloat> %c
; sitofp <2 x i16> -> <2 x bfloat>. The expected code first gathers the
; even-numbered halfwords with uzp1, sign-extends them to 32 bits (sshll),
; converts with scvtf and then narrows with the bit-16 + 0x7fff / addhn sequence.
1099 define <2 x bfloat> @stofp_v2i16_v2bf16(<2 x i16> %a) {
1100 ; CHECK-LABEL: stofp_v2i16_v2bf16:
1101 ; CHECK: // %bb.0: // %entry
1102 ; CHECK-NEXT: uzp1 v0.4h, v0.4h, v0.4h
1103 ; CHECK-NEXT: movi v1.4s, #1
1104 ; CHECK-NEXT: sshll v0.4s, v0.4h, #0
1105 ; CHECK-NEXT: scvtf v0.4s, v0.4s
1106 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
1107 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
1108 ; CHECK-NEXT: movi v2.4s, #127, msl #8
1109 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
1110 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
1113 %c = sitofp <2 x i16> %a to <2 x bfloat>
; uitofp <2 x i16> -> <2 x bfloat>. The expected code first gathers the
; even-numbered halfwords with uzp1, zero-extends them to 32 bits (ushll),
; converts with ucvtf and then narrows with the bit-16 + 0x7fff / addhn sequence.
1117 define <2 x bfloat> @utofp_v2i16_v2bf16(<2 x i16> %a) {
1118 ; CHECK-LABEL: utofp_v2i16_v2bf16:
1119 ; CHECK: // %bb.0: // %entry
1120 ; CHECK-NEXT: uzp1 v0.4h, v0.4h, v0.4h
1121 ; CHECK-NEXT: movi v1.4s, #1
1122 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
1123 ; CHECK-NEXT: ucvtf v0.4s, v0.4s
1124 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
1125 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
1126 ; CHECK-NEXT: movi v2.4s, #127, msl #8
1127 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
1128 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
1131 %c = uitofp <2 x i16> %a to <2 x bfloat>
; sitofp <3 x i16> -> <3 x bfloat>. The expected code works on all four
; halfword lanes of v0: sign-extend to 32 bits (sshll), convert with scvtf, then
; narrow with the bit-16 + 0x7fff / addhn sequence.
1135 define <3 x bfloat> @stofp_v3i16_v3bf16(<3 x i16> %a) {
1136 ; CHECK-LABEL: stofp_v3i16_v3bf16:
1137 ; CHECK: // %bb.0: // %entry
1138 ; CHECK-NEXT: sshll v0.4s, v0.4h, #0
1139 ; CHECK-NEXT: movi v1.4s, #1
1140 ; CHECK-NEXT: scvtf v0.4s, v0.4s
1141 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
1142 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
1143 ; CHECK-NEXT: movi v2.4s, #127, msl #8
1144 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
1145 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
1148 %c = sitofp <3 x i16> %a to <3 x bfloat>
; uitofp <3 x i16> -> <3 x bfloat>. The expected code works on all four
; halfword lanes of v0: zero-extend to 32 bits (ushll), convert with ucvtf, then
; narrow with the bit-16 + 0x7fff / addhn sequence.
1152 define <3 x bfloat> @utofp_v3i16_v3bf16(<3 x i16> %a) {
1153 ; CHECK-LABEL: utofp_v3i16_v3bf16:
1154 ; CHECK: // %bb.0: // %entry
1155 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
1156 ; CHECK-NEXT: movi v1.4s, #1
1157 ; CHECK-NEXT: ucvtf v0.4s, v0.4s
1158 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
1159 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
1160 ; CHECK-NEXT: movi v2.4s, #127, msl #8
1161 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
1162 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
1165 %c = uitofp <3 x i16> %a to <3 x bfloat>
; sitofp <4 x i16> -> <4 x bfloat>. The expected code sign-extends the
; halfwords to 32 bits (sshll), converts with scvtf, then narrows with the
; bit-16 + 0x7fff / addhn sequence.
1169 define <4 x bfloat> @stofp_v4i16_v4bf16(<4 x i16> %a) {
1170 ; CHECK-LABEL: stofp_v4i16_v4bf16:
1171 ; CHECK: // %bb.0: // %entry
1172 ; CHECK-NEXT: sshll v0.4s, v0.4h, #0
1173 ; CHECK-NEXT: movi v1.4s, #1
1174 ; CHECK-NEXT: scvtf v0.4s, v0.4s
1175 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
1176 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
1177 ; CHECK-NEXT: movi v2.4s, #127, msl #8
1178 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
1179 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
1182 %c = sitofp <4 x i16> %a to <4 x bfloat>
; uitofp <4 x i16> -> <4 x bfloat>. The expected code zero-extends the
; halfwords to 32 bits (ushll), converts with ucvtf, then narrows with the
; bit-16 + 0x7fff / addhn sequence.
1186 define <4 x bfloat> @utofp_v4i16_v4bf16(<4 x i16> %a) {
1187 ; CHECK-LABEL: utofp_v4i16_v4bf16:
1188 ; CHECK: // %bb.0: // %entry
1189 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
1190 ; CHECK-NEXT: movi v1.4s, #1
1191 ; CHECK-NEXT: ucvtf v0.4s, v0.4s
1192 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
1193 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
1194 ; CHECK-NEXT: movi v2.4s, #127, msl #8
1195 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
1196 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
1199 %c = uitofp <4 x i16> %a to <4 x bfloat>
; sitofp <8 x i16> -> <8 x bfloat>. Both halves of v0 are sign-extended
; (sshll/sshll2) and converted with scvtf. Here the 0x7fff bias (v4) is added to
; the extracted bit 16 first, and addhn/addhn2 add that sum to the f32 value
; while packing the high 16 bits into v0.8h.
1203 define <8 x bfloat> @stofp_v8i16_v8bf16(<8 x i16> %a) {
1204 ; CHECK-LABEL: stofp_v8i16_v8bf16:
1205 ; CHECK: // %bb.0: // %entry
1206 ; CHECK-NEXT: sshll v2.4s, v0.4h, #0
1207 ; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
1208 ; CHECK-NEXT: movi v1.4s, #1
1209 ; CHECK-NEXT: movi v4.4s, #127, msl #8
1210 ; CHECK-NEXT: scvtf v2.4s, v2.4s
1211 ; CHECK-NEXT: scvtf v3.4s, v0.4s
1212 ; CHECK-NEXT: ushr v0.4s, v2.4s, #16
1213 ; CHECK-NEXT: ushr v5.4s, v3.4s, #16
1214 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
1215 ; CHECK-NEXT: and v1.16b, v5.16b, v1.16b
1216 ; CHECK-NEXT: add v0.4s, v0.4s, v4.4s
1217 ; CHECK-NEXT: add v1.4s, v1.4s, v4.4s
1218 ; CHECK-NEXT: addhn v0.4h, v2.4s, v0.4s
1219 ; CHECK-NEXT: addhn2 v0.8h, v3.4s, v1.4s
1222 %c = sitofp <8 x i16> %a to <8 x bfloat>
; uitofp <8 x i16> -> <8 x bfloat>. Both halves of v0 are zero-extended
; (ushll/ushll2) and converted with ucvtf. Here the 0x7fff bias (v4) is added to
; the extracted bit 16 first, and addhn/addhn2 add that sum to the f32 value
; while packing the high 16 bits into v0.8h.
1226 define <8 x bfloat> @utofp_v8i16_v8bf16(<8 x i16> %a) {
1227 ; CHECK-LABEL: utofp_v8i16_v8bf16:
1228 ; CHECK: // %bb.0: // %entry
1229 ; CHECK-NEXT: ushll v2.4s, v0.4h, #0
1230 ; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
1231 ; CHECK-NEXT: movi v1.4s, #1
1232 ; CHECK-NEXT: movi v4.4s, #127, msl #8
1233 ; CHECK-NEXT: ucvtf v2.4s, v2.4s
1234 ; CHECK-NEXT: ucvtf v3.4s, v0.4s
1235 ; CHECK-NEXT: ushr v0.4s, v2.4s, #16
1236 ; CHECK-NEXT: ushr v5.4s, v3.4s, #16
1237 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
1238 ; CHECK-NEXT: and v1.16b, v5.16b, v1.16b
1239 ; CHECK-NEXT: add v0.4s, v0.4s, v4.4s
1240 ; CHECK-NEXT: add v1.4s, v1.4s, v4.4s
1241 ; CHECK-NEXT: addhn v0.4h, v2.4s, v0.4s
1242 ; CHECK-NEXT: addhn2 v0.8h, v3.4s, v1.4s
1245 %c = uitofp <8 x i16> %a to <8 x bfloat>
; sitofp <16 x i16> -> <16 x bfloat>. The input occupies v0 and v1; each is
; split and sign-extended into two 4s vectors (sshll/sshll2) and converted with
; scvtf. Bit 16 plus the 0x7fff bias (v7) is then added to each f32 lane by
; addhn/addhn2, which pack the high 16 bits into v0.8h and v1.8h.
1249 define <16 x bfloat> @stofp_v16i16_v16bf16(<16 x i16> %a) {
1250 ; CHECK-LABEL: stofp_v16i16_v16bf16:
1251 ; CHECK: // %bb.0: // %entry
1252 ; CHECK-NEXT: sshll v3.4s, v0.4h, #0
1253 ; CHECK-NEXT: sshll v4.4s, v1.4h, #0
1254 ; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
1255 ; CHECK-NEXT: sshll2 v1.4s, v1.8h, #0
1256 ; CHECK-NEXT: movi v2.4s, #1
1257 ; CHECK-NEXT: movi v7.4s, #127, msl #8
1258 ; CHECK-NEXT: scvtf v3.4s, v3.4s
1259 ; CHECK-NEXT: scvtf v4.4s, v4.4s
1260 ; CHECK-NEXT: scvtf v5.4s, v0.4s
1261 ; CHECK-NEXT: scvtf v6.4s, v1.4s
1262 ; CHECK-NEXT: ushr v0.4s, v3.4s, #16
1263 ; CHECK-NEXT: ushr v1.4s, v4.4s, #16
1264 ; CHECK-NEXT: ushr v16.4s, v5.4s, #16
1265 ; CHECK-NEXT: ushr v17.4s, v6.4s, #16
1266 ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
1267 ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
1268 ; CHECK-NEXT: and v16.16b, v16.16b, v2.16b
1269 ; CHECK-NEXT: and v2.16b, v17.16b, v2.16b
1270 ; CHECK-NEXT: add v0.4s, v0.4s, v7.4s
1271 ; CHECK-NEXT: add v1.4s, v1.4s, v7.4s
1272 ; CHECK-NEXT: add v2.4s, v2.4s, v7.4s
1273 ; CHECK-NEXT: addhn v0.4h, v3.4s, v0.4s
1274 ; CHECK-NEXT: addhn v1.4h, v4.4s, v1.4s
1275 ; CHECK-NEXT: add v3.4s, v16.4s, v7.4s
1276 ; CHECK-NEXT: addhn2 v0.8h, v5.4s, v3.4s
1277 ; CHECK-NEXT: addhn2 v1.8h, v6.4s, v2.4s
1280 %c = sitofp <16 x i16> %a to <16 x bfloat>
1281 ret <16 x bfloat> %c
; uitofp <16 x i16> -> <16 x bfloat>. The input occupies v0 and v1; each is
; split and zero-extended into two 4s vectors (ushll/ushll2) and converted with
; ucvtf. Bit 16 plus the 0x7fff bias (v7) is then added to each f32 lane by
; addhn/addhn2, which pack the high 16 bits into v0.8h and v1.8h.
1284 define <16 x bfloat> @utofp_v16i16_v16bf16(<16 x i16> %a) {
1285 ; CHECK-LABEL: utofp_v16i16_v16bf16:
1286 ; CHECK: // %bb.0: // %entry
1287 ; CHECK-NEXT: ushll v3.4s, v0.4h, #0
1288 ; CHECK-NEXT: ushll v4.4s, v1.4h, #0
1289 ; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
1290 ; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0
1291 ; CHECK-NEXT: movi v2.4s, #1
1292 ; CHECK-NEXT: movi v7.4s, #127, msl #8
1293 ; CHECK-NEXT: ucvtf v3.4s, v3.4s
1294 ; CHECK-NEXT: ucvtf v4.4s, v4.4s
1295 ; CHECK-NEXT: ucvtf v5.4s, v0.4s
1296 ; CHECK-NEXT: ucvtf v6.4s, v1.4s
1297 ; CHECK-NEXT: ushr v0.4s, v3.4s, #16
1298 ; CHECK-NEXT: ushr v1.4s, v4.4s, #16
1299 ; CHECK-NEXT: ushr v16.4s, v5.4s, #16
1300 ; CHECK-NEXT: ushr v17.4s, v6.4s, #16
1301 ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
1302 ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
1303 ; CHECK-NEXT: and v16.16b, v16.16b, v2.16b
1304 ; CHECK-NEXT: and v2.16b, v17.16b, v2.16b
1305 ; CHECK-NEXT: add v0.4s, v0.4s, v7.4s
1306 ; CHECK-NEXT: add v1.4s, v1.4s, v7.4s
1307 ; CHECK-NEXT: add v2.4s, v2.4s, v7.4s
1308 ; CHECK-NEXT: addhn v0.4h, v3.4s, v0.4s
1309 ; CHECK-NEXT: addhn v1.4h, v4.4s, v1.4s
1310 ; CHECK-NEXT: add v3.4s, v16.4s, v7.4s
1311 ; CHECK-NEXT: addhn2 v0.8h, v5.4s, v3.4s
1312 ; CHECK-NEXT: addhn2 v1.8h, v6.4s, v2.4s
1315 %c = uitofp <16 x i16> %a to <16 x bfloat>
1316 ret <16 x bfloat> %c
; sitofp <32 x i16> -> <32 x bfloat>. The input occupies v0-v3; each register
; is split and sign-extended into two 4s vectors (sshll/sshll2) and converted
; with scvtf. Bit 16 plus the 0x7fff bias (v21) is then added to each f32 lane
; by addhn/addhn2, which pack the high 16 bits into v0.8h-v3.8h.
1319 define <32 x bfloat> @stofp_v32i16_v32bf16(<32 x i16> %a) {
1320 ; CHECK-LABEL: stofp_v32i16_v32bf16:
1321 ; CHECK: // %bb.0: // %entry
1322 ; CHECK-NEXT: sshll v4.4s, v1.4h, #0
1323 ; CHECK-NEXT: sshll v5.4s, v0.4h, #0
1324 ; CHECK-NEXT: sshll v6.4s, v2.4h, #0
1325 ; CHECK-NEXT: sshll v7.4s, v3.4h, #0
1326 ; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
1327 ; CHECK-NEXT: sshll2 v1.4s, v1.8h, #0
1328 ; CHECK-NEXT: sshll2 v2.4s, v2.8h, #0
1329 ; CHECK-NEXT: sshll2 v3.4s, v3.8h, #0
1330 ; CHECK-NEXT: movi v16.4s, #1
1331 ; CHECK-NEXT: scvtf v5.4s, v5.4s
1332 ; CHECK-NEXT: scvtf v4.4s, v4.4s
1333 ; CHECK-NEXT: scvtf v6.4s, v6.4s
1334 ; CHECK-NEXT: scvtf v7.4s, v7.4s
1335 ; CHECK-NEXT: scvtf v17.4s, v0.4s
1336 ; CHECK-NEXT: scvtf v18.4s, v1.4s
1337 ; CHECK-NEXT: scvtf v19.4s, v2.4s
1338 ; CHECK-NEXT: scvtf v20.4s, v3.4s
1339 ; CHECK-NEXT: movi v21.4s, #127, msl #8
1340 ; CHECK-NEXT: ushr v0.4s, v5.4s, #16
1341 ; CHECK-NEXT: ushr v1.4s, v4.4s, #16
1342 ; CHECK-NEXT: ushr v2.4s, v6.4s, #16
1343 ; CHECK-NEXT: ushr v3.4s, v7.4s, #16
1344 ; CHECK-NEXT: ushr v22.4s, v17.4s, #16
1345 ; CHECK-NEXT: ushr v23.4s, v18.4s, #16
1346 ; CHECK-NEXT: ushr v24.4s, v19.4s, #16
1347 ; CHECK-NEXT: ushr v25.4s, v20.4s, #16
1348 ; CHECK-NEXT: and v0.16b, v0.16b, v16.16b
1349 ; CHECK-NEXT: and v1.16b, v1.16b, v16.16b
1350 ; CHECK-NEXT: and v2.16b, v2.16b, v16.16b
1351 ; CHECK-NEXT: and v3.16b, v3.16b, v16.16b
1352 ; CHECK-NEXT: and v22.16b, v22.16b, v16.16b
1353 ; CHECK-NEXT: and v23.16b, v23.16b, v16.16b
1354 ; CHECK-NEXT: and v24.16b, v24.16b, v16.16b
1355 ; CHECK-NEXT: and v16.16b, v25.16b, v16.16b
1356 ; CHECK-NEXT: add v0.4s, v0.4s, v21.4s
1357 ; CHECK-NEXT: add v1.4s, v1.4s, v21.4s
1358 ; CHECK-NEXT: add v2.4s, v2.4s, v21.4s
1359 ; CHECK-NEXT: add v3.4s, v3.4s, v21.4s
1360 ; CHECK-NEXT: addhn v0.4h, v5.4s, v0.4s
1361 ; CHECK-NEXT: addhn v1.4h, v4.4s, v1.4s
1362 ; CHECK-NEXT: addhn v2.4h, v6.4s, v2.4s
1363 ; CHECK-NEXT: addhn v3.4h, v7.4s, v3.4s
1364 ; CHECK-NEXT: add v4.4s, v22.4s, v21.4s
1365 ; CHECK-NEXT: add v5.4s, v23.4s, v21.4s
1366 ; CHECK-NEXT: add v6.4s, v24.4s, v21.4s
1367 ; CHECK-NEXT: add v7.4s, v16.4s, v21.4s
1368 ; CHECK-NEXT: addhn2 v0.8h, v17.4s, v4.4s
1369 ; CHECK-NEXT: addhn2 v1.8h, v18.4s, v5.4s
1370 ; CHECK-NEXT: addhn2 v2.8h, v19.4s, v6.4s
1371 ; CHECK-NEXT: addhn2 v3.8h, v20.4s, v7.4s
1374 %c = sitofp <32 x i16> %a to <32 x bfloat>
1375 ret <32 x bfloat> %c
; uitofp <32 x i16> -> <32 x bfloat>. The input occupies v0-v3; each register
; is split and zero-extended into two 4s vectors (ushll/ushll2) and converted
; with ucvtf. Bit 16 plus the 0x7fff bias (v21) is then added to each f32 lane
; by addhn/addhn2, which pack the high 16 bits into v0.8h-v3.8h.
1378 define <32 x bfloat> @utofp_v32i16_v32bf16(<32 x i16> %a) {
1379 ; CHECK-LABEL: utofp_v32i16_v32bf16:
1380 ; CHECK: // %bb.0: // %entry
1381 ; CHECK-NEXT: ushll v4.4s, v1.4h, #0
1382 ; CHECK-NEXT: ushll v5.4s, v0.4h, #0
1383 ; CHECK-NEXT: ushll v6.4s, v2.4h, #0
1384 ; CHECK-NEXT: ushll v7.4s, v3.4h, #0
1385 ; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
1386 ; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0
1387 ; CHECK-NEXT: ushll2 v2.4s, v2.8h, #0
1388 ; CHECK-NEXT: ushll2 v3.4s, v3.8h, #0
1389 ; CHECK-NEXT: movi v16.4s, #1
1390 ; CHECK-NEXT: ucvtf v5.4s, v5.4s
1391 ; CHECK-NEXT: ucvtf v4.4s, v4.4s
1392 ; CHECK-NEXT: ucvtf v6.4s, v6.4s
1393 ; CHECK-NEXT: ucvtf v7.4s, v7.4s
1394 ; CHECK-NEXT: ucvtf v17.4s, v0.4s
1395 ; CHECK-NEXT: ucvtf v18.4s, v1.4s
1396 ; CHECK-NEXT: ucvtf v19.4s, v2.4s
1397 ; CHECK-NEXT: ucvtf v20.4s, v3.4s
1398 ; CHECK-NEXT: movi v21.4s, #127, msl #8
1399 ; CHECK-NEXT: ushr v0.4s, v5.4s, #16
1400 ; CHECK-NEXT: ushr v1.4s, v4.4s, #16
1401 ; CHECK-NEXT: ushr v2.4s, v6.4s, #16
1402 ; CHECK-NEXT: ushr v3.4s, v7.4s, #16
1403 ; CHECK-NEXT: ushr v22.4s, v17.4s, #16
1404 ; CHECK-NEXT: ushr v23.4s, v18.4s, #16
1405 ; CHECK-NEXT: ushr v24.4s, v19.4s, #16
1406 ; CHECK-NEXT: ushr v25.4s, v20.4s, #16
1407 ; CHECK-NEXT: and v0.16b, v0.16b, v16.16b
1408 ; CHECK-NEXT: and v1.16b, v1.16b, v16.16b
1409 ; CHECK-NEXT: and v2.16b, v2.16b, v16.16b
1410 ; CHECK-NEXT: and v3.16b, v3.16b, v16.16b
1411 ; CHECK-NEXT: and v22.16b, v22.16b, v16.16b
1412 ; CHECK-NEXT: and v23.16b, v23.16b, v16.16b
1413 ; CHECK-NEXT: and v24.16b, v24.16b, v16.16b
1414 ; CHECK-NEXT: and v16.16b, v25.16b, v16.16b
1415 ; CHECK-NEXT: add v0.4s, v0.4s, v21.4s
1416 ; CHECK-NEXT: add v1.4s, v1.4s, v21.4s
1417 ; CHECK-NEXT: add v2.4s, v2.4s, v21.4s
1418 ; CHECK-NEXT: add v3.4s, v3.4s, v21.4s
1419 ; CHECK-NEXT: addhn v0.4h, v5.4s, v0.4s
1420 ; CHECK-NEXT: addhn v1.4h, v4.4s, v1.4s
1421 ; CHECK-NEXT: addhn v2.4h, v6.4s, v2.4s
1422 ; CHECK-NEXT: addhn v3.4h, v7.4s, v3.4s
1423 ; CHECK-NEXT: add v4.4s, v22.4s, v21.4s
1424 ; CHECK-NEXT: add v5.4s, v23.4s, v21.4s
1425 ; CHECK-NEXT: add v6.4s, v24.4s, v21.4s
1426 ; CHECK-NEXT: add v7.4s, v16.4s, v21.4s
1427 ; CHECK-NEXT: addhn2 v0.8h, v17.4s, v4.4s
1428 ; CHECK-NEXT: addhn2 v1.8h, v18.4s, v5.4s
1429 ; CHECK-NEXT: addhn2 v2.8h, v19.4s, v6.4s
1430 ; CHECK-NEXT: addhn2 v3.8h, v20.4s, v7.4s
1433 %c = uitofp <32 x i16> %a to <32 x bfloat>
1434 ret <32 x bfloat> %c
; sitofp <2 x i8> -> <2 x bfloat>. Unlike the wider vector cases, the expected
; code is scalarized: both 32-bit lanes are moved to GPRs, sign-extended from
; 8 bits (sxtb) and converted with scalar scvtf. The rounding is done in GPRs
; (ubfx bit 16, add 0x7fff, lsr #16) and the two results are re-inserted as
; halfword lanes 0 and 1 of v0.
1437 define <2 x bfloat> @stofp_v2i8_v2bf16(<2 x i8> %a) {
1438 ; CHECK-LABEL: stofp_v2i8_v2bf16:
1439 ; CHECK: // %bb.0: // %entry
1440 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1441 ; CHECK-NEXT: mov w9, v0.s[1]
1442 ; CHECK-NEXT: fmov w10, s0
1443 ; CHECK-NEXT: mov w8, #32767 // =0x7fff
1444 ; CHECK-NEXT: sxtb w10, w10
1445 ; CHECK-NEXT: sxtb w9, w9
1446 ; CHECK-NEXT: scvtf s1, w10
1447 ; CHECK-NEXT: scvtf s0, w9
1448 ; CHECK-NEXT: fmov w10, s1
1449 ; CHECK-NEXT: fmov w9, s0
1450 ; CHECK-NEXT: ubfx w12, w10, #16, #1
1451 ; CHECK-NEXT: ubfx w11, w9, #16, #1
1452 ; CHECK-NEXT: add w9, w9, w8
1453 ; CHECK-NEXT: add w8, w10, w8
1454 ; CHECK-NEXT: add w8, w12, w8
1455 ; CHECK-NEXT: add w9, w11, w9
1456 ; CHECK-NEXT: lsr w8, w8, #16
1457 ; CHECK-NEXT: lsr w9, w9, #16
1458 ; CHECK-NEXT: fmov s0, w8
1459 ; CHECK-NEXT: fmov s1, w9
1460 ; CHECK-NEXT: mov v0.h[1], v1.h[0]
1461 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
1464 %c = sitofp <2 x i8> %a to <2 x bfloat>
; uitofp <2 x i8> -> <2 x bfloat>. Unlike the wider vector cases, the expected
; code is scalarized: both 32-bit lanes are moved to GPRs, masked to 8 bits
; (and #0xff) and converted with scalar ucvtf. The rounding is done in GPRs
; (ubfx bit 16, add 0x7fff, lsr #16) and the two results are re-inserted as
; halfword lanes 0 and 1 of v0.
1468 define <2 x bfloat> @utofp_v2i8_v2bf16(<2 x i8> %a) {
1469 ; CHECK-LABEL: utofp_v2i8_v2bf16:
1470 ; CHECK: // %bb.0: // %entry
1471 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1472 ; CHECK-NEXT: mov w9, v0.s[1]
1473 ; CHECK-NEXT: fmov w10, s0
1474 ; CHECK-NEXT: mov w8, #32767 // =0x7fff
1475 ; CHECK-NEXT: and w10, w10, #0xff
1476 ; CHECK-NEXT: and w9, w9, #0xff
1477 ; CHECK-NEXT: ucvtf s1, w10
1478 ; CHECK-NEXT: ucvtf s0, w9
1479 ; CHECK-NEXT: fmov w10, s1
1480 ; CHECK-NEXT: fmov w9, s0
1481 ; CHECK-NEXT: ubfx w12, w10, #16, #1
1482 ; CHECK-NEXT: ubfx w11, w9, #16, #1
1483 ; CHECK-NEXT: add w9, w9, w8
1484 ; CHECK-NEXT: add w8, w10, w8
1485 ; CHECK-NEXT: add w8, w12, w8
1486 ; CHECK-NEXT: add w9, w11, w9
1487 ; CHECK-NEXT: lsr w8, w8, #16
1488 ; CHECK-NEXT: lsr w9, w9, #16
1489 ; CHECK-NEXT: fmov s0, w8
1490 ; CHECK-NEXT: fmov s1, w9
1491 ; CHECK-NEXT: mov v0.h[1], v1.h[0]
1492 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
1495 %c = uitofp <2 x i8> %a to <2 x bfloat>
; sitofp <3 x i8> -> <3 x bfloat>. The three elements are taken from w0, w1
; and w2 and inserted into halfword lanes of v0. They are sign-extended from
; 8 bits with shl/sshr #8, widened with sshll, converted with scvtf and
; narrowed with the bit-16 + 0x7fff / addhn sequence.
1499 define <3 x bfloat> @stofp_v3i8_v3bf16(<3 x i8> %a) {
1500 ; CHECK-LABEL: stofp_v3i8_v3bf16:
1501 ; CHECK: // %bb.0: // %entry
1502 ; CHECK-NEXT: fmov s0, w0
1503 ; CHECK-NEXT: movi v1.4s, #1
1504 ; CHECK-NEXT: mov v0.h[1], w1
1505 ; CHECK-NEXT: mov v0.h[2], w2
1506 ; CHECK-NEXT: shl v0.4h, v0.4h, #8
1507 ; CHECK-NEXT: sshr v0.4h, v0.4h, #8
1508 ; CHECK-NEXT: sshll v0.4s, v0.4h, #0
1509 ; CHECK-NEXT: scvtf v0.4s, v0.4s
1510 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
1511 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
1512 ; CHECK-NEXT: movi v2.4s, #127, msl #8
1513 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
1514 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
1517 %c = sitofp <3 x i8> %a to <3 x bfloat>
; uitofp <3 x i8> -> <3 x bfloat>. The three elements are taken from w0, w1
; and w2 and inserted into halfword lanes of v0. The upper byte of each lane is
; cleared with bic, then the lanes are widened with ushll, converted with ucvtf
; and narrowed with the bit-16 + 0x7fff / addhn sequence.
1521 define <3 x bfloat> @utofp_v3i8_v3bf16(<3 x i8> %a) {
1522 ; CHECK-LABEL: utofp_v3i8_v3bf16:
1523 ; CHECK: // %bb.0: // %entry
1524 ; CHECK-NEXT: fmov s0, w0
1525 ; CHECK-NEXT: movi v1.4s, #1
1526 ; CHECK-NEXT: mov v0.h[1], w1
1527 ; CHECK-NEXT: mov v0.h[2], w2
1528 ; CHECK-NEXT: bic v0.4h, #255, lsl #8
1529 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
1530 ; CHECK-NEXT: ucvtf v0.4s, v0.4s
1531 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
1532 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
1533 ; CHECK-NEXT: movi v2.4s, #127, msl #8
1534 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
1535 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
1538 %c = uitofp <3 x i8> %a to <3 x bfloat>
; sitofp <4 x i8> -> <4 x bfloat>. The elements sit in the halfword lanes of
; v0; they are sign-extended from 8 bits with shl/sshr #8, widened with sshll,
; converted with scvtf and narrowed with the bit-16 + 0x7fff / addhn sequence.
1542 define <4 x bfloat> @stofp_v4i8_v4bf16(<4 x i8> %a) {
1543 ; CHECK-LABEL: stofp_v4i8_v4bf16:
1544 ; CHECK: // %bb.0: // %entry
1545 ; CHECK-NEXT: shl v0.4h, v0.4h, #8
1546 ; CHECK-NEXT: movi v1.4s, #1
1547 ; CHECK-NEXT: sshr v0.4h, v0.4h, #8
1548 ; CHECK-NEXT: sshll v0.4s, v0.4h, #0
1549 ; CHECK-NEXT: scvtf v0.4s, v0.4s
1550 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
1551 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
1552 ; CHECK-NEXT: movi v2.4s, #127, msl #8
1553 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
1554 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
1557 %c = sitofp <4 x i8> %a to <4 x bfloat>
; uitofp <4 x i8> -> <4 x bfloat>. The elements sit in the halfword lanes of
; v0; the upper byte of each lane is cleared with bic, then the lanes are
; widened with ushll, converted with ucvtf and narrowed with the
; bit-16 + 0x7fff / addhn sequence.
1561 define <4 x bfloat> @utofp_v4i8_v4bf16(<4 x i8> %a) {
1562 ; CHECK-LABEL: utofp_v4i8_v4bf16:
1563 ; CHECK: // %bb.0: // %entry
1564 ; CHECK-NEXT: bic v0.4h, #255, lsl #8
1565 ; CHECK-NEXT: movi v1.4s, #1
1566 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
1567 ; CHECK-NEXT: ucvtf v0.4s, v0.4s
1568 ; CHECK-NEXT: ushr v2.4s, v0.4s, #16
1569 ; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
1570 ; CHECK-NEXT: movi v2.4s, #127, msl #8
1571 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
1572 ; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s
1575 %c = uitofp <4 x i8> %a to <4 x bfloat>
; sitofp <8 x i8> -> <8 x bfloat>. The bytes are sign-extended in two steps
; (sshll to 8h, then sshll/sshll2 to two 4s vectors) and converted with scvtf.
; Bit 16 plus the 0x7fff bias (v4) is added to each f32 lane by addhn/addhn2,
; which pack the high 16 bits into v0.8h.
1579 define <8 x bfloat> @stofp_v8i8_v8bf16(<8 x i8> %a) {
1580 ; CHECK-LABEL: stofp_v8i8_v8bf16:
1581 ; CHECK: // %bb.0: // %entry
1582 ; CHECK-NEXT: sshll v0.8h, v0.8b, #0
1583 ; CHECK-NEXT: movi v1.4s, #1
1584 ; CHECK-NEXT: movi v4.4s, #127, msl #8
1585 ; CHECK-NEXT: sshll v2.4s, v0.4h, #0
1586 ; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
1587 ; CHECK-NEXT: scvtf v2.4s, v2.4s
1588 ; CHECK-NEXT: scvtf v3.4s, v0.4s
1589 ; CHECK-NEXT: ushr v0.4s, v2.4s, #16
1590 ; CHECK-NEXT: ushr v5.4s, v3.4s, #16
1591 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
1592 ; CHECK-NEXT: and v1.16b, v5.16b, v1.16b
1593 ; CHECK-NEXT: add v0.4s, v0.4s, v4.4s
1594 ; CHECK-NEXT: add v1.4s, v1.4s, v4.4s
1595 ; CHECK-NEXT: addhn v0.4h, v2.4s, v0.4s
1596 ; CHECK-NEXT: addhn2 v0.8h, v3.4s, v1.4s
1599 %c = sitofp <8 x i8> %a to <8 x bfloat>
; uitofp <8 x i8> -> <8 x bfloat>. The bytes are zero-extended in two steps
; (ushll to 8h, then ushll/ushll2 to two 4s vectors) and converted with ucvtf.
; Bit 16 plus the 0x7fff bias (v4) is added to each f32 lane by addhn/addhn2,
; which pack the high 16 bits into v0.8h.
1603 define <8 x bfloat> @utofp_v8i8_v8bf16(<8 x i8> %a) {
1604 ; CHECK-LABEL: utofp_v8i8_v8bf16:
1605 ; CHECK: // %bb.0: // %entry
1606 ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
1607 ; CHECK-NEXT: movi v1.4s, #1
1608 ; CHECK-NEXT: movi v4.4s, #127, msl #8
1609 ; CHECK-NEXT: ushll v2.4s, v0.4h, #0
1610 ; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
1611 ; CHECK-NEXT: ucvtf v2.4s, v2.4s
1612 ; CHECK-NEXT: ucvtf v3.4s, v0.4s
1613 ; CHECK-NEXT: ushr v0.4s, v2.4s, #16
1614 ; CHECK-NEXT: ushr v5.4s, v3.4s, #16
1615 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
1616 ; CHECK-NEXT: and v1.16b, v5.16b, v1.16b
1617 ; CHECK-NEXT: add v0.4s, v0.4s, v4.4s
1618 ; CHECK-NEXT: add v1.4s, v1.4s, v4.4s
1619 ; CHECK-NEXT: addhn v0.4h, v2.4s, v0.4s
1620 ; CHECK-NEXT: addhn2 v0.8h, v3.4s, v1.4s
1623 %c = uitofp <8 x i8> %a to <8 x bfloat>
; sitofp <16 x i8> -> <16 x bfloat>. The bytes of v0 are sign-extended in two
; steps (sshll/sshll2 to 8h, then to four 4s vectors) and converted with scvtf.
; Bit 16 plus the 0x7fff bias (v7) is added to each f32 lane by addhn/addhn2,
; which pack the high 16 bits into v0.8h and v1.8h.
1627 define <16 x bfloat> @stofp_v16i8_v16bf16(<16 x i8> %a) {
1628 ; CHECK-LABEL: stofp_v16i8_v16bf16:
1629 ; CHECK: // %bb.0: // %entry
1630 ; CHECK-NEXT: sshll2 v2.8h, v0.16b, #0
1631 ; CHECK-NEXT: sshll v0.8h, v0.8b, #0
1632 ; CHECK-NEXT: movi v1.4s, #1
1633 ; CHECK-NEXT: movi v7.4s, #127, msl #8
1634 ; CHECK-NEXT: sshll v3.4s, v2.4h, #0
1635 ; CHECK-NEXT: sshll v4.4s, v0.4h, #0
1636 ; CHECK-NEXT: sshll2 v2.4s, v2.8h, #0
1637 ; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
1638 ; CHECK-NEXT: scvtf v3.4s, v3.4s
1639 ; CHECK-NEXT: scvtf v4.4s, v4.4s
1640 ; CHECK-NEXT: scvtf v2.4s, v2.4s
1641 ; CHECK-NEXT: scvtf v6.4s, v0.4s
1642 ; CHECK-NEXT: ushr v5.4s, v3.4s, #16
1643 ; CHECK-NEXT: ushr v0.4s, v4.4s, #16
1644 ; CHECK-NEXT: ushr v16.4s, v2.4s, #16
1645 ; CHECK-NEXT: ushr v17.4s, v6.4s, #16
1646 ; CHECK-NEXT: and v5.16b, v5.16b, v1.16b
1647 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
1648 ; CHECK-NEXT: and v16.16b, v16.16b, v1.16b
1649 ; CHECK-NEXT: and v17.16b, v17.16b, v1.16b
1650 ; CHECK-NEXT: add v5.4s, v5.4s, v7.4s
1651 ; CHECK-NEXT: add v0.4s, v0.4s, v7.4s
1652 ; CHECK-NEXT: addhn v1.4h, v3.4s, v5.4s
1653 ; CHECK-NEXT: addhn v0.4h, v4.4s, v0.4s
1654 ; CHECK-NEXT: add v3.4s, v16.4s, v7.4s
1655 ; CHECK-NEXT: add v4.4s, v17.4s, v7.4s
1656 ; CHECK-NEXT: addhn2 v1.8h, v2.4s, v3.4s
1657 ; CHECK-NEXT: addhn2 v0.8h, v6.4s, v4.4s
1660 %c = sitofp <16 x i8> %a to <16 x bfloat>
1661 ret <16 x bfloat> %c
; uitofp <16 x i8> -> <16 x bfloat>. The bytes of v0 are zero-extended in two
; steps (ushll/ushll2 to 8h, then to four 4s vectors) and converted with ucvtf.
; Bit 16 plus the 0x7fff bias (v7) is added to each f32 lane by addhn/addhn2,
; which pack the high 16 bits into v0.8h and v1.8h.
1664 define <16 x bfloat> @utofp_v16i8_v16bf16(<16 x i8> %a) {
1665 ; CHECK-LABEL: utofp_v16i8_v16bf16:
1666 ; CHECK: // %bb.0: // %entry
1667 ; CHECK-NEXT: ushll2 v2.8h, v0.16b, #0
1668 ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
1669 ; CHECK-NEXT: movi v1.4s, #1
1670 ; CHECK-NEXT: movi v7.4s, #127, msl #8
1671 ; CHECK-NEXT: ushll v3.4s, v2.4h, #0
1672 ; CHECK-NEXT: ushll v4.4s, v0.4h, #0
1673 ; CHECK-NEXT: ushll2 v2.4s, v2.8h, #0
1674 ; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
1675 ; CHECK-NEXT: ucvtf v3.4s, v3.4s
1676 ; CHECK-NEXT: ucvtf v4.4s, v4.4s
1677 ; CHECK-NEXT: ucvtf v2.4s, v2.4s
1678 ; CHECK-NEXT: ucvtf v6.4s, v0.4s
1679 ; CHECK-NEXT: ushr v5.4s, v3.4s, #16
1680 ; CHECK-NEXT: ushr v0.4s, v4.4s, #16
1681 ; CHECK-NEXT: ushr v16.4s, v2.4s, #16
1682 ; CHECK-NEXT: ushr v17.4s, v6.4s, #16
1683 ; CHECK-NEXT: and v5.16b, v5.16b, v1.16b
1684 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
1685 ; CHECK-NEXT: and v16.16b, v16.16b, v1.16b
1686 ; CHECK-NEXT: and v17.16b, v17.16b, v1.16b
1687 ; CHECK-NEXT: add v5.4s, v5.4s, v7.4s
1688 ; CHECK-NEXT: add v0.4s, v0.4s, v7.4s
1689 ; CHECK-NEXT: addhn v1.4h, v3.4s, v5.4s
1690 ; CHECK-NEXT: addhn v0.4h, v4.4s, v0.4s
1691 ; CHECK-NEXT: add v3.4s, v16.4s, v7.4s
1692 ; CHECK-NEXT: add v4.4s, v17.4s, v7.4s
1693 ; CHECK-NEXT: addhn2 v1.8h, v2.4s, v3.4s
1694 ; CHECK-NEXT: addhn2 v0.8h, v6.4s, v4.4s
1697 %c = uitofp <16 x i8> %a to <16 x bfloat>
1698 ret <16 x bfloat> %c
; sitofp <32 x i8> -> <32 x bfloat>. The input occupies v0 and v1; the bytes
; are sign-extended in two steps (sshll/sshll2 to 8h, then to eight 4s vectors)
; and converted with scvtf. Bit 16 plus the 0x7fff bias (v21) is added to each
; f32 lane by addhn/addhn2, which pack the high 16 bits into v0.8h-v3.8h.
1701 define <32 x bfloat> @stofp_v32i8_v32bf16(<32 x i8> %a) {
1702 ; CHECK-LABEL: stofp_v32i8_v32bf16:
1703 ; CHECK: // %bb.0: // %entry
1704 ; CHECK-NEXT: sshll2 v3.8h, v0.16b, #0
1705 ; CHECK-NEXT: sshll v0.8h, v0.8b, #0
1706 ; CHECK-NEXT: sshll2 v4.8h, v1.16b, #0
1707 ; CHECK-NEXT: sshll v1.8h, v1.8b, #0
1708 ; CHECK-NEXT: movi v2.4s, #1
1709 ; CHECK-NEXT: movi v21.4s, #127, msl #8
1710 ; CHECK-NEXT: sshll v5.4s, v3.4h, #0
1711 ; CHECK-NEXT: sshll v6.4s, v0.4h, #0
1712 ; CHECK-NEXT: sshll v7.4s, v4.4h, #0
1713 ; CHECK-NEXT: sshll v16.4s, v1.4h, #0
1714 ; CHECK-NEXT: sshll2 v3.4s, v3.8h, #0
1715 ; CHECK-NEXT: sshll2 v4.4s, v4.8h, #0
1716 ; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
1717 ; CHECK-NEXT: sshll2 v1.4s, v1.8h, #0
1718 ; CHECK-NEXT: scvtf v5.4s, v5.4s
1719 ; CHECK-NEXT: scvtf v6.4s, v6.4s
1720 ; CHECK-NEXT: scvtf v7.4s, v7.4s
1721 ; CHECK-NEXT: scvtf v16.4s, v16.4s
1722 ; CHECK-NEXT: scvtf v17.4s, v3.4s
1723 ; CHECK-NEXT: scvtf v4.4s, v4.4s
1724 ; CHECK-NEXT: scvtf v18.4s, v0.4s
1725 ; CHECK-NEXT: scvtf v19.4s, v1.4s
1726 ; CHECK-NEXT: ushr v0.4s, v5.4s, #16
1727 ; CHECK-NEXT: ushr v3.4s, v6.4s, #16
1728 ; CHECK-NEXT: ushr v1.4s, v7.4s, #16
1729 ; CHECK-NEXT: ushr v20.4s, v16.4s, #16
1730 ; CHECK-NEXT: ushr v23.4s, v17.4s, #16
1731 ; CHECK-NEXT: ushr v24.4s, v4.4s, #16
1732 ; CHECK-NEXT: ushr v22.4s, v18.4s, #16
1733 ; CHECK-NEXT: ushr v25.4s, v19.4s, #16
1734 ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
1735 ; CHECK-NEXT: and v3.16b, v3.16b, v2.16b
1736 ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
1737 ; CHECK-NEXT: and v20.16b, v20.16b, v2.16b
1738 ; CHECK-NEXT: and v23.16b, v23.16b, v2.16b
1739 ; CHECK-NEXT: and v24.16b, v24.16b, v2.16b
1740 ; CHECK-NEXT: and v22.16b, v22.16b, v2.16b
1741 ; CHECK-NEXT: and v25.16b, v25.16b, v2.16b
1742 ; CHECK-NEXT: add v0.4s, v0.4s, v21.4s
1743 ; CHECK-NEXT: add v3.4s, v3.4s, v21.4s
1744 ; CHECK-NEXT: add v26.4s, v1.4s, v21.4s
1745 ; CHECK-NEXT: add v20.4s, v20.4s, v21.4s
1746 ; CHECK-NEXT: addhn v1.4h, v5.4s, v0.4s
1747 ; CHECK-NEXT: addhn v0.4h, v6.4s, v3.4s
1748 ; CHECK-NEXT: addhn v3.4h, v7.4s, v26.4s
1749 ; CHECK-NEXT: addhn v2.4h, v16.4s, v20.4s
1750 ; CHECK-NEXT: add v5.4s, v22.4s, v21.4s
1751 ; CHECK-NEXT: add v6.4s, v23.4s, v21.4s
1752 ; CHECK-NEXT: add v7.4s, v24.4s, v21.4s
1753 ; CHECK-NEXT: add v16.4s, v25.4s, v21.4s
1754 ; CHECK-NEXT: addhn2 v0.8h, v18.4s, v5.4s
1755 ; CHECK-NEXT: addhn2 v1.8h, v17.4s, v6.4s
1756 ; CHECK-NEXT: addhn2 v3.8h, v4.4s, v7.4s
1757 ; CHECK-NEXT: addhn2 v2.8h, v19.4s, v16.4s
1760 %c = sitofp <32 x i8> %a to <32 x bfloat>
1761 ret <32 x bfloat> %c
; uitofp <32 x i8> -> <32 x bfloat>. The input occupies v0 and v1; the bytes
; are zero-extended in two steps (ushll/ushll2 to 8h, then to eight 4s vectors)
; and converted with ucvtf. Bit 16 plus the 0x7fff bias (v21) is added to each
; f32 lane by addhn/addhn2, which pack the high 16 bits into v0.8h-v3.8h.
1764 define <32 x bfloat> @utofp_v32i8_v32bf16(<32 x i8> %a) {
1765 ; CHECK-LABEL: utofp_v32i8_v32bf16:
1766 ; CHECK: // %bb.0: // %entry
1767 ; CHECK-NEXT: ushll2 v3.8h, v0.16b, #0
1768 ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
1769 ; CHECK-NEXT: ushll2 v4.8h, v1.16b, #0
1770 ; CHECK-NEXT: ushll v1.8h, v1.8b, #0
1771 ; CHECK-NEXT: movi v2.4s, #1
1772 ; CHECK-NEXT: movi v21.4s, #127, msl #8
1773 ; CHECK-NEXT: ushll v5.4s, v3.4h, #0
1774 ; CHECK-NEXT: ushll v6.4s, v0.4h, #0
1775 ; CHECK-NEXT: ushll v7.4s, v4.4h, #0
1776 ; CHECK-NEXT: ushll v16.4s, v1.4h, #0
1777 ; CHECK-NEXT: ushll2 v3.4s, v3.8h, #0
1778 ; CHECK-NEXT: ushll2 v4.4s, v4.8h, #0
1779 ; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
1780 ; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0
1781 ; CHECK-NEXT: ucvtf v5.4s, v5.4s
1782 ; CHECK-NEXT: ucvtf v6.4s, v6.4s
1783 ; CHECK-NEXT: ucvtf v7.4s, v7.4s
1784 ; CHECK-NEXT: ucvtf v16.4s, v16.4s
1785 ; CHECK-NEXT: ucvtf v17.4s, v3.4s
1786 ; CHECK-NEXT: ucvtf v4.4s, v4.4s
1787 ; CHECK-NEXT: ucvtf v18.4s, v0.4s
1788 ; CHECK-NEXT: ucvtf v19.4s, v1.4s
1789 ; CHECK-NEXT: ushr v0.4s, v5.4s, #16
1790 ; CHECK-NEXT: ushr v3.4s, v6.4s, #16
1791 ; CHECK-NEXT: ushr v1.4s, v7.4s, #16
1792 ; CHECK-NEXT: ushr v20.4s, v16.4s, #16
1793 ; CHECK-NEXT: ushr v23.4s, v17.4s, #16
1794 ; CHECK-NEXT: ushr v24.4s, v4.4s, #16
1795 ; CHECK-NEXT: ushr v22.4s, v18.4s, #16
1796 ; CHECK-NEXT: ushr v25.4s, v19.4s, #16
1797 ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
1798 ; CHECK-NEXT: and v3.16b, v3.16b, v2.16b
1799 ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
1800 ; CHECK-NEXT: and v20.16b, v20.16b, v2.16b
1801 ; CHECK-NEXT: and v23.16b, v23.16b, v2.16b
1802 ; CHECK-NEXT: and v24.16b, v24.16b, v2.16b
1803 ; CHECK-NEXT: and v22.16b, v22.16b, v2.16b
1804 ; CHECK-NEXT: and v25.16b, v25.16b, v2.16b
1805 ; CHECK-NEXT: add v0.4s, v0.4s, v21.4s
1806 ; CHECK-NEXT: add v3.4s, v3.4s, v21.4s
1807 ; CHECK-NEXT: add v26.4s, v1.4s, v21.4s
1808 ; CHECK-NEXT: add v20.4s, v20.4s, v21.4s
1809 ; CHECK-NEXT: addhn v1.4h, v5.4s, v0.4s
1810 ; CHECK-NEXT: addhn v0.4h, v6.4s, v3.4s
1811 ; CHECK-NEXT: addhn v3.4h, v7.4s, v26.4s
1812 ; CHECK-NEXT: addhn v2.4h, v16.4s, v20.4s
1813 ; CHECK-NEXT: add v5.4s, v22.4s, v21.4s
1814 ; CHECK-NEXT: add v6.4s, v23.4s, v21.4s
1815 ; CHECK-NEXT: add v7.4s, v24.4s, v21.4s
1816 ; CHECK-NEXT: add v16.4s, v25.4s, v21.4s
1817 ; CHECK-NEXT: addhn2 v0.8h, v18.4s, v5.4s
1818 ; CHECK-NEXT: addhn2 v1.8h, v17.4s, v6.4s
1819 ; CHECK-NEXT: addhn2 v3.8h, v4.4s, v7.4s
1820 ; CHECK-NEXT: addhn2 v2.8h, v19.4s, v16.4s
1823 %c = uitofp <32 x i8> %a to <32 x bfloat>
1824 ret <32 x bfloat> %c
1826 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
1828 ; CHECK-GI-FP16: {{.*}}
1829 ; CHECK-GI-NOFP16: {{.*}}
1831 ; CHECK-SD-FP16: {{.*}}
1832 ; CHECK-SD-NOFP16: {{.*}}