; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512

; The 2048-bit configuration is verified against the same expectation set as
; the 512-bit one; only the 256-bit configuration has its own expectations.
target triple = "aarch64-unknown-linux-gnu"
; Don't use SVE for 64-bit vectors.
; fpext <2 x half> -> <2 x float>: the expected code is a NEON fcvtl followed
; by a 64-bit store of the <2 x float> result.
define void @fcvt_v2f16_v2f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v2f16_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    fcvtl v0.4s, v0.4h
; CHECK-NEXT:    str d0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <2 x half>, ptr %a
  %res = fpext <2 x half> %op1 to <2 x float>
  store <2 x float> %res, ptr %b
  ret void
}
; Don't use SVE for 128-bit vectors.
; fpext <4 x half> -> <4 x float>: the expected code is a NEON fcvtl followed
; by a 128-bit store of the <4 x float> result.
define void @fcvt_v4f16_v4f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v4f16_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    fcvtl v0.4s, v0.4h
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <4 x half>, ptr %a
  %res = fpext <4 x half> %op1 to <4 x float>
  store <4 x float> %res, ptr %b
  ret void
}
; fpext <8 x half> -> <8 x float>. vscale_range(2,0) sets a minimum vscale of 2
; (SVE registers of at least 256 bits), so the expected code is one predicated
; SVE fcvt over 8 word lanes, fed by an extending halfword load.
define void @fcvt_v8f16_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v8f16_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvt z0.s, p0/m, z0.h
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <8 x half>, ptr %a
  %res = fpext <8 x half> %op1 to <8 x float>
  store <8 x float> %res, ptr %b
  ret void
}
; fpext <16 x half> -> <16 x float>. There is no vscale_range attribute, so the
; expected code depends on the minimum SVE width given on each run line: with
; 256 bits the vector is converted as two 8-lane halves, with 512 bits or more
; as a single 16-lane operation.
define void @fcvt_v16f16_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvt_v16f16_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1h { z0.s }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    fcvt z0.s, p0/m, z0.h
; VBITS_GE_256-NEXT:    fcvt z1.s, p0/m, z1.h
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvt_v16f16_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1h { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvt z0.s, p0/m, z0.h
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x half>, ptr %a
  %res = fpext <16 x half> %op1 to <16 x float>
  store <16 x float> %res, ptr %b
  ret void
}
; fpext <32 x half> -> <32 x float>. vscale_range(8,0) sets a minimum vscale of
; 8 (SVE registers of at least 1024 bits), so the expected code is one
; predicated SVE fcvt over 32 word lanes.
define void @fcvt_v32f16_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvt_v32f16_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvt z0.s, p0/m, z0.h
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x half>, ptr %a
  %res = fpext <32 x half> %op1 to <32 x float>
  store <32 x float> %res, ptr %b
  ret void
}
; fpext <64 x half> -> <64 x float>. vscale_range(16,0) sets a minimum vscale
; of 16 (SVE registers of at least 2048 bits), so the expected code is one
; predicated SVE fcvt over 64 word lanes.
define void @fcvt_v64f16_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvt_v64f16_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvt z0.s, p0/m, z0.h
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <64 x half>, ptr %a
  %res = fpext <64 x half> %op1 to <64 x float>
  store <64 x float> %res, ptr %b
  ret void
}
; Don't use SVE for 64-bit vectors.
; fpext <1 x half> -> <1 x double>: the expected code is a scalar fcvt from an
; h register to a d register.
define void @fcvt_v1f16_v1f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v1f16_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0]
; CHECK-NEXT:    fcvt d0, h0
; CHECK-NEXT:    str d0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <1 x half>, ptr %a
  %res = fpext <1 x half> %op1 to <1 x double>
  store <1 x double> %res, ptr %b
  ret void
}
; v2f16 is not legal for NEON, so use SVE
; fpext <2 x half> -> <2 x double>: the halves are loaded with a plain 32-bit
; load, widened to doubleword lanes by two uunpklo steps, then converted with a
; predicated SVE fcvt; the 128-bit result is stored with a plain str.
define void @fcvt_v2f16_v2f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v2f16_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    uunpklo z0.s, z0.h
; CHECK-NEXT:    uunpklo z0.d, z0.s
; CHECK-NEXT:    fcvt z0.d, p0/m, z0.h
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <2 x half>, ptr %a
  %res = fpext <2 x half> %op1 to <2 x double>
  store <2 x double> %res, ptr %b
  ret void
}
; fpext <4 x half> -> <4 x double>. vscale_range(2,0) sets a minimum vscale of
; 2 (SVE registers of at least 256 bits), so the expected code is one
; predicated SVE fcvt over 4 doubleword lanes, fed by an extending halfword load.
define void @fcvt_v4f16_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v4f16_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvt z0.d, p0/m, z0.h
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <4 x half>, ptr %a
  %res = fpext <4 x half> %op1 to <4 x double>
  store <4 x double> %res, ptr %b
  ret void
}
; fpext <8 x half> -> <8 x double>. There is no vscale_range attribute, so the
; expected code depends on the minimum SVE width given on each run line: with
; 256 bits the vector is converted as two 4-lane halves, with 512 bits or more
; as a single 8-lane operation.
define void @fcvt_v8f16_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvt_v8f16_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1h { z0.d }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    fcvt z0.d, p0/m, z0.h
; VBITS_GE_256-NEXT:    fcvt z1.d, p0/m, z1.h
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvt_v8f16_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1h { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvt z0.d, p0/m, z0.h
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x half>, ptr %a
  %res = fpext <8 x half> %op1 to <8 x double>
  store <8 x double> %res, ptr %b
  ret void
}
; fpext <16 x half> -> <16 x double>. vscale_range(8,0) sets a minimum vscale
; of 8 (SVE registers of at least 1024 bits), so the expected code is one
; predicated SVE fcvt over 16 doubleword lanes.
define void @fcvt_v16f16_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvt_v16f16_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvt z0.d, p0/m, z0.h
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x half>, ptr %a
  %res = fpext <16 x half> %op1 to <16 x double>
  store <16 x double> %res, ptr %b
  ret void
}
; fpext <32 x half> -> <32 x double>. vscale_range(16,0) sets a minimum vscale
; of 16 (SVE registers of at least 2048 bits), so the expected code is one
; predicated SVE fcvt over 32 doubleword lanes.
define void @fcvt_v32f16_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvt_v32f16_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvt z0.d, p0/m, z0.h
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x half>, ptr %a
  %res = fpext <32 x half> %op1 to <32 x double>
  store <32 x double> %res, ptr %b
  ret void
}
; Don't use SVE for 64-bit vectors.
; fpext <1 x float> -> <1 x double>: the expected code is a NEON fcvtl with
; only the low 64 bits of the result stored.
define void @fcvt_v1f32_v1f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v1f32_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    fcvtl v0.2d, v0.2s
; CHECK-NEXT:    str d0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <1 x float>, ptr %a
  %res = fpext <1 x float> %op1 to <1 x double>
  store <1 x double> %res, ptr %b
  ret void
}
; Don't use SVE for 128-bit vectors.
; fpext <2 x float> -> <2 x double>: the expected code is a NEON fcvtl followed
; by a 128-bit store of the <2 x double> result.
define void @fcvt_v2f32_v2f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v2f32_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    fcvtl v0.2d, v0.2s
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <2 x float>, ptr %a
  %res = fpext <2 x float> %op1 to <2 x double>
  store <2 x double> %res, ptr %b
  ret void
}
; fpext <4 x float> -> <4 x double>. vscale_range(2,0) sets a minimum vscale of
; 2 (SVE registers of at least 256 bits), so the expected code is one
; predicated SVE fcvt over 4 doubleword lanes, fed by an extending word load.
define void @fcvt_v4f32_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v4f32_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvt z0.d, p0/m, z0.s
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <4 x float>, ptr %a
  %res = fpext <4 x float> %op1 to <4 x double>
  store <4 x double> %res, ptr %b
  ret void
}
; fpext <8 x float> -> <8 x double>. There is no vscale_range attribute, so the
; expected code depends on the minimum SVE width given on each run line: with
; 256 bits the vector is converted as two 4-lane halves, with 512 bits or more
; as a single 8-lane operation.
define void @fcvt_v8f32_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvt_v8f32_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1w { z0.d }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    fcvt z0.d, p0/m, z0.s
; VBITS_GE_256-NEXT:    fcvt z1.d, p0/m, z1.s
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvt_v8f32_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1w { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvt z0.d, p0/m, z0.s
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x float>, ptr %a
  %res = fpext <8 x float> %op1 to <8 x double>
  store <8 x double> %res, ptr %b
  ret void
}
; fpext <16 x float> -> <16 x double>. vscale_range(8,0) sets a minimum vscale
; of 8 (SVE registers of at least 1024 bits), so the expected code is one
; predicated SVE fcvt over 16 doubleword lanes.
define void @fcvt_v16f32_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvt_v16f32_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvt z0.d, p0/m, z0.s
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x float>, ptr %a
  %res = fpext <16 x float> %op1 to <16 x double>
  store <16 x double> %res, ptr %b
  ret void
}
; fpext <32 x float> -> <32 x double>. vscale_range(16,0) sets a minimum vscale
; of 16 (SVE registers of at least 2048 bits), so the expected code is one
; predicated SVE fcvt over 32 doubleword lanes.
define void @fcvt_v32f32_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvt_v32f32_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvt z0.d, p0/m, z0.s
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x float>, ptr %a
  %res = fpext <32 x float> %op1 to <32 x double>
  store <32 x double> %res, ptr %b
  ret void
}
; Don't use SVE for 64-bit vectors.
; fptrunc <2 x float> -> <2 x half>: the expected code is a NEON fcvtn with
; only the low 32 bits of the result stored.
define void @fcvt_v2f32_v2f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v2f32_v2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    fcvtn v0.4h, v0.4s
; CHECK-NEXT:    str s0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <2 x float>, ptr %a
  %res = fptrunc <2 x float> %op1 to <2 x half>
  store <2 x half> %res, ptr %b
  ret void
}
; Don't use SVE for 128-bit vectors.
; fptrunc <4 x float> -> <4 x half>: the expected code is a NEON fcvtn followed
; by a 64-bit store of the <4 x half> result.
define void @fcvt_v4f32_v4f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v4f32_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    fcvtn v0.4h, v0.4s
; CHECK-NEXT:    str d0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <4 x float>, ptr %a
  %res = fptrunc <4 x float> %op1 to <4 x half>
  store <4 x half> %res, ptr %b
  ret void
}
; fptrunc <8 x float> -> <8 x half>. vscale_range(2,0) sets a minimum vscale of
; 2 (SVE registers of at least 256 bits), so the expected code is one
; predicated SVE fcvt over 8 word lanes, written out by a truncating halfword
; store.
define void @fcvt_v8f32_v8f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v8f32_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvt z0.h, p0/m, z0.s
; CHECK-NEXT:    st1h { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <8 x float>, ptr %a
  %res = fptrunc <8 x float> %op1 to <8 x half>
  store <8 x half> %res, ptr %b
  ret void
}
; fptrunc <16 x float> -> <16 x half>. There is no vscale_range attribute, so
; the expected code depends on the minimum SVE width given on each run line:
; with 256 bits the vector is converted as two 8-lane halves, with 512 bits or
; more as a single 16-lane operation.
define void @fcvt_v16f32_v16f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvt_v16f32_v16f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    fcvt z0.h, p0/m, z0.s
; VBITS_GE_256-NEXT:    fcvt z1.h, p0/m, z1.s
; VBITS_GE_256-NEXT:    st1h { z0.s }, p0, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvt_v16f32_v16f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvt z0.h, p0/m, z0.s
; VBITS_GE_512-NEXT:    st1h { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x float>, ptr %a
  %res = fptrunc <16 x float> %op1 to <16 x half>
  store <16 x half> %res, ptr %b
  ret void
}
; fptrunc <32 x float> -> <32 x half>. vscale_range(8,0) sets a minimum vscale
; of 8 (SVE registers of at least 1024 bits), so the expected code is one
; predicated SVE fcvt over 32 word lanes.
define void @fcvt_v32f32_v32f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvt_v32f32_v32f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvt z0.h, p0/m, z0.s
; CHECK-NEXT:    st1h { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x float>, ptr %a
  %res = fptrunc <32 x float> %op1 to <32 x half>
  store <32 x half> %res, ptr %b
  ret void
}
; fptrunc <64 x float> -> <64 x half>. vscale_range(16,0) sets a minimum vscale
; of 16 (SVE registers of at least 2048 bits), so the expected code is one
; predicated SVE fcvt over 64 word lanes.
define void @fcvt_v64f32_v64f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvt_v64f32_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvt z0.h, p0/m, z0.s
; CHECK-NEXT:    st1h { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <64 x float>, ptr %a
  %res = fptrunc <64 x float> %op1 to <64 x half>
  store <64 x half> %res, ptr %b
  ret void
}
; Don't use SVE for 64-bit vectors.
; fptrunc <1 x double> -> <1 x half>: the expected code is a scalar fcvt from a
; d register to an h register.
define void @fcvt_v1f64_v1f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v1f64_v1f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    fcvt h0, d0
; CHECK-NEXT:    str h0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <1 x double>, ptr %a
  %res = fptrunc <1 x double> %op1 to <1 x half>
  store <1 x half> %res, ptr %b
  ret void
}
; v2f16 is not legal for NEON, so use SVE
; fptrunc <2 x double> -> <2 x half>: the doubles are loaded with a plain
; 128-bit load and converted with a predicated SVE fcvt; two uzp1 steps then
; pack the results down to halfword lanes before a 32-bit store.
define void @fcvt_v2f64_v2f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v2f64_v2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    fcvt z0.h, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    str s0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <2 x double>, ptr %a
  %res = fptrunc <2 x double> %op1 to <2 x half>
  store <2 x half> %res, ptr %b
  ret void
}
; fptrunc <4 x double> -> <4 x half>. vscale_range(2,0) sets a minimum vscale
; of 2 (SVE registers of at least 256 bits), so the expected code is one
; predicated SVE fcvt over 4 doubleword lanes, written out by a truncating
; halfword store.
define void @fcvt_v4f64_v4f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v4f64_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvt z0.h, p0/m, z0.d
; CHECK-NEXT:    st1h { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <4 x double>, ptr %a
  %res = fptrunc <4 x double> %op1 to <4 x half>
  store <4 x half> %res, ptr %b
  ret void
}
; fptrunc <8 x double> -> <8 x half>. There is no vscale_range attribute, so
; the expected code depends on the minimum SVE width given on each run line.
; With 256 bits the two 4-lane halves are converted separately, packed down to
; halfword lanes with uzp1, merged into one 128-bit register and stored with a
; single str. With 512 bits or more a single 8-lane operation is expected.
define void @fcvt_v8f64_v8f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvt_v8f64_v8f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ptrue p1.d
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    fcvt z0.h, p1/m, z0.d
; VBITS_GE_256-NEXT:    fcvt z1.h, p1/m, z1.d
; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT:    uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT:    mov v1.d[1], v0.d[0]
; VBITS_GE_256-NEXT:    str q1, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvt_v8f64_v8f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvt z0.h, p0/m, z0.d
; VBITS_GE_512-NEXT:    st1h { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x double>, ptr %a
  %res = fptrunc <8 x double> %op1 to <8 x half>
  store <8 x half> %res, ptr %b
  ret void
}
; fptrunc <16 x double> -> <16 x half>. vscale_range(8,0) sets a minimum vscale
; of 8 (SVE registers of at least 1024 bits), so the expected code is one
; predicated SVE fcvt over 16 doubleword lanes.
define void @fcvt_v16f64_v16f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvt_v16f64_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvt z0.h, p0/m, z0.d
; CHECK-NEXT:    st1h { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x double>, ptr %a
  %res = fptrunc <16 x double> %op1 to <16 x half>
  store <16 x half> %res, ptr %b
  ret void
}
; fptrunc <32 x double> -> <32 x half>. vscale_range(16,0) sets a minimum
; vscale of 16 (SVE registers of at least 2048 bits), so the expected code is
; one predicated SVE fcvt over 32 doubleword lanes.
define void @fcvt_v32f64_v32f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvt_v32f64_v32f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvt z0.h, p0/m, z0.d
; CHECK-NEXT:    st1h { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x double>, ptr %a
  %res = fptrunc <32 x double> %op1 to <32 x half>
  store <32 x half> %res, ptr %b
  ret void
}
; Don't use SVE for 64-bit vectors.
; fptrunc <1 x double> -> <1 x float>. The source vector is passed by value
; rather than loaded, so the only pointer argument is the destination; the
; expected code is a NEON fcvtn with the low 32 bits of the result stored.
define void @fcvt_v1f64_v1f32(<1 x double> %op1, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v1f64_v1f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fcvtn v0.2s, v0.2d
; CHECK-NEXT:    str s0, [x0]
; CHECK-NEXT:    ret
  %res = fptrunc <1 x double> %op1 to <1 x float>
  store <1 x float> %res, ptr %b
  ret void
}
; Don't use SVE for 128-bit vectors.
; fptrunc <2 x double> -> <2 x float>. The source vector is passed by value
; rather than loaded, so the only pointer argument is the destination; the
; expected code is a NEON fcvtn followed by a 64-bit store.
define void @fcvt_v2f64_v2f32(<2 x double> %op1, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v2f64_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtn v0.2s, v0.2d
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %res = fptrunc <2 x double> %op1 to <2 x float>
  store <2 x float> %res, ptr %b
  ret void
}
; fptrunc <4 x double> -> <4 x float>. vscale_range(2,0) sets a minimum vscale
; of 2 (SVE registers of at least 256 bits), so the expected code is one
; predicated SVE fcvt over 4 doubleword lanes, written out by a truncating word
; store.
define void @fcvt_v4f64_v4f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v4f64_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvt z0.s, p0/m, z0.d
; CHECK-NEXT:    st1w { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <4 x double>, ptr %a
  %res = fptrunc <4 x double> %op1 to <4 x float>
  store <4 x float> %res, ptr %b
  ret void
}
; fptrunc <8 x double> -> <8 x float>. There is no vscale_range attribute, so
; the expected code depends on the minimum SVE width given on each run line:
; with 256 bits the vector is converted as two 4-lane halves, with 512 bits or
; more as a single 8-lane operation.
define void @fcvt_v8f64_v8f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvt_v8f64_v8f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    fcvt z0.s, p0/m, z0.d
; VBITS_GE_256-NEXT:    fcvt z1.s, p0/m, z1.d
; VBITS_GE_256-NEXT:    st1w { z0.d }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.d }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvt_v8f64_v8f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvt z0.s, p0/m, z0.d
; VBITS_GE_512-NEXT:    st1w { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x double>, ptr %a
  %res = fptrunc <8 x double> %op1 to <8 x float>
  store <8 x float> %res, ptr %b
  ret void
}
; fptrunc <16 x double> -> <16 x float>. vscale_range(8,0) sets a minimum
; vscale of 8 (SVE registers of at least 1024 bits), so the expected code is
; one predicated SVE fcvt over 16 doubleword lanes.
define void @fcvt_v16f64_v16f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvt_v16f64_v16f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvt z0.s, p0/m, z0.d
; CHECK-NEXT:    st1w { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x double>, ptr %a
  %res = fptrunc <16 x double> %op1 to <16 x float>
  store <16 x float> %res, ptr %b
  ret void
}
; fptrunc <32 x double> -> <32 x float>. vscale_range(16,0) sets a minimum
; vscale of 16 (SVE registers of at least 2048 bits), so the expected code is
; one predicated SVE fcvt over 32 doubleword lanes.
define void @fcvt_v32f64_v32f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvt_v32f64_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvt z0.s, p0/m, z0.d
; CHECK-NEXT:    st1w { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x double>, ptr %a
  %res = fptrunc <32 x double> %op1 to <32 x float>
  store <32 x float> %res, ptr %b
  ret void
}
; Attribute group referenced as #0 by every function in this file: enables the
; SVE target feature.
attributes #0 = { "target-features"="+sve" }