1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
3 ; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
4 ; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
6 target triple = "aarch64-unknown-linux-gnu"
12 ; Don't use SVE for 64-bit vectors.
13 define <4 x i16> @fcvtzu_v4f16_v4i16(<4 x half> %op1) vscale_range(2,0) #0 {
14 ; CHECK-LABEL: fcvtzu_v4f16_v4i16:
16 ; CHECK-NEXT: fcvtzu v0.4h, v0.4h
18 %res = fptoui <4 x half> %op1 to <4 x i16>
22 ; Don't use SVE for 128-bit vectors.
23 define void @fcvtzu_v8f16_v8i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
24 ; CHECK-LABEL: fcvtzu_v8f16_v8i16:
26 ; CHECK-NEXT: ldr q0, [x0]
27 ; CHECK-NEXT: fcvtzu v0.8h, v0.8h
28 ; CHECK-NEXT: str q0, [x1]
30 %op1 = load <8 x half>, ptr %a
31 %res = fptoui <8 x half> %op1 to <8 x i16>
32 store <8 x i16> %res, ptr %b
36 define void @fcvtzu_v16f16_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
37 ; CHECK-LABEL: fcvtzu_v16f16_v16i16:
39 ; CHECK-NEXT: ptrue p0.h, vl16
40 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
41 ; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h
42 ; CHECK-NEXT: st1h { z0.h }, p0, [x1]
44 %op1 = load <16 x half>, ptr %a
45 %res = fptoui <16 x half> %op1 to <16 x i16>
46 store <16 x i16> %res, ptr %b
50 define void @fcvtzu_v32f16_v32i16(ptr %a, ptr %b) #0 {
51 ; VBITS_GE_256-LABEL: fcvtzu_v32f16_v32i16:
52 ; VBITS_GE_256: // %bb.0:
53 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
54 ; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
55 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
56 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
57 ; VBITS_GE_256-NEXT: fcvtzu z0.h, p0/m, z0.h
58 ; VBITS_GE_256-NEXT: fcvtzu z1.h, p0/m, z1.h
59 ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x8, lsl #1]
60 ; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1]
61 ; VBITS_GE_256-NEXT: ret
63 ; VBITS_GE_512-LABEL: fcvtzu_v32f16_v32i16:
64 ; VBITS_GE_512: // %bb.0:
65 ; VBITS_GE_512-NEXT: ptrue p0.h, vl32
66 ; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
67 ; VBITS_GE_512-NEXT: fcvtzu z0.h, p0/m, z0.h
68 ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
69 ; VBITS_GE_512-NEXT: ret
70 %op1 = load <32 x half>, ptr %a
71 %res = fptoui <32 x half> %op1 to <32 x i16>
72 store <32 x i16> %res, ptr %b
76 define void @fcvtzu_v64f16_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
77 ; CHECK-LABEL: fcvtzu_v64f16_v64i16:
79 ; CHECK-NEXT: ptrue p0.h, vl64
80 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
81 ; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h
82 ; CHECK-NEXT: st1h { z0.h }, p0, [x1]
84 %op1 = load <64 x half>, ptr %a
85 %res = fptoui <64 x half> %op1 to <64 x i16>
86 store <64 x i16> %res, ptr %b
90 define void @fcvtzu_v128f16_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
91 ; CHECK-LABEL: fcvtzu_v128f16_v128i16:
93 ; CHECK-NEXT: ptrue p0.h, vl128
94 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
95 ; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h
96 ; CHECK-NEXT: st1h { z0.h }, p0, [x1]
98 %op1 = load <128 x half>, ptr %a
99 %res = fptoui <128 x half> %op1 to <128 x i16>
100 store <128 x i16> %res, ptr %b
108 ; Don't use SVE for 64-bit vectors.
109 define <2 x i32> @fcvtzu_v2f16_v2i32(<2 x half> %op1) vscale_range(2,0) #0 {
110 ; CHECK-LABEL: fcvtzu_v2f16_v2i32:
112 ; CHECK-NEXT: fcvtl v0.4s, v0.4h
113 ; CHECK-NEXT: fcvtzu v0.4s, v0.4s
114 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
116 %res = fptoui <2 x half> %op1 to <2 x i32>
120 ; Don't use SVE for 128-bit vectors.
121 define <4 x i32> @fcvtzu_v4f16_v4i32(<4 x half> %op1) vscale_range(2,0) #0 {
122 ; CHECK-LABEL: fcvtzu_v4f16_v4i32:
124 ; CHECK-NEXT: fcvtl v0.4s, v0.4h
125 ; CHECK-NEXT: fcvtzu v0.4s, v0.4s
127 %res = fptoui <4 x half> %op1 to <4 x i32>
131 define void @fcvtzu_v8f16_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
132 ; CHECK-LABEL: fcvtzu_v8f16_v8i32:
134 ; CHECK-NEXT: ptrue p0.s, vl8
135 ; CHECK-NEXT: ldr q0, [x0]
136 ; CHECK-NEXT: uunpklo z0.s, z0.h
137 ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
138 ; CHECK-NEXT: st1w { z0.s }, p0, [x1]
140 %op1 = load <8 x half>, ptr %a
141 %res = fptoui <8 x half> %op1 to <8 x i32>
142 store <8 x i32> %res, ptr %b
146 define void @fcvtzu_v16f16_v16i32(ptr %a, ptr %b) #0 {
147 ; VBITS_GE_256-LABEL: fcvtzu_v16f16_v16i32:
148 ; VBITS_GE_256: // %bb.0:
149 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
150 ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
151 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
152 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
153 ; VBITS_GE_256-NEXT: uunpklo z1.s, z0.h
154 ; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
155 ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
156 ; VBITS_GE_256-NEXT: fcvtzu z1.s, p0/m, z1.h
157 ; VBITS_GE_256-NEXT: fcvtzu z0.s, p0/m, z0.h
158 ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1]
159 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
160 ; VBITS_GE_256-NEXT: ret
162 ; VBITS_GE_512-LABEL: fcvtzu_v16f16_v16i32:
163 ; VBITS_GE_512: // %bb.0:
164 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16
165 ; VBITS_GE_512-NEXT: ld1h { z0.s }, p0/z, [x0]
166 ; VBITS_GE_512-NEXT: fcvtzu z0.s, p0/m, z0.h
167 ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
168 ; VBITS_GE_512-NEXT: ret
169 %op1 = load <16 x half>, ptr %a
170 %res = fptoui <16 x half> %op1 to <16 x i32>
171 store <16 x i32> %res, ptr %b
175 define void @fcvtzu_v32f16_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
176 ; CHECK-LABEL: fcvtzu_v32f16_v32i32:
178 ; CHECK-NEXT: ptrue p0.s, vl32
179 ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
180 ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
181 ; CHECK-NEXT: st1w { z0.s }, p0, [x1]
183 %op1 = load <32 x half>, ptr %a
184 %res = fptoui <32 x half> %op1 to <32 x i32>
185 store <32 x i32> %res, ptr %b
189 define void @fcvtzu_v64f16_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
190 ; CHECK-LABEL: fcvtzu_v64f16_v64i32:
192 ; CHECK-NEXT: ptrue p0.s, vl64
193 ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
194 ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
195 ; CHECK-NEXT: st1w { z0.s }, p0, [x1]
197 %op1 = load <64 x half>, ptr %a
198 %res = fptoui <64 x half> %op1 to <64 x i32>
199 store <64 x i32> %res, ptr %b
207 ; Don't use SVE for 64-bit vectors.
208 define <1 x i64> @fcvtzu_v1f16_v1i64(<1 x half> %op1) vscale_range(2,0) #0 {
209 ; CHECK-LABEL: fcvtzu_v1f16_v1i64:
211 ; CHECK-NEXT: fcvtzu x8, h0
212 ; CHECK-NEXT: fmov d0, x8
214 %res = fptoui <1 x half> %op1 to <1 x i64>
218 ; v2f16 is not legal for NEON, so use SVE
219 define <2 x i64> @fcvtzu_v2f16_v2i64(<2 x half> %op1) vscale_range(2,0) #0 {
220 ; CHECK-LABEL: fcvtzu_v2f16_v2i64:
222 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
223 ; CHECK-NEXT: ptrue p0.d, vl4
224 ; CHECK-NEXT: uunpklo z0.s, z0.h
225 ; CHECK-NEXT: uunpklo z0.d, z0.s
226 ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
227 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
229 %res = fptoui <2 x half> %op1 to <2 x i64>
233 define void @fcvtzu_v4f16_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
234 ; CHECK-LABEL: fcvtzu_v4f16_v4i64:
236 ; CHECK-NEXT: ldr d0, [x0]
237 ; CHECK-NEXT: ptrue p0.d, vl4
238 ; CHECK-NEXT: uunpklo z0.s, z0.h
239 ; CHECK-NEXT: uunpklo z0.d, z0.s
240 ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
241 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
243 %op1 = load <4 x half>, ptr %a
244 %res = fptoui <4 x half> %op1 to <4 x i64>
245 store <4 x i64> %res, ptr %b
249 define void @fcvtzu_v8f16_v8i64(ptr %a, ptr %b) #0 {
250 ; VBITS_GE_256-LABEL: fcvtzu_v8f16_v8i64:
251 ; VBITS_GE_256: // %bb.0:
252 ; VBITS_GE_256-NEXT: ldr q0, [x0]
253 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
254 ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
255 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
256 ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
257 ; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
258 ; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
259 ; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
260 ; VBITS_GE_256-NEXT: fcvtzu z0.d, p0/m, z0.h
261 ; VBITS_GE_256-NEXT: fcvtzu z1.d, p0/m, z1.h
262 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
263 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3]
264 ; VBITS_GE_256-NEXT: ret
266 ; VBITS_GE_512-LABEL: fcvtzu_v8f16_v8i64:
267 ; VBITS_GE_512: // %bb.0:
268 ; VBITS_GE_512-NEXT: ldr q0, [x0]
269 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
270 ; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h
271 ; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s
272 ; VBITS_GE_512-NEXT: fcvtzu z0.d, p0/m, z0.h
273 ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
274 ; VBITS_GE_512-NEXT: ret
275 %op1 = load <8 x half>, ptr %a
276 %res = fptoui <8 x half> %op1 to <8 x i64>
277 store <8 x i64> %res, ptr %b
281 define void @fcvtzu_v16f16_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
282 ; CHECK-LABEL: fcvtzu_v16f16_v16i64:
284 ; CHECK-NEXT: ptrue p0.d, vl16
285 ; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
286 ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
287 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
289 %op1 = load <16 x half>, ptr %a
290 %res = fptoui <16 x half> %op1 to <16 x i64>
291 store <16 x i64> %res, ptr %b
295 define void @fcvtzu_v32f16_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
296 ; CHECK-LABEL: fcvtzu_v32f16_v32i64:
298 ; CHECK-NEXT: ptrue p0.d, vl32
299 ; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
300 ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
301 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
303 %op1 = load <32 x half>, ptr %a
304 %res = fptoui <32 x half> %op1 to <32 x i64>
305 store <32 x i64> %res, ptr %b
313 ; Don't use SVE for 64-bit vectors.
314 define <2 x i16> @fcvtzu_v2f32_v2i16(<2 x float> %op1) vscale_range(2,0) #0 {
315 ; CHECK-LABEL: fcvtzu_v2f32_v2i16:
317 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s
319 %res = fptoui <2 x float> %op1 to <2 x i16>
323 ; Don't use SVE for 128-bit vectors.
324 define <4 x i16> @fcvtzu_v4f32_v4i16(<4 x float> %op1) vscale_range(2,0) #0 {
325 ; CHECK-LABEL: fcvtzu_v4f32_v4i16:
327 ; CHECK-NEXT: fcvtzu v1.4s, v0.4s
328 ; CHECK-NEXT: mov w8, v1.s[1]
329 ; CHECK-NEXT: mov v0.16b, v1.16b
330 ; CHECK-NEXT: mov w9, v1.s[2]
331 ; CHECK-NEXT: mov v0.h[1], w8
332 ; CHECK-NEXT: mov w8, v1.s[3]
333 ; CHECK-NEXT: mov v0.h[2], w9
334 ; CHECK-NEXT: mov v0.h[3], w8
335 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
337 %res = fptoui <4 x float> %op1 to <4 x i16>
341 define <8 x i16> @fcvtzu_v8f32_v8i16(ptr %a) vscale_range(2,0) #0 {
342 ; CHECK-LABEL: fcvtzu_v8f32_v8i16:
344 ; CHECK-NEXT: ptrue p0.s, vl8
345 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
346 ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
347 ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
348 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
350 %op1 = load <8 x float>, ptr %a
351 %res = fptoui <8 x float> %op1 to <8 x i16>
355 define void @fcvtzu_v16f32_v16i16(ptr %a, ptr %b) #0 {
356 ; VBITS_GE_256-LABEL: fcvtzu_v16f32_v16i16:
357 ; VBITS_GE_256: // %bb.0:
358 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
359 ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
360 ; VBITS_GE_256-NEXT: ptrue p1.h, vl16
361 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
362 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
363 ; VBITS_GE_256-NEXT: fcvtzu z0.s, p0/m, z0.s
364 ; VBITS_GE_256-NEXT: fcvtzu z1.s, p0/m, z1.s
365 ; VBITS_GE_256-NEXT: ptrue p0.h, vl8
366 ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
367 ; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
368 ; VBITS_GE_256-NEXT: splice z1.h, p0, z1.h, z0.h
369 ; VBITS_GE_256-NEXT: st1h { z1.h }, p1, [x1]
370 ; VBITS_GE_256-NEXT: ret
372 ; VBITS_GE_512-LABEL: fcvtzu_v16f32_v16i16:
373 ; VBITS_GE_512: // %bb.0:
374 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16
375 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
376 ; VBITS_GE_512-NEXT: fcvtzu z0.s, p0/m, z0.s
377 ; VBITS_GE_512-NEXT: st1h { z0.s }, p0, [x1]
378 ; VBITS_GE_512-NEXT: ret
379 %op1 = load <16 x float>, ptr %a
380 %res = fptoui <16 x float> %op1 to <16 x i16>
381 store <16 x i16> %res, ptr %b
385 define void @fcvtzu_v32f32_v32i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
386 ; CHECK-LABEL: fcvtzu_v32f32_v32i16:
388 ; CHECK-NEXT: ptrue p0.s, vl32
389 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
390 ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
391 ; CHECK-NEXT: st1h { z0.s }, p0, [x1]
393 %op1 = load <32 x float>, ptr %a
394 %res = fptoui <32 x float> %op1 to <32 x i16>
395 store <32 x i16> %res, ptr %b
399 define void @fcvtzu_v64f32_v64i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
400 ; CHECK-LABEL: fcvtzu_v64f32_v64i16:
402 ; CHECK-NEXT: ptrue p0.s, vl64
403 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
404 ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
405 ; CHECK-NEXT: st1h { z0.s }, p0, [x1]
407 %op1 = load <64 x float>, ptr %a
408 %res = fptoui <64 x float> %op1 to <64 x i16>
409 store <64 x i16> %res, ptr %b
417 ; Don't use SVE for 64-bit vectors.
418 define <2 x i32> @fcvtzu_v2f32_v2i32(<2 x float> %op1) vscale_range(2,0) #0 {
419 ; CHECK-LABEL: fcvtzu_v2f32_v2i32:
421 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s
423 %res = fptoui <2 x float> %op1 to <2 x i32>
427 ; Don't use SVE for 128-bit vectors.
428 define <4 x i32> @fcvtzu_v4f32_v4i32(<4 x float> %op1) vscale_range(2,0) #0 {
429 ; CHECK-LABEL: fcvtzu_v4f32_v4i32:
431 ; CHECK-NEXT: fcvtzu v0.4s, v0.4s
433 %res = fptoui <4 x float> %op1 to <4 x i32>
437 define void @fcvtzu_v8f32_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
438 ; CHECK-LABEL: fcvtzu_v8f32_v8i32:
440 ; CHECK-NEXT: ptrue p0.s, vl8
441 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
442 ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
443 ; CHECK-NEXT: st1w { z0.s }, p0, [x1]
445 %op1 = load <8 x float>, ptr %a
446 %res = fptoui <8 x float> %op1 to <8 x i32>
447 store <8 x i32> %res, ptr %b
451 define void @fcvtzu_v16f32_v16i32(ptr %a, ptr %b) #0 {
452 ; VBITS_GE_256-LABEL: fcvtzu_v16f32_v16i32:
453 ; VBITS_GE_256: // %bb.0:
454 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
455 ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
456 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
457 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
458 ; VBITS_GE_256-NEXT: fcvtzu z0.s, p0/m, z0.s
459 ; VBITS_GE_256-NEXT: fcvtzu z1.s, p0/m, z1.s
460 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
461 ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1]
462 ; VBITS_GE_256-NEXT: ret
464 ; VBITS_GE_512-LABEL: fcvtzu_v16f32_v16i32:
465 ; VBITS_GE_512: // %bb.0:
466 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16
467 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
468 ; VBITS_GE_512-NEXT: fcvtzu z0.s, p0/m, z0.s
469 ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
470 ; VBITS_GE_512-NEXT: ret
471 %op1 = load <16 x float>, ptr %a
472 %res = fptoui <16 x float> %op1 to <16 x i32>
473 store <16 x i32> %res, ptr %b
477 define void @fcvtzu_v32f32_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
478 ; CHECK-LABEL: fcvtzu_v32f32_v32i32:
480 ; CHECK-NEXT: ptrue p0.s, vl32
481 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
482 ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
483 ; CHECK-NEXT: st1w { z0.s }, p0, [x1]
485 %op1 = load <32 x float>, ptr %a
486 %res = fptoui <32 x float> %op1 to <32 x i32>
487 store <32 x i32> %res, ptr %b
491 define void @fcvtzu_v64f32_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
492 ; CHECK-LABEL: fcvtzu_v64f32_v64i32:
494 ; CHECK-NEXT: ptrue p0.s, vl64
495 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
496 ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
497 ; CHECK-NEXT: st1w { z0.s }, p0, [x1]
499 %op1 = load <64 x float>, ptr %a
500 %res = fptoui <64 x float> %op1 to <64 x i32>
501 store <64 x i32> %res, ptr %b
509 ; Don't use SVE for 64-bit vectors.
510 define <1 x i64> @fcvtzu_v1f32_v1i64(<1 x float> %op1) vscale_range(2,0) #0 {
511 ; CHECK-LABEL: fcvtzu_v1f32_v1i64:
513 ; CHECK-NEXT: fcvtl v0.2d, v0.2s
514 ; CHECK-NEXT: fcvtzu v0.2d, v0.2d
515 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
517 %res = fptoui <1 x float> %op1 to <1 x i64>
521 ; Don't use SVE for 128-bit vectors.
522 define <2 x i64> @fcvtzu_v2f32_v2i64(<2 x float> %op1) vscale_range(2,0) #0 {
523 ; CHECK-LABEL: fcvtzu_v2f32_v2i64:
525 ; CHECK-NEXT: fcvtl v0.2d, v0.2s
526 ; CHECK-NEXT: fcvtzu v0.2d, v0.2d
528 %res = fptoui <2 x float> %op1 to <2 x i64>
532 define void @fcvtzu_v4f32_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
533 ; CHECK-LABEL: fcvtzu_v4f32_v4i64:
535 ; CHECK-NEXT: ptrue p0.d, vl4
536 ; CHECK-NEXT: ldr q0, [x0]
537 ; CHECK-NEXT: uunpklo z0.d, z0.s
538 ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s
539 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
541 %op1 = load <4 x float>, ptr %a
542 %res = fptoui <4 x float> %op1 to <4 x i64>
543 store <4 x i64> %res, ptr %b
547 define void @fcvtzu_v8f32_v8i64(ptr %a, ptr %b) #0 {
548 ; VBITS_GE_256-LABEL: fcvtzu_v8f32_v8i64:
549 ; VBITS_GE_256: // %bb.0:
550 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
551 ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
552 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
553 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
554 ; VBITS_GE_256-NEXT: uunpklo z1.d, z0.s
555 ; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
556 ; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
557 ; VBITS_GE_256-NEXT: fcvtzu z1.d, p0/m, z1.s
558 ; VBITS_GE_256-NEXT: fcvtzu z0.d, p0/m, z0.s
559 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1]
560 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
561 ; VBITS_GE_256-NEXT: ret
563 ; VBITS_GE_512-LABEL: fcvtzu_v8f32_v8i64:
564 ; VBITS_GE_512: // %bb.0:
565 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
566 ; VBITS_GE_512-NEXT: ld1w { z0.d }, p0/z, [x0]
567 ; VBITS_GE_512-NEXT: fcvtzu z0.d, p0/m, z0.s
568 ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
569 ; VBITS_GE_512-NEXT: ret
570 %op1 = load <8 x float>, ptr %a
571 %res = fptoui <8 x float> %op1 to <8 x i64>
572 store <8 x i64> %res, ptr %b
576 define void @fcvtzu_v16f32_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
577 ; CHECK-LABEL: fcvtzu_v16f32_v16i64:
579 ; CHECK-NEXT: ptrue p0.d, vl16
580 ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
581 ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s
582 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
584 %op1 = load <16 x float>, ptr %a
585 %res = fptoui <16 x float> %op1 to <16 x i64>
586 store <16 x i64> %res, ptr %b
590 define void @fcvtzu_v32f32_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
591 ; CHECK-LABEL: fcvtzu_v32f32_v32i64:
593 ; CHECK-NEXT: ptrue p0.d, vl32
594 ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
595 ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s
596 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
598 %op1 = load <32 x float>, ptr %a
599 %res = fptoui <32 x float> %op1 to <32 x i64>
600 store <32 x i64> %res, ptr %b
609 ; v1f64 is preferred to be widened to v4f64, so use SVE
610 define <1 x i16> @fcvtzu_v1f64_v1i16(<1 x double> %op1) vscale_range(2,0) #0 {
611 ; CHECK-LABEL: fcvtzu_v1f64_v1i16:
613 ; CHECK-NEXT: ptrue p0.d, vl4
614 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
615 ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
616 ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
617 ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
618 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
620 %res = fptoui <1 x double> %op1 to <1 x i16>
624 ; Don't use SVE for 128-bit vectors.
625 define <2 x i16> @fcvtzu_v2f64_v2i16(<2 x double> %op1) vscale_range(2,0) #0 {
626 ; CHECK-LABEL: fcvtzu_v2f64_v2i16:
628 ; CHECK-NEXT: fcvtzs v0.2d, v0.2d
629 ; CHECK-NEXT: xtn v0.2s, v0.2d
631 %res = fptoui <2 x double> %op1 to <2 x i16>
635 define <4 x i16> @fcvtzu_v4f64_v4i16(ptr %a) vscale_range(2,0) #0 {
636 ; CHECK-LABEL: fcvtzu_v4f64_v4i16:
638 ; CHECK-NEXT: ptrue p0.d, vl4
639 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
640 ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
641 ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
642 ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
643 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
645 %op1 = load <4 x double>, ptr %a
646 %res = fptoui <4 x double> %op1 to <4 x i16>
650 define <8 x i16> @fcvtzu_v8f64_v8i16(ptr %a) #0 {
651 ; VBITS_GE_256-LABEL: fcvtzu_v8f64_v8i16:
652 ; VBITS_GE_256: // %bb.0:
653 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
654 ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
655 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
656 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
657 ; VBITS_GE_256-NEXT: fcvtzu z0.d, p0/m, z0.d
658 ; VBITS_GE_256-NEXT: fcvtzu z1.d, p0/m, z1.d
659 ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
660 ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
661 ; VBITS_GE_256-NEXT: uzp1 z2.h, z0.h, z0.h
662 ; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h
663 ; VBITS_GE_256-NEXT: mov v0.d[1], v2.d[0]
664 ; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 killed $z0
665 ; VBITS_GE_256-NEXT: ret
667 ; VBITS_GE_512-LABEL: fcvtzu_v8f64_v8i16:
668 ; VBITS_GE_512: // %bb.0:
669 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
670 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
671 ; VBITS_GE_512-NEXT: fcvtzu z0.d, p0/m, z0.d
672 ; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
673 ; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
674 ; VBITS_GE_512-NEXT: // kill: def $q0 killed $q0 killed $z0
675 ; VBITS_GE_512-NEXT: ret
676 %op1 = load <8 x double>, ptr %a
677 %res = fptoui <8 x double> %op1 to <8 x i16>
681 define void @fcvtzu_v16f64_v16i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
682 ; CHECK-LABEL: fcvtzu_v16f64_v16i16:
684 ; CHECK-NEXT: ptrue p0.d, vl16
685 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
686 ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
687 ; CHECK-NEXT: st1h { z0.d }, p0, [x1]
689 %op1 = load <16 x double>, ptr %a
690 %res = fptoui <16 x double> %op1 to <16 x i16>
691 store <16 x i16> %res, ptr %b
695 define void @fcvtzu_v32f64_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
696 ; CHECK-LABEL: fcvtzu_v32f64_v32i16:
698 ; CHECK-NEXT: ptrue p0.d, vl32
699 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
700 ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
701 ; CHECK-NEXT: st1h { z0.d }, p0, [x1]
703 %op1 = load <32 x double>, ptr %a
704 %res = fptoui <32 x double> %op1 to <32 x i16>
705 store <32 x i16> %res, ptr %b
713 ; Don't use SVE for 64-bit vectors.
714 define <1 x i32> @fcvtzu_v1f64_v1i32(<1 x double> %op1) vscale_range(2,0) #0 {
715 ; CHECK-LABEL: fcvtzu_v1f64_v1i32:
717 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
718 ; CHECK-NEXT: fcvtzu v0.2d, v0.2d
719 ; CHECK-NEXT: xtn v0.2s, v0.2d
721 %res = fptoui <1 x double> %op1 to <1 x i32>
725 ; Don't use SVE for 128-bit vectors.
726 define <2 x i32> @fcvtzu_v2f64_v2i32(<2 x double> %op1) vscale_range(2,0) #0 {
727 ; CHECK-LABEL: fcvtzu_v2f64_v2i32:
729 ; CHECK-NEXT: fcvtzu v0.2d, v0.2d
730 ; CHECK-NEXT: xtn v0.2s, v0.2d
732 %res = fptoui <2 x double> %op1 to <2 x i32>
736 define <4 x i32> @fcvtzu_v4f64_v4i32(ptr %a) vscale_range(2,0) #0 {
737 ; CHECK-LABEL: fcvtzu_v4f64_v4i32:
739 ; CHECK-NEXT: ptrue p0.d, vl4
740 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
741 ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
742 ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
743 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
745 %op1 = load <4 x double>, ptr %a
746 %res = fptoui <4 x double> %op1 to <4 x i32>
750 define void @fcvtzu_v8f64_v8i32(ptr %a, ptr %b) #0 {
751 ; VBITS_GE_256-LABEL: fcvtzu_v8f64_v8i32:
752 ; VBITS_GE_256: // %bb.0:
753 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
754 ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
755 ; VBITS_GE_256-NEXT: ptrue p1.s, vl8
756 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
757 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
758 ; VBITS_GE_256-NEXT: fcvtzu z0.d, p0/m, z0.d
759 ; VBITS_GE_256-NEXT: fcvtzu z1.d, p0/m, z1.d
760 ; VBITS_GE_256-NEXT: ptrue p0.s, vl4
761 ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
762 ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
763 ; VBITS_GE_256-NEXT: splice z1.s, p0, z1.s, z0.s
764 ; VBITS_GE_256-NEXT: st1w { z1.s }, p1, [x1]
765 ; VBITS_GE_256-NEXT: ret
767 ; VBITS_GE_512-LABEL: fcvtzu_v8f64_v8i32:
768 ; VBITS_GE_512: // %bb.0:
769 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
770 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
771 ; VBITS_GE_512-NEXT: fcvtzu z0.d, p0/m, z0.d
772 ; VBITS_GE_512-NEXT: st1w { z0.d }, p0, [x1]
773 ; VBITS_GE_512-NEXT: ret
774 %op1 = load <8 x double>, ptr %a
775 %res = fptoui <8 x double> %op1 to <8 x i32>
776 store <8 x i32> %res, ptr %b
780 define void @fcvtzu_v16f64_v16i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
781 ; CHECK-LABEL: fcvtzu_v16f64_v16i32:
783 ; CHECK-NEXT: ptrue p0.d, vl16
784 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
785 ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
786 ; CHECK-NEXT: st1w { z0.d }, p0, [x1]
788 %op1 = load <16 x double>, ptr %a
789 %res = fptoui <16 x double> %op1 to <16 x i32>
790 store <16 x i32> %res, ptr %b
794 define void @fcvtzu_v32f64_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
795 ; CHECK-LABEL: fcvtzu_v32f64_v32i32:
797 ; CHECK-NEXT: ptrue p0.d, vl32
798 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
799 ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
800 ; CHECK-NEXT: st1w { z0.d }, p0, [x1]
802 %op1 = load <32 x double>, ptr %a
803 %res = fptoui <32 x double> %op1 to <32 x i32>
804 store <32 x i32> %res, ptr %b
812 ; Don't use SVE for 64-bit vectors.
813 define <1 x i64> @fcvtzu_v1f64_v1i64(<1 x double> %op1) vscale_range(2,0) #0 {
814 ; CHECK-LABEL: fcvtzu_v1f64_v1i64:
816 ; CHECK-NEXT: fcvtzu x8, d0
817 ; CHECK-NEXT: fmov d0, x8
819 %res = fptoui <1 x double> %op1 to <1 x i64>
823 ; Don't use SVE for 128-bit vectors.
824 define <2 x i64> @fcvtzu_v2f64_v2i64(<2 x double> %op1) vscale_range(2,0) #0 {
825 ; CHECK-LABEL: fcvtzu_v2f64_v2i64:
827 ; CHECK-NEXT: fcvtzu v0.2d, v0.2d
829 %res = fptoui <2 x double> %op1 to <2 x i64>
833 define void @fcvtzu_v4f64_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
834 ; CHECK-LABEL: fcvtzu_v4f64_v4i64:
836 ; CHECK-NEXT: ptrue p0.d, vl4
837 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
838 ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
839 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
841 %op1 = load <4 x double>, ptr %a
842 %res = fptoui <4 x double> %op1 to <4 x i64>
843 store <4 x i64> %res, ptr %b
847 define void @fcvtzu_v8f64_v8i64(ptr %a, ptr %b) #0 {
848 ; VBITS_GE_256-LABEL: fcvtzu_v8f64_v8i64:
849 ; VBITS_GE_256: // %bb.0:
850 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
851 ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
852 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
853 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
854 ; VBITS_GE_256-NEXT: fcvtzu z0.d, p0/m, z0.d
855 ; VBITS_GE_256-NEXT: fcvtzu z1.d, p0/m, z1.d
856 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
857 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1]
858 ; VBITS_GE_256-NEXT: ret
860 ; VBITS_GE_512-LABEL: fcvtzu_v8f64_v8i64:
861 ; VBITS_GE_512: // %bb.0:
862 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
863 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
864 ; VBITS_GE_512-NEXT: fcvtzu z0.d, p0/m, z0.d
865 ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
866 ; VBITS_GE_512-NEXT: ret
867 %op1 = load <8 x double>, ptr %a
868 %res = fptoui <8 x double> %op1 to <8 x i64>
869 store <8 x i64> %res, ptr %b
873 define void @fcvtzu_v16f64_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
874 ; CHECK-LABEL: fcvtzu_v16f64_v16i64:
876 ; CHECK-NEXT: ptrue p0.d, vl16
877 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
878 ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
879 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
881 %op1 = load <16 x double>, ptr %a
882 %res = fptoui <16 x double> %op1 to <16 x i64>
883 store <16 x i64> %res, ptr %b
887 define void @fcvtzu_v32f64_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
888 ; CHECK-LABEL: fcvtzu_v32f64_v32i64:
890 ; CHECK-NEXT: ptrue p0.d, vl32
891 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
892 ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
893 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
895 %op1 = load <32 x double>, ptr %a
896 %res = fptoui <32 x double> %op1 to <32 x i64>
897 store <32 x i64> %res, ptr %b
905 ; Don't use SVE for 64-bit vectors.
906 define <4 x i16> @fcvtzs_v4f16_v4i16(<4 x half> %op1) vscale_range(2,0) #0 {
907 ; CHECK-LABEL: fcvtzs_v4f16_v4i16:
909 ; CHECK-NEXT: fcvtzs v0.4h, v0.4h
911 %res = fptosi <4 x half> %op1 to <4 x i16>
915 ; Don't use SVE for 128-bit vectors.
916 define void @fcvtzs_v8f16_v8i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
917 ; CHECK-LABEL: fcvtzs_v8f16_v8i16:
919 ; CHECK-NEXT: ldr q0, [x0]
920 ; CHECK-NEXT: fcvtzs v0.8h, v0.8h
921 ; CHECK-NEXT: str q0, [x1]
923 %op1 = load <8 x half>, ptr %a
924 %res = fptosi <8 x half> %op1 to <8 x i16>
925 store <8 x i16> %res, ptr %b
929 define void @fcvtzs_v16f16_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
930 ; CHECK-LABEL: fcvtzs_v16f16_v16i16:
932 ; CHECK-NEXT: ptrue p0.h, vl16
933 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
934 ; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h
935 ; CHECK-NEXT: st1h { z0.h }, p0, [x1]
937 %op1 = load <16 x half>, ptr %a
938 %res = fptosi <16 x half> %op1 to <16 x i16>
939 store <16 x i16> %res, ptr %b
943 define void @fcvtzs_v32f16_v32i16(ptr %a, ptr %b) #0 {
944 ; VBITS_GE_256-LABEL: fcvtzs_v32f16_v32i16:
945 ; VBITS_GE_256: // %bb.0:
946 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
947 ; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
948 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
949 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
950 ; VBITS_GE_256-NEXT: fcvtzs z0.h, p0/m, z0.h
951 ; VBITS_GE_256-NEXT: fcvtzs z1.h, p0/m, z1.h
952 ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x8, lsl #1]
953 ; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1]
954 ; VBITS_GE_256-NEXT: ret
956 ; VBITS_GE_512-LABEL: fcvtzs_v32f16_v32i16:
957 ; VBITS_GE_512: // %bb.0:
958 ; VBITS_GE_512-NEXT: ptrue p0.h, vl32
959 ; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
960 ; VBITS_GE_512-NEXT: fcvtzs z0.h, p0/m, z0.h
961 ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
962 ; VBITS_GE_512-NEXT: ret
963 %op1 = load <32 x half>, ptr %a
964 %res = fptosi <32 x half> %op1 to <32 x i16>
965 store <32 x i16> %res, ptr %b
969 define void @fcvtzs_v64f16_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
970 ; CHECK-LABEL: fcvtzs_v64f16_v64i16:
972 ; CHECK-NEXT: ptrue p0.h, vl64
973 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
974 ; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h
975 ; CHECK-NEXT: st1h { z0.h }, p0, [x1]
977 %op1 = load <64 x half>, ptr %a
978 %res = fptosi <64 x half> %op1 to <64 x i16>
979 store <64 x i16> %res, ptr %b
983 define void @fcvtzs_v128f16_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
984 ; CHECK-LABEL: fcvtzs_v128f16_v128i16:
986 ; CHECK-NEXT: ptrue p0.h, vl128
987 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
988 ; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h
989 ; CHECK-NEXT: st1h { z0.h }, p0, [x1]
991 %op1 = load <128 x half>, ptr %a
992 %res = fptosi <128 x half> %op1 to <128 x i16>
993 store <128 x i16> %res, ptr %b
1001 ; Don't use SVE for 64-bit vectors.
1002 define <2 x i32> @fcvtzs_v2f16_v2i32(<2 x half> %op1) vscale_range(2,0) #0 {
1003 ; CHECK-LABEL: fcvtzs_v2f16_v2i32:
1005 ; CHECK-NEXT: fcvtl v0.4s, v0.4h
1006 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s
1007 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
1009 %res = fptosi <2 x half> %op1 to <2 x i32>
1013 ; Don't use SVE for 128-bit vectors.
1014 define <4 x i32> @fcvtzs_v4f16_v4i32(<4 x half> %op1) vscale_range(2,0) #0 {
1015 ; CHECK-LABEL: fcvtzs_v4f16_v4i32:
1017 ; CHECK-NEXT: fcvtl v0.4s, v0.4h
1018 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s
1020 %res = fptosi <4 x half> %op1 to <4 x i32>
1024 define void @fcvtzs_v8f16_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
1025 ; CHECK-LABEL: fcvtzs_v8f16_v8i32:
1027 ; CHECK-NEXT: ptrue p0.s, vl8
1028 ; CHECK-NEXT: ldr q0, [x0]
1029 ; CHECK-NEXT: uunpklo z0.s, z0.h
1030 ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
1031 ; CHECK-NEXT: st1w { z0.s }, p0, [x1]
1033 %op1 = load <8 x half>, ptr %a
1034 %res = fptosi <8 x half> %op1 to <8 x i32>
1035 store <8 x i32> %res, ptr %b
; No vscale_range attribute, so the expected code depends on the RUN line:
; the 256-bit run unpacks and converts the two halves separately (the upper
; half is brought down with ext), while the 512-bit and wider runs use one
; extending ld1h and a single fcvtzs under vl16.
1039 define void @fcvtzs_v16f16_v16i32(ptr %a, ptr %b) #0 {
1040 ; VBITS_GE_256-LABEL: fcvtzs_v16f16_v16i32:
1041 ; VBITS_GE_256: // %bb.0:
1042 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
1043 ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
1044 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
1045 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
1046 ; VBITS_GE_256-NEXT: uunpklo z1.s, z0.h
1047 ; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
1048 ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
1049 ; VBITS_GE_256-NEXT: fcvtzs z1.s, p0/m, z1.h
1050 ; VBITS_GE_256-NEXT: fcvtzs z0.s, p0/m, z0.h
1051 ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1]
1052 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
1053 ; VBITS_GE_256-NEXT: ret
1055 ; VBITS_GE_512-LABEL: fcvtzs_v16f16_v16i32:
1056 ; VBITS_GE_512: // %bb.0:
1057 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16
1058 ; VBITS_GE_512-NEXT: ld1h { z0.s }, p0/z, [x0]
1059 ; VBITS_GE_512-NEXT: fcvtzs z0.s, p0/m, z0.h
1060 ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
1061 ; VBITS_GE_512-NEXT: ret
1062 %op1 = load <16 x half>, ptr %a
1063 %res = fptosi <16 x half> %op1 to <16 x i32>
1064 store <16 x i32> %res, ptr %b
; <32 x half> -> <32 x i32> with vscale_range(8,0): one extending ld1h into .s
; lanes, one fcvtzs and one st1w, all under a vl32 predicate.
1068 define void @fcvtzs_v32f16_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
1069 ; CHECK-LABEL: fcvtzs_v32f16_v32i32:
1071 ; CHECK-NEXT: ptrue p0.s, vl32
1072 ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
1073 ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
1074 ; CHECK-NEXT: st1w { z0.s }, p0, [x1]
1076 %op1 = load <32 x half>, ptr %a
1077 %res = fptosi <32 x half> %op1 to <32 x i32>
1078 store <32 x i32> %res, ptr %b
; <64 x half> -> <64 x i32> with vscale_range(16,0): same single
; load/convert/store shape as the 32-element case, under a vl64 predicate.
1082 define void @fcvtzs_v64f16_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
1083 ; CHECK-LABEL: fcvtzs_v64f16_v64i32:
1085 ; CHECK-NEXT: ptrue p0.s, vl64
1086 ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
1087 ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
1088 ; CHECK-NEXT: st1w { z0.s }, p0, [x1]
1090 %op1 = load <64 x half>, ptr %a
1091 %res = fptosi <64 x half> %op1 to <64 x i32>
1092 store <64 x i32> %res, ptr %b
1100 ; Don't use SVE for 64-bit vectors.
; The single element is converted with the scalar fcvtzs (h0 -> x8) and then
; moved back into d0 with fmov.
1101 define <1 x i64> @fcvtzs_v1f16_v1i64(<1 x half> %op1) vscale_range(2,0) #0 {
1102 ; CHECK-LABEL: fcvtzs_v1f16_v1i64:
1104 ; CHECK-NEXT: fcvtzs x8, h0
1105 ; CHECK-NEXT: fmov d0, x8
1107 %res = fptosi <1 x half> %op1 to <1 x i64>
1111 ; v2f16 is not legal for NEON, so use SVE
; The input is unpacked twice (h -> s -> d lanes) before the predicated
; fcvtzs; the result is returned in q0.
1112 define <2 x i64> @fcvtzs_v2f16_v2i64(<2 x half> %op1) vscale_range(2,0) #0 {
1113 ; CHECK-LABEL: fcvtzs_v2f16_v2i64:
1115 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
1116 ; CHECK-NEXT: ptrue p0.d, vl4
1117 ; CHECK-NEXT: uunpklo z0.s, z0.h
1118 ; CHECK-NEXT: uunpklo z0.d, z0.s
1119 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
1120 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
1122 %res = fptosi <2 x half> %op1 to <2 x i64>
; <4 x half> -> <4 x i64> with vscale_range(2,0): 64-bit load, two uunpklo
; steps up to .d lanes, then fcvtzs and st1d under a vl4 predicate.
1126 define void @fcvtzs_v4f16_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
1127 ; CHECK-LABEL: fcvtzs_v4f16_v4i64:
1129 ; CHECK-NEXT: ldr d0, [x0]
1130 ; CHECK-NEXT: ptrue p0.d, vl4
1131 ; CHECK-NEXT: uunpklo z0.s, z0.h
1132 ; CHECK-NEXT: uunpklo z0.d, z0.s
1133 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
1134 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
1136 %op1 = load <4 x half>, ptr %a
1137 %res = fptosi <4 x half> %op1 to <4 x i64>
1138 store <4 x i64> %res, ptr %b
; No vscale_range attribute: the 256-bit run splits the loaded q register
; with a NEON ext and converts each four-element half under vl4, while the
; 512-bit and wider runs convert all eight lanes under vl8.
1142 define void @fcvtzs_v8f16_v8i64(ptr %a, ptr %b) #0 {
1143 ; VBITS_GE_256-LABEL: fcvtzs_v8f16_v8i64:
1144 ; VBITS_GE_256: // %bb.0:
1145 ; VBITS_GE_256-NEXT: ldr q0, [x0]
1146 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
1147 ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
1148 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
1149 ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
1150 ; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
1151 ; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
1152 ; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
1153 ; VBITS_GE_256-NEXT: fcvtzs z0.d, p0/m, z0.h
1154 ; VBITS_GE_256-NEXT: fcvtzs z1.d, p0/m, z1.h
1155 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
1156 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3]
1157 ; VBITS_GE_256-NEXT: ret
1159 ; VBITS_GE_512-LABEL: fcvtzs_v8f16_v8i64:
1160 ; VBITS_GE_512: // %bb.0:
1161 ; VBITS_GE_512-NEXT: ldr q0, [x0]
1162 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
1163 ; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h
1164 ; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s
1165 ; VBITS_GE_512-NEXT: fcvtzs z0.d, p0/m, z0.h
1166 ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
1167 ; VBITS_GE_512-NEXT: ret
1168 %op1 = load <8 x half>, ptr %a
1169 %res = fptosi <8 x half> %op1 to <8 x i64>
1170 store <8 x i64> %res, ptr %b
; <16 x half> -> <16 x i64> with vscale_range(8,0): one ld1h extending into .d
; lanes, one fcvtzs and one st1d under a vl16 predicate.
1174 define void @fcvtzs_v16f16_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
1175 ; CHECK-LABEL: fcvtzs_v16f16_v16i64:
1177 ; CHECK-NEXT: ptrue p0.d, vl16
1178 ; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
1179 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
1180 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
1182 %op1 = load <16 x half>, ptr %a
1183 %res = fptosi <16 x half> %op1 to <16 x i64>
1184 store <16 x i64> %res, ptr %b
; <32 x half> -> <32 x i64> with vscale_range(16,0): same single
; load/convert/store shape as the 16-element case, under a vl32 predicate.
1188 define void @fcvtzs_v32f16_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
1189 ; CHECK-LABEL: fcvtzs_v32f16_v32i64:
1191 ; CHECK-NEXT: ptrue p0.d, vl32
1192 ; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
1193 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
1194 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
1196 %op1 = load <32 x half>, ptr %a
1197 %res = fptosi <32 x half> %op1 to <32 x i64>
1198 store <32 x i64> %res, ptr %b
1206 ; Don't use SVE for 64-bit vectors.
; The expected conversion is a NEON fcvtzs on the .2s arrangement.
1207 define <2 x i16> @fcvtzs_v2f32_v2i16(<2 x float> %op1) vscale_range(2,0) #0 {
1208 ; CHECK-LABEL: fcvtzs_v2f32_v2i16:
1210 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s
1212 %res = fptosi <2 x float> %op1 to <2 x i16>
1216 ; Don't use SVE for 128-bit vectors.
; After the NEON fcvtzs on .4s, the 32-bit results in lanes 1-3 are moved one
; by one (through w8/w9) into the .h lanes of v0.
1217 define <4 x i16> @fcvtzs_v4f32_v4i16(<4 x float> %op1) vscale_range(2,0) #0 {
1218 ; CHECK-LABEL: fcvtzs_v4f32_v4i16:
1220 ; CHECK-NEXT: fcvtzs v1.4s, v0.4s
1221 ; CHECK-NEXT: mov w8, v1.s[1]
1222 ; CHECK-NEXT: mov v0.16b, v1.16b
1223 ; CHECK-NEXT: mov w9, v1.s[2]
1224 ; CHECK-NEXT: mov v0.h[1], w8
1225 ; CHECK-NEXT: mov w8, v1.s[3]
1226 ; CHECK-NEXT: mov v0.h[2], w9
1227 ; CHECK-NEXT: mov v0.h[3], w8
1228 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
1230 %res = fptosi <4 x float> %op1 to <4 x i16>
; <8 x float> -> <8 x i16> with vscale_range(2,0): SVE fcvtzs on .s lanes
; under vl8, then uzp1 packs the results into .h lanes; returned in q0.
1234 define <8 x i16> @fcvtzs_v8f32_v8i16(ptr %a) vscale_range(2,0) #0 {
1235 ; CHECK-LABEL: fcvtzs_v8f32_v8i16:
1237 ; CHECK-NEXT: ptrue p0.s, vl8
1238 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
1239 ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
1240 ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
1241 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
1243 %op1 = load <8 x float>, ptr %a
1244 %res = fptosi <8 x float> %op1 to <8 x i16>
; No vscale_range attribute: the 256-bit run converts two vl8 halves, packs
; each with uzp1 and joins them with splice before a single st1h, while the
; 512-bit and wider runs use a truncating st1h straight from .s lanes.
1248 define void @fcvtzs_v16f32_v16i16(ptr %a, ptr %b) #0 {
1249 ; VBITS_GE_256-LABEL: fcvtzs_v16f32_v16i16:
1250 ; VBITS_GE_256: // %bb.0:
1251 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
1252 ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
1253 ; VBITS_GE_256-NEXT: ptrue p1.h, vl16
1254 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
1255 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
1256 ; VBITS_GE_256-NEXT: fcvtzs z0.s, p0/m, z0.s
1257 ; VBITS_GE_256-NEXT: fcvtzs z1.s, p0/m, z1.s
1258 ; VBITS_GE_256-NEXT: ptrue p0.h, vl8
1259 ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
1260 ; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
1261 ; VBITS_GE_256-NEXT: splice z1.h, p0, z1.h, z0.h
1262 ; VBITS_GE_256-NEXT: st1h { z1.h }, p1, [x1]
1263 ; VBITS_GE_256-NEXT: ret
1265 ; VBITS_GE_512-LABEL: fcvtzs_v16f32_v16i16:
1266 ; VBITS_GE_512: // %bb.0:
1267 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16
1268 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
1269 ; VBITS_GE_512-NEXT: fcvtzs z0.s, p0/m, z0.s
1270 ; VBITS_GE_512-NEXT: st1h { z0.s }, p0, [x1]
1271 ; VBITS_GE_512-NEXT: ret
1272 %op1 = load <16 x float>, ptr %a
1273 %res = fptosi <16 x float> %op1 to <16 x i16>
1274 store <16 x i16> %res, ptr %b
; <32 x float> -> <32 x i16> with vscale_range(8,0): ld1w, fcvtzs on .s lanes
; and a truncating st1h { z0.s }, all under a vl32 predicate.
1278 define void @fcvtzs_v32f32_v32i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
1279 ; CHECK-LABEL: fcvtzs_v32f32_v32i16:
1281 ; CHECK-NEXT: ptrue p0.s, vl32
1282 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
1283 ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
1284 ; CHECK-NEXT: st1h { z0.s }, p0, [x1]
1286 %op1 = load <32 x float>, ptr %a
1287 %res = fptosi <32 x float> %op1 to <32 x i16>
1288 store <32 x i16> %res, ptr %b
; <64 x float> -> <64 x i16> with vscale_range(16,0): same
; load/convert/truncating-store shape as the 32-element case, under vl64.
1292 define void @fcvtzs_v64f32_v64i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
1293 ; CHECK-LABEL: fcvtzs_v64f32_v64i16:
1295 ; CHECK-NEXT: ptrue p0.s, vl64
1296 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
1297 ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
1298 ; CHECK-NEXT: st1h { z0.s }, p0, [x1]
1300 %op1 = load <64 x float>, ptr %a
1301 %res = fptosi <64 x float> %op1 to <64 x i16>
1302 store <64 x i16> %res, ptr %b
1310 ; Don't use SVE for 64-bit vectors.
; Same-width conversion: a NEON fcvtzs on the .2s arrangement.
1311 define <2 x i32> @fcvtzs_v2f32_v2i32(<2 x float> %op1) vscale_range(2,0) #0 {
1312 ; CHECK-LABEL: fcvtzs_v2f32_v2i32:
1314 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s
1316 %res = fptosi <2 x float> %op1 to <2 x i32>
1320 ; Don't use SVE for 128-bit vectors.
; Same-width conversion: a NEON fcvtzs on the .4s arrangement.
1321 define <4 x i32> @fcvtzs_v4f32_v4i32(<4 x float> %op1) vscale_range(2,0) #0 {
1322 ; CHECK-LABEL: fcvtzs_v4f32_v4i32:
1324 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s
1326 %res = fptosi <4 x float> %op1 to <4 x i32>
; <8 x float> -> <8 x i32> with vscale_range(2,0): ld1w, fcvtzs and st1w on .s
; lanes under a vl8 predicate.
1330 define void @fcvtzs_v8f32_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
1331 ; CHECK-LABEL: fcvtzs_v8f32_v8i32:
1333 ; CHECK-NEXT: ptrue p0.s, vl8
1334 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
1335 ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
1336 ; CHECK-NEXT: st1w { z0.s }, p0, [x1]
1338 %op1 = load <8 x float>, ptr %a
1339 %res = fptosi <8 x float> %op1 to <8 x i32>
1340 store <8 x i32> %res, ptr %b
; No vscale_range attribute: the 256-bit run processes two vl8 halves (at
; element offsets 0 and x8 = 8), while the 512-bit and wider runs handle all
; sixteen lanes under vl16.
1344 define void @fcvtzs_v16f32_v16i32(ptr %a, ptr %b) #0 {
1345 ; VBITS_GE_256-LABEL: fcvtzs_v16f32_v16i32:
1346 ; VBITS_GE_256: // %bb.0:
1347 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
1348 ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
1349 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
1350 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
1351 ; VBITS_GE_256-NEXT: fcvtzs z0.s, p0/m, z0.s
1352 ; VBITS_GE_256-NEXT: fcvtzs z1.s, p0/m, z1.s
1353 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
1354 ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1]
1355 ; VBITS_GE_256-NEXT: ret
1357 ; VBITS_GE_512-LABEL: fcvtzs_v16f32_v16i32:
1358 ; VBITS_GE_512: // %bb.0:
1359 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16
1360 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
1361 ; VBITS_GE_512-NEXT: fcvtzs z0.s, p0/m, z0.s
1362 ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
1363 ; VBITS_GE_512-NEXT: ret
1364 %op1 = load <16 x float>, ptr %a
1365 %res = fptosi <16 x float> %op1 to <16 x i32>
1366 store <16 x i32> %res, ptr %b
; <32 x float> -> <32 x i32> with vscale_range(8,0): ld1w, fcvtzs and st1w on
; .s lanes under a vl32 predicate.
1370 define void @fcvtzs_v32f32_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
1371 ; CHECK-LABEL: fcvtzs_v32f32_v32i32:
1373 ; CHECK-NEXT: ptrue p0.s, vl32
1374 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
1375 ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
1376 ; CHECK-NEXT: st1w { z0.s }, p0, [x1]
1378 %op1 = load <32 x float>, ptr %a
1379 %res = fptosi <32 x float> %op1 to <32 x i32>
1380 store <32 x i32> %res, ptr %b
; <64 x float> -> <64 x i32> with vscale_range(16,0): ld1w, fcvtzs and st1w on
; .s lanes under a vl64 predicate.
1384 define void @fcvtzs_v64f32_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
1385 ; CHECK-LABEL: fcvtzs_v64f32_v64i32:
1387 ; CHECK-NEXT: ptrue p0.s, vl64
1388 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
1389 ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
1390 ; CHECK-NEXT: st1w { z0.s }, p0, [x1]
1392 %op1 = load <64 x float>, ptr %a
1393 %res = fptosi <64 x float> %op1 to <64 x i32>
1394 store <64 x i32> %res, ptr %b
1402 ; Don't use SVE for 64-bit vectors.
; The float input is widened to double with fcvtl before the NEON fcvtzs on
; the .2d arrangement; the result is taken from d0.
1403 define <1 x i64> @fcvtzs_v1f32_v1i64(<1 x float> %op1) vscale_range(2,0) #0 {
1404 ; CHECK-LABEL: fcvtzs_v1f32_v1i64:
1406 ; CHECK-NEXT: fcvtl v0.2d, v0.2s
1407 ; CHECK-NEXT: fcvtzs v0.2d, v0.2d
1408 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
1410 %res = fptosi <1 x float> %op1 to <1 x i64>
1414 ; Don't use SVE for 128-bit vectors.
; The float input is widened to double with fcvtl before the NEON fcvtzs on
; the .2d arrangement.
1415 define <2 x i64> @fcvtzs_v2f32_v2i64(<2 x float> %op1) vscale_range(2,0) #0 {
1416 ; CHECK-LABEL: fcvtzs_v2f32_v2i64:
1418 ; CHECK-NEXT: fcvtl v0.2d, v0.2s
1419 ; CHECK-NEXT: fcvtzs v0.2d, v0.2d
1421 %res = fptosi <2 x float> %op1 to <2 x i64>
; <4 x float> -> <4 x i64> with vscale_range(2,0): 128-bit load, uunpklo to .d
; lanes, then fcvtzs (.s source, .d result) and st1d under a vl4 predicate.
1425 define void @fcvtzs_v4f32_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
1426 ; CHECK-LABEL: fcvtzs_v4f32_v4i64:
1428 ; CHECK-NEXT: ptrue p0.d, vl4
1429 ; CHECK-NEXT: ldr q0, [x0]
1430 ; CHECK-NEXT: uunpklo z0.d, z0.s
1431 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
1432 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
1434 %op1 = load <4 x float>, ptr %a
1435 %res = fptosi <4 x float> %op1 to <4 x i64>
1436 store <4 x i64> %res, ptr %b
; No vscale_range attribute: the 256-bit run loads all eight floats, unpacks
; the low half, uses ext to bring the upper half down, and converts each half
; under vl4; the 512-bit and wider runs use one extending ld1w and a single
; fcvtzs under vl8.
1440 define void @fcvtzs_v8f32_v8i64(ptr %a, ptr %b) #0 {
1441 ; VBITS_GE_256-LABEL: fcvtzs_v8f32_v8i64:
1442 ; VBITS_GE_256: // %bb.0:
1443 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
1444 ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
1445 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
1446 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
1447 ; VBITS_GE_256-NEXT: uunpklo z1.d, z0.s
1448 ; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
1449 ; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
1450 ; VBITS_GE_256-NEXT: fcvtzs z1.d, p0/m, z1.s
1451 ; VBITS_GE_256-NEXT: fcvtzs z0.d, p0/m, z0.s
1452 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1]
1453 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
1454 ; VBITS_GE_256-NEXT: ret
1456 ; VBITS_GE_512-LABEL: fcvtzs_v8f32_v8i64:
1457 ; VBITS_GE_512: // %bb.0:
1458 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
1459 ; VBITS_GE_512-NEXT: ld1w { z0.d }, p0/z, [x0]
1460 ; VBITS_GE_512-NEXT: fcvtzs z0.d, p0/m, z0.s
1461 ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
1462 ; VBITS_GE_512-NEXT: ret
1463 %op1 = load <8 x float>, ptr %a
1464 %res = fptosi <8 x float> %op1 to <8 x i64>
1465 store <8 x i64> %res, ptr %b
; <16 x float> -> <16 x i64> with vscale_range(8,0): one ld1w extending into
; .d lanes, one fcvtzs and one st1d under a vl16 predicate.
1469 define void @fcvtzs_v16f32_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
1470 ; CHECK-LABEL: fcvtzs_v16f32_v16i64:
1472 ; CHECK-NEXT: ptrue p0.d, vl16
1473 ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
1474 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
1475 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
1477 %op1 = load <16 x float>, ptr %a
1478 %res = fptosi <16 x float> %op1 to <16 x i64>
1479 store <16 x i64> %res, ptr %b
; <32 x float> -> <32 x i64> with vscale_range(16,0): same single
; load/convert/store shape as the 16-element case, under a vl32 predicate.
1483 define void @fcvtzs_v32f32_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
1484 ; CHECK-LABEL: fcvtzs_v32f32_v32i64:
1486 ; CHECK-NEXT: ptrue p0.d, vl32
1487 ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
1488 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
1489 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
1491 %op1 = load <32 x float>, ptr %a
1492 %res = fptosi <32 x float> %op1 to <32 x i64>
1493 store <32 x i64> %res, ptr %b
1502 ; v1f64 is preferred to be widened to v4f64, so use SVE
; The .d result of the predicated fcvtzs is packed down to .h lanes with two
; uzp1 steps (.s, then .h); the value is returned in d0.
1503 define <1 x i16> @fcvtzs_v1f64_v1i16(<1 x double> %op1) vscale_range(2,0) #0 {
1504 ; CHECK-LABEL: fcvtzs_v1f64_v1i16:
1506 ; CHECK-NEXT: ptrue p0.d, vl4
1507 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
1508 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
1509 ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
1510 ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
1511 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
1513 %res = fptosi <1 x double> %op1 to <1 x i16>
1517 ; Don't use SVE for 128-bit vectors.
; NEON fcvtzs on .2d followed by xtn to narrow the results to .2s.
1518 define <2 x i16> @fcvtzs_v2f64_v2i16(<2 x double> %op1) vscale_range(2,0) #0 {
1519 ; CHECK-LABEL: fcvtzs_v2f64_v2i16:
1521 ; CHECK-NEXT: fcvtzs v0.2d, v0.2d
1522 ; CHECK-NEXT: xtn v0.2s, v0.2d
1524 %res = fptosi <2 x double> %op1 to <2 x i16>
; <4 x double> -> <4 x i16> with vscale_range(2,0): ld1d and fcvtzs under vl4,
; then two uzp1 steps pack the results down to .h lanes; returned in d0.
1528 define <4 x i16> @fcvtzs_v4f64_v4i16(ptr %a) vscale_range(2,0) #0 {
1529 ; CHECK-LABEL: fcvtzs_v4f64_v4i16:
1531 ; CHECK-NEXT: ptrue p0.d, vl4
1532 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
1533 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
1534 ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
1535 ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
1536 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
1538 %op1 = load <4 x double>, ptr %a
1539 %res = fptosi <4 x double> %op1 to <4 x i16>
; No vscale_range attribute: the 256-bit run converts two vl4 halves, packs
; each down to .h lanes with uzp1 and combines them with a NEON d-lane mov,
; while the 512-bit and wider runs convert all eight lanes under vl8 and then
; pack with two uzp1 steps. The result is returned in q0 in both cases.
1543 define <8 x i16> @fcvtzs_v8f64_v8i16(ptr %a) #0 {
1544 ; VBITS_GE_256-LABEL: fcvtzs_v8f64_v8i16:
1545 ; VBITS_GE_256: // %bb.0:
1546 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
1547 ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
1548 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
1549 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
1550 ; VBITS_GE_256-NEXT: fcvtzs z0.d, p0/m, z0.d
1551 ; VBITS_GE_256-NEXT: fcvtzs z1.d, p0/m, z1.d
1552 ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
1553 ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
1554 ; VBITS_GE_256-NEXT: uzp1 z2.h, z0.h, z0.h
1555 ; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h
1556 ; VBITS_GE_256-NEXT: mov v0.d[1], v2.d[0]
1557 ; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 killed $z0
1558 ; VBITS_GE_256-NEXT: ret
1560 ; VBITS_GE_512-LABEL: fcvtzs_v8f64_v8i16:
1561 ; VBITS_GE_512: // %bb.0:
1562 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
1563 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
1564 ; VBITS_GE_512-NEXT: fcvtzs z0.d, p0/m, z0.d
1565 ; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
1566 ; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
1567 ; VBITS_GE_512-NEXT: // kill: def $q0 killed $q0 killed $z0
1568 ; VBITS_GE_512-NEXT: ret
1569 %op1 = load <8 x double>, ptr %a
1570 %res = fptosi <8 x double> %op1 to <8 x i16>
; <16 x double> -> <16 x i16> with vscale_range(8,0): ld1d, fcvtzs on .d lanes
; and a truncating st1h { z0.d }, all under a vl16 predicate.
1574 define void @fcvtzs_v16f64_v16i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
1575 ; CHECK-LABEL: fcvtzs_v16f64_v16i16:
1577 ; CHECK-NEXT: ptrue p0.d, vl16
1578 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
1579 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
1580 ; CHECK-NEXT: st1h { z0.d }, p0, [x1]
1582 %op1 = load <16 x double>, ptr %a
1583 %res = fptosi <16 x double> %op1 to <16 x i16>
1584 store <16 x i16> %res, ptr %b
; <32 x double> -> <32 x i16> with vscale_range(16,0): same
; load/convert/truncating-store shape as the 16-element case, under vl32.
1588 define void @fcvtzs_v32f64_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
1589 ; CHECK-LABEL: fcvtzs_v32f64_v32i16:
1591 ; CHECK-NEXT: ptrue p0.d, vl32
1592 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
1593 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
1594 ; CHECK-NEXT: st1h { z0.d }, p0, [x1]
1596 %op1 = load <32 x double>, ptr %a
1597 %res = fptosi <32 x double> %op1 to <32 x i16>
1598 store <32 x i16> %res, ptr %b
1606 ; Don't use SVE for 64-bit vectors.
; The <1 x double> in d0 is treated as part of q0: NEON fcvtzs on .2d, then
; xtn narrows the results to .2s.
1607 define <1 x i32> @fcvtzs_v1f64_v1i32(<1 x double> %op1) vscale_range(2,0) #0 {
1608 ; CHECK-LABEL: fcvtzs_v1f64_v1i32:
1610 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1611 ; CHECK-NEXT: fcvtzs v0.2d, v0.2d
1612 ; CHECK-NEXT: xtn v0.2s, v0.2d
1614 %res = fptosi <1 x double> %op1 to <1 x i32>
1618 ; Don't use SVE for 128-bit vectors.
; NEON fcvtzs on .2d followed by xtn to narrow the results to .2s.
1619 define <2 x i32> @fcvtzs_v2f64_v2i32(<2 x double> %op1) vscale_range(2,0) #0 {
1620 ; CHECK-LABEL: fcvtzs_v2f64_v2i32:
1622 ; CHECK-NEXT: fcvtzs v0.2d, v0.2d
1623 ; CHECK-NEXT: xtn v0.2s, v0.2d
1625 %res = fptosi <2 x double> %op1 to <2 x i32>
; <4 x double> -> <4 x i32> with vscale_range(2,0): ld1d and fcvtzs under vl4,
; then one uzp1 packs the results into .s lanes; returned in q0.
1629 define <4 x i32> @fcvtzs_v4f64_v4i32(ptr %a) vscale_range(2,0) #0 {
1630 ; CHECK-LABEL: fcvtzs_v4f64_v4i32:
1632 ; CHECK-NEXT: ptrue p0.d, vl4
1633 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
1634 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
1635 ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
1636 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
1638 %op1 = load <4 x double>, ptr %a
1639 %res = fptosi <4 x double> %op1 to <4 x i32>
; No vscale_range attribute: the 256-bit run converts two vl4 halves, packs
; each with uzp1 and joins them with splice before one st1w under vl8, while
; the 512-bit and wider runs use a truncating st1w straight from .d lanes.
1643 define void @fcvtzs_v8f64_v8i32(ptr %a, ptr %b) #0 {
1644 ; VBITS_GE_256-LABEL: fcvtzs_v8f64_v8i32:
1645 ; VBITS_GE_256: // %bb.0:
1646 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
1647 ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
1648 ; VBITS_GE_256-NEXT: ptrue p1.s, vl8
1649 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
1650 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
1651 ; VBITS_GE_256-NEXT: fcvtzs z0.d, p0/m, z0.d
1652 ; VBITS_GE_256-NEXT: fcvtzs z1.d, p0/m, z1.d
1653 ; VBITS_GE_256-NEXT: ptrue p0.s, vl4
1654 ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
1655 ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
1656 ; VBITS_GE_256-NEXT: splice z1.s, p0, z1.s, z0.s
1657 ; VBITS_GE_256-NEXT: st1w { z1.s }, p1, [x1]
1658 ; VBITS_GE_256-NEXT: ret
1660 ; VBITS_GE_512-LABEL: fcvtzs_v8f64_v8i32:
1661 ; VBITS_GE_512: // %bb.0:
1662 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
1663 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
1664 ; VBITS_GE_512-NEXT: fcvtzs z0.d, p0/m, z0.d
1665 ; VBITS_GE_512-NEXT: st1w { z0.d }, p0, [x1]
1666 ; VBITS_GE_512-NEXT: ret
1667 %op1 = load <8 x double>, ptr %a
1668 %res = fptosi <8 x double> %op1 to <8 x i32>
1669 store <8 x i32> %res, ptr %b
; <16 x double> -> <16 x i32> with vscale_range(8,0): ld1d, fcvtzs on .d lanes
; and a truncating st1w { z0.d }, all under a vl16 predicate.
1673 define void @fcvtzs_v16f64_v16i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
1674 ; CHECK-LABEL: fcvtzs_v16f64_v16i32:
1676 ; CHECK-NEXT: ptrue p0.d, vl16
1677 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
1678 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
1679 ; CHECK-NEXT: st1w { z0.d }, p0, [x1]
1681 %op1 = load <16 x double>, ptr %a
1682 %res = fptosi <16 x double> %op1 to <16 x i32>
1683 store <16 x i32> %res, ptr %b
; <32 x double> -> <32 x i32> with vscale_range(16,0): same
; load/convert/truncating-store shape as the 16-element case, under vl32.
1687 define void @fcvtzs_v32f64_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
1688 ; CHECK-LABEL: fcvtzs_v32f64_v32i32:
1690 ; CHECK-NEXT: ptrue p0.d, vl32
1691 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
1692 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
1693 ; CHECK-NEXT: st1w { z0.d }, p0, [x1]
1695 %op1 = load <32 x double>, ptr %a
1696 %res = fptosi <32 x double> %op1 to <32 x i32>
1697 store <32 x i32> %res, ptr %b
1705 ; Don't use SVE for 64-bit vectors.
; Scalar fcvtzs (d0 -> x8) followed by fmov back into d0.
1706 define <1 x i64> @fcvtzs_v1f64_v1i64(<1 x double> %op1) vscale_range(2,0) #0 {
1707 ; CHECK-LABEL: fcvtzs_v1f64_v1i64:
1709 ; CHECK-NEXT: fcvtzs x8, d0
1710 ; CHECK-NEXT: fmov d0, x8
1712 %res = fptosi <1 x double> %op1 to <1 x i64>
1716 ; Don't use SVE for 128-bit vectors.
; Same-width conversion: a NEON fcvtzs on the .2d arrangement.
1717 define <2 x i64> @fcvtzs_v2f64_v2i64(<2 x double> %op1) vscale_range(2,0) #0 {
1718 ; CHECK-LABEL: fcvtzs_v2f64_v2i64:
1720 ; CHECK-NEXT: fcvtzs v0.2d, v0.2d
1722 %res = fptosi <2 x double> %op1 to <2 x i64>
; <4 x double> -> <4 x i64> with vscale_range(2,0): ld1d, fcvtzs and st1d on
; .d lanes under a vl4 predicate.
1726 define void @fcvtzs_v4f64_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
1727 ; CHECK-LABEL: fcvtzs_v4f64_v4i64:
1729 ; CHECK-NEXT: ptrue p0.d, vl4
1730 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
1731 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
1732 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
1734 %op1 = load <4 x double>, ptr %a
1735 %res = fptosi <4 x double> %op1 to <4 x i64>
1736 store <4 x i64> %res, ptr %b
; No vscale_range attribute: the 256-bit run processes two vl4 halves (at
; element offsets 0 and x8 = 4), while the 512-bit and wider runs handle all
; eight lanes under vl8.
1740 define void @fcvtzs_v8f64_v8i64(ptr %a, ptr %b) #0 {
1741 ; VBITS_GE_256-LABEL: fcvtzs_v8f64_v8i64:
1742 ; VBITS_GE_256: // %bb.0:
1743 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
1744 ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
1745 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
1746 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
1747 ; VBITS_GE_256-NEXT: fcvtzs z0.d, p0/m, z0.d
1748 ; VBITS_GE_256-NEXT: fcvtzs z1.d, p0/m, z1.d
1749 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
1750 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1]
1751 ; VBITS_GE_256-NEXT: ret
1753 ; VBITS_GE_512-LABEL: fcvtzs_v8f64_v8i64:
1754 ; VBITS_GE_512: // %bb.0:
1755 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
1756 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
1757 ; VBITS_GE_512-NEXT: fcvtzs z0.d, p0/m, z0.d
1758 ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
1759 ; VBITS_GE_512-NEXT: ret
1760 %op1 = load <8 x double>, ptr %a
1761 %res = fptosi <8 x double> %op1 to <8 x i64>
1762 store <8 x i64> %res, ptr %b
; <16 x double> -> <16 x i64> with vscale_range(8,0): ld1d, fcvtzs and st1d on
; .d lanes under a vl16 predicate.
1766 define void @fcvtzs_v16f64_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
1767 ; CHECK-LABEL: fcvtzs_v16f64_v16i64:
1769 ; CHECK-NEXT: ptrue p0.d, vl16
1770 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
1771 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
1772 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
1774 %op1 = load <16 x double>, ptr %a
1775 %res = fptosi <16 x double> %op1 to <16 x i64>
1776 store <16 x i64> %res, ptr %b
; <32 x double> -> <32 x i64> with vscale_range(16,0): ld1d, fcvtzs and st1d on
; .d lanes under a vl32 predicate.
1780 define void @fcvtzs_v32f64_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
1781 ; CHECK-LABEL: fcvtzs_v32f64_v32i64:
1783 ; CHECK-NEXT: ptrue p0.d, vl32
1784 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
1785 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
1786 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
1788 %op1 = load <32 x double>, ptr %a
1789 %res = fptosi <32 x double> %op1 to <32 x i64>
1790 store <32 x i64> %res, ptr %b
; Attribute group #0, referenced by the test functions, enables the SVE
; target feature.
1794 attributes #0 = { "target-features"="+sve" }