1 ; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
2 ; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
3 ; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK
4 ; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
5 ; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
6 ; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
7 ; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
8 ; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
9 ; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
10 ; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
11 ; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
12 ; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
13 ; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
14 ; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
15 ; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
16 ; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
18 target triple = "aarch64-unknown-linux-gnu"
20 ; Don't use SVE when its registers are no bigger than NEON.
27 ; Don't use SVE for 64-bit vectors.
28 define <4 x half> @ucvtf_v4i16_v4f16(<4 x i16> %op1) #0 {
29 ; CHECK-LABEL: ucvtf_v4i16_v4f16:
30 ; CHECK: ucvtf v0.4h, v0.4h
32 %res = uitofp <4 x i16> %op1 to <4 x half>
36 ; Don't use SVE for 128-bit vectors.
37 define void @ucvtf_v8i16_v8f16(<8 x i16>* %a, <8 x half>* %b) #0 {
38 ; CHECK-LABEL: ucvtf_v8i16_v8f16:
40 ; CHECK-NEXT: ucvtf v0.8h, v0.8h
41 ; CHECK-NEXT: str q0, [x1]
43 %op1 = load <8 x i16>, <8 x i16>* %a
44 %res = uitofp <8 x i16> %op1 to <8 x half>
45 store <8 x half> %res, <8 x half>* %b
49 define void @ucvtf_v16i16_v16f16(<16 x i16>* %a, <16 x half>* %b) #0 {
50 ; CHECK-LABEL: ucvtf_v16i16_v16f16:
51 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
52 ; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
53 ; CHECK-NEXT: ucvtf [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
54 ; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
56 %op1 = load <16 x i16>, <16 x i16>* %a
57 %res = uitofp <16 x i16> %op1 to <16 x half>
58 store <16 x half> %res, <16 x half>* %b
62 define void @ucvtf_v32i16_v32f16(<32 x i16>* %a, <32 x half>* %b) #0 {
63 ; CHECK-LABEL: ucvtf_v32i16_v32f16:
64 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
65 ; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
66 ; VBITS_GE_512-NEXT: ucvtf [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
67 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
68 ; VBITS_GE_512-NEXT: ret
70 ; Ensure sensible type legalisation.
71 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
72 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
73 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
74 ; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
75 ; VBITS_EQ_256-DAG: ucvtf [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[LO]].h
76 ; VBITS_EQ_256-DAG: ucvtf [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[HI]].h
77 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x1]
78 ; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x1, x[[NUMELTS]], lsl #1]
79 ; VBITS_EQ_256-NEXT: ret
80 %op1 = load <32 x i16>, <32 x i16>* %a
81 %res = uitofp <32 x i16> %op1 to <32 x half>
82 store <32 x half> %res, <32 x half>* %b
86 define void @ucvtf_v64i16_v64f16(<64 x i16>* %a, <64 x half>* %b) #0 {
87 ; CHECK-LABEL: ucvtf_v64i16_v64f16:
88 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
89 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
90 ; VBITS_GE_1024-NEXT: ucvtf [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
91 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
92 ; VBITS_GE_1024-NEXT: ret
93 %op1 = load <64 x i16>, <64 x i16>* %a
94 %res = uitofp <64 x i16> %op1 to <64 x half>
95 store <64 x half> %res, <64 x half>* %b
99 define void @ucvtf_v128i16_v128f16(<128 x i16>* %a, <128 x half>* %b) #0 {
100 ; CHECK-LABEL: ucvtf_v128i16_v128f16:
101 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
102 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
103 ; VBITS_GE_2048-NEXT: ucvtf [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
104 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
105 ; VBITS_GE_2048-NEXT: ret
106 %op1 = load <128 x i16>, <128 x i16>* %a
107 %res = uitofp <128 x i16> %op1 to <128 x half>
108 store <128 x half> %res, <128 x half>* %b
116 ; Don't use SVE for 64-bit vectors.
117 define <2 x float> @ucvtf_v2i16_v2f32(<2 x i16> %op1) #0 {
118 ; CHECK-LABEL: ucvtf_v2i16_v2f32:
119 ; CHECK: movi d1, #0x00ffff0000ffff
120 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
121 ; CHECK-NEXT: ucvtf v0.2s, v0.2s
123 %res = uitofp <2 x i16> %op1 to <2 x float>
127 ; Don't use SVE for 128-bit vectors.
128 define <4 x float> @ucvtf_v4i16_v4f32(<4 x i16> %op1) #0 {
129 ; CHECK-LABEL: ucvtf_v4i16_v4f32:
130 ; CHECK: ucvtf v0.4s, v0.4s
132 %res = uitofp <4 x i16> %op1 to <4 x float>
136 define void @ucvtf_v8i16_v8f32(<8 x i16>* %a, <8 x float>* %b) #0 {
137 ; CHECK-LABEL: ucvtf_v8i16_v8f32:
138 ; CHECK: ldr q[[OP:[0-9]+]], [x0]
139 ; CHECK-NEXT: ptrue [[PG:p[0-9]+]].s, vl8
140 ; CHECK-NEXT: uunpklo [[UPK:z[0-9]+]].s, z[[OP]].h
141 ; CHECK-NEXT: ucvtf [[RES:z[0-9]+]].s, [[PG]]/m, [[UPK]].s
142 ; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
144 %op1 = load <8 x i16>, <8 x i16>* %a
145 %res = uitofp <8 x i16> %op1 to <8 x float>
146 store <8 x float> %res, <8 x float>* %b
150 define void @ucvtf_v16i16_v16f32(<16 x i16>* %a, <16 x float>* %b) #0 {
151 ; CHECK-LABEL: ucvtf_v16i16_v16f32:
152 ; VBITS_GE_512: ptrue [[PG1:p[0-9]+]].h, vl16
153 ; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG1]]/z, [x0]
154 ; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].s, vl16
155 ; VBITS_GE_512-NEXT: uunpklo [[UPK:z[0-9]+]].s, [[OP]].h
156 ; VBITS_GE_512-NEXT: ucvtf [[RES:z[0-9]+]].s, [[PG2]]/m, [[UPK]].s
157 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG1]], [x1]
158 ; VBITS_GE_512-NEXT: ret
160 ; Ensure sensible type legalisation - fixed type extract_subvector codegen is poor currently.
161 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].h, vl16
162 ; VBITS_EQ_256-DAG: ld1h { [[VEC:z[0-9]+]].h }, [[PG1]]/z, [x0]
163 ; VBITS_EQ_256-DAG: mov x8, sp
164 ; VBITS_EQ_256-DAG: st1h { [[VEC]].h }, [[PG1]], [x8]
165 ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp]
166 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s, vl8
167 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
168 ; VBITS_EQ_256-DAG: uunpklo [[UPK_LO:z[0-9]+]].s, z[[LO]].h
169 ; VBITS_EQ_256-DAG: uunpklo [[UPK_HI:z[0-9]+]].s, z[[HI]].h
170 ; VBITS_EQ_256-DAG: ucvtf [[RES_LO:z[0-9]+]].s, [[PG2]]/m, [[UPK_LO]].s
171 ; VBITS_EQ_256-DAG: ucvtf [[RES_HI:z[0-9]+]].s, [[PG2]]/m, [[UPK_HI]].s
172 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG2]], [x1]
173 ; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG2]], [x1, x[[NUMELTS]], lsl #2]
174 %op1 = load <16 x i16>, <16 x i16>* %a
175 %res = uitofp <16 x i16> %op1 to <16 x float>
176 store <16 x float> %res, <16 x float>* %b
180 define void @ucvtf_v32i16_v32f32(<32 x i16>* %a, <32 x float>* %b) #0 {
181 ; CHECK-LABEL: ucvtf_v32i16_v32f32:
182 ; VBITS_GE_1024: ptrue [[PG1:p[0-9]+]].h, vl32
183 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG1]]/z, [x0]
184 ; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].s, vl32
185 ; VBITS_GE_1024-NEXT: uunpklo [[UPK:z[0-9]+]].s, [[OP]].h
186 ; VBITS_GE_1024-NEXT: ucvtf [[RES:z[0-9]+]].s, [[PG2]]/m, [[UPK]].s
187 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG1]], [x1]
188 ; VBITS_GE_1024-NEXT: ret
189 %op1 = load <32 x i16>, <32 x i16>* %a
190 %res = uitofp <32 x i16> %op1 to <32 x float>
191 store <32 x float> %res, <32 x float>* %b
195 define void @ucvtf_v64i16_v64f32(<64 x i16>* %a, <64 x float>* %b) #0 {
196 ; CHECK-LABEL: ucvtf_v64i16_v64f32:
197 ; VBITS_GE_2048: ptrue [[PG1:p[0-9]+]].h, vl64
198 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG1]]/z, [x0]
199 ; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].s, vl64
200 ; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].s, [[OP]].h
201 ; VBITS_GE_2048-NEXT: ucvtf [[RES:z[0-9]+]].s, [[PG2]]/m, [[UPK]].s
202 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG1]], [x1]
203 ; VBITS_GE_2048-NEXT: ret
204 %op1 = load <64 x i16>, <64 x i16>* %a
205 %res = uitofp <64 x i16> %op1 to <64 x float>
206 store <64 x float> %res, <64 x float>* %b
214 ; v1i16 is preferred to be widened to v4i16, which pushes the output into SVE types, so use SVE
215 define <1 x double> @ucvtf_v1i16_v1f64(<1 x i16> %op1) #0 {
216 ; CHECK-LABEL: ucvtf_v1i16_v1f64:
217 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
218 ; CHECK-NEXT: uunpklo [[UPK1:z[0-9]+]].s, z0.h
219 ; CHECK-NEXT: uunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s
220 ; CHECK-NEXT: ucvtf z0.d, [[PG]]/m, [[UPK2]].d
222 %res = uitofp <1 x i16> %op1 to <1 x double>
223 ret <1 x double> %res
226 ; Don't use SVE for 128-bit vectors.
227 define <2 x double> @ucvtf_v2i16_v2f64(<2 x i16> %op1) #0 {
228 ; CHECK-LABEL: ucvtf_v2i16_v2f64:
229 ; CHECK: movi d1, #0x00ffff0000ffff
230 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
231 ; CHECK-NEXT: ushll v0.2d, v0.2s, #0
232 ; CHECK-NEXT: ucvtf v0.2d, v0.2d
234 %res = uitofp <2 x i16> %op1 to <2 x double>
235 ret <2 x double> %res
238 define void @ucvtf_v4i16_v4f64(<4 x i16>* %a, <4 x double>* %b) #0 {
239 ; CHECK-LABEL: ucvtf_v4i16_v4f64:
240 ; CHECK: ldr d[[OP:[0-9]+]], [x0]
241 ; CHECK-NEXT: ptrue [[PG:p[0-9]+]].d, vl4
242 ; CHECK-NEXT: uunpklo [[UPK1:z[0-9]+]].s, z[[OP]].h
243 ; CHECK-NEXT: uunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s
244 ; CHECK-NEXT: ucvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[UPK2]].d
245 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
247 %op1 = load <4 x i16>, <4 x i16>* %a
248 %res = uitofp <4 x i16> %op1 to <4 x double>
249 store <4 x double> %res, <4 x double>* %b
253 define void @ucvtf_v8i16_v8f64(<8 x i16>* %a, <8 x double>* %b) #0 {
254 ; CHECK-LABEL: ucvtf_v8i16_v8f64:
255 ; VBITS_GE_512: ldr q[[OP:[0-9]+]], [x0]
256 ; VBITS_GE_512-NEXT: ptrue [[PG:p[0-9]+]].d, vl8
257 ; VBITS_GE_512-NEXT: uunpklo [[UPK1:z[0-9]+]].s, z[[OP]].h
258 ; VBITS_GE_512-NEXT: uunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s
259 ; VBITS_GE_512-NEXT: ucvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[UPK2]].d
260 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
261 ; VBITS_GE_512-NEXT: ret
263 ; Ensure sensible type legalisation.
264 ; VBITS_EQ_256-DAG: ldr q[[OP:[0-9]+]], [x0]
265 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
266 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
267 ; VBITS_EQ_256-DAG: ext v[[HI:[0-9]+]].16b, v[[LO:[0-9]+]].16b, v[[OP]].16b, #8
268 ; VBITS_EQ_256-DAG: uunpklo [[UPK1_LO:z[0-9]+]].s, z[[LO]].h
269 ; VBITS_EQ_256-DAG: uunpklo [[UPK1_HI:z[0-9]+]].s, z[[HI]].h
270 ; VBITS_EQ_256-DAG: uunpklo [[UPK2_LO:z[0-9]+]].d, [[UPK1_LO]].s
271 ; VBITS_EQ_256-DAG: uunpklo [[UPK2_HI:z[0-9]+]].d, [[UPK1_HI]].s
272 ; VBITS_EQ_256-DAG: ucvtf [[RES_LO:z[0-9]+]].d, [[PG1]]/m, [[UPK2_LO]].d
273 ; VBITS_EQ_256-DAG: ucvtf [[RES_HI:z[0-9]+]].d, [[PG1]]/m, [[UPK2_HI]].d
274 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG1]], [x1]
275 ; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG1]], [x1, x[[NUMELTS]], lsl #3]
276 ; VBITS_EQ_256-NEXT: ret
277 %op1 = load <8 x i16>, <8 x i16>* %a
278 %res = uitofp <8 x i16> %op1 to <8 x double>
279 store <8 x double> %res, <8 x double>* %b
283 define void @ucvtf_v16i16_v16f64(<16 x i16>* %a, <16 x double>* %b) #0 {
284 ; CHECK-LABEL: ucvtf_v16i16_v16f64:
285 ; VBITS_GE_1024: ptrue [[PG1:p[0-9]+]].h, vl16
286 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG1]]/z, [x0]
287 ; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].d, vl16
288 ; VBITS_GE_1024-NEXT: uunpklo [[UPK1:z[0-9]+]].s, [[OP]].h
289 ; VBITS_GE_1024-NEXT: uunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s
290 ; VBITS_GE_1024-NEXT: ucvtf [[RES:z[0-9]+]].d, [[PG2]]/m, [[UPK2]].d
291 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG1]], [x1]
292 ; VBITS_GE_1024-NEXT: ret
293 %op1 = load <16 x i16>, <16 x i16>* %a
294 %res = uitofp <16 x i16> %op1 to <16 x double>
295 store <16 x double> %res, <16 x double>* %b
299 define void @ucvtf_v32i16_v32f64(<32 x i16>* %a, <32 x double>* %b) #0 {
300 ; CHECK-LABEL: ucvtf_v32i16_v32f64:
301 ; VBITS_GE_2048: ptrue [[PG1:p[0-9]+]].h, vl32
302 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG1]]/z, [x0]
303 ; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].d, vl32
304 ; VBITS_GE_2048-NEXT: uunpklo [[UPK1:z[0-9]+]].s, [[OP]].h
305 ; VBITS_GE_2048-NEXT: uunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s
306 ; VBITS_GE_2048-NEXT: ucvtf [[RES:z[0-9]+]].d, [[PG2]]/m, [[UPK2]].d
307 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG1]], [x1]
308 ; VBITS_GE_2048-NEXT: ret
309 %op1 = load <32 x i16>, <32 x i16>* %a
310 %res = uitofp <32 x i16> %op1 to <32 x double>
311 store <32 x double> %res, <32 x double>* %b
319 ; Don't use SVE for 64-bit vectors.
320 define <2 x half> @ucvtf_v2i32_v2f16(<2 x i32> %op1) #0 {
321 ; CHECK-LABEL: ucvtf_v2i32_v2f16:
322 ; CHECK: ucvtf v0.4s, v0.4s
323 ; CHECK-NEXT: fcvtn v0.4h, v0.4s
325 %res = uitofp <2 x i32> %op1 to <2 x half>
329 ; Don't use SVE for 128-bit vectors.
330 define <4 x half> @ucvtf_v4i32_v4f16(<4 x i32> %op1) #0 {
331 ; CHECK-LABEL: ucvtf_v4i32_v4f16:
332 ; CHECK: ucvtf v0.4s, v0.4s
333 ; CHECK-NEXT: fcvtn v0.4h, v0.4s
335 %res = uitofp <4 x i32> %op1 to <4 x half>
339 define <8 x half> @ucvtf_v8i32_v8f16(<8 x i32>* %a) #0 {
340 ; CHECK-LABEL: ucvtf_v8i32_v8f16:
341 ; CHECK: ptrue [[PG1:p[0-9]+]].s, vl8
342 ; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
343 ; CHECK-NEXT: ptrue [[PG2:p[0-9]+]].s
344 ; CHECK-NEXT: ucvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].s
345 ; CHECK-NEXT: uzp1 z0.h, [[CVT]].h, [[CVT]].h
347 %op1 = load <8 x i32>, <8 x i32>* %a
348 %res = uitofp <8 x i32> %op1 to <8 x half>
352 define void @ucvtf_v16i32_v16f16(<16 x i32>* %a, <16 x half>* %b) #0 {
353 ; CHECK-LABEL: ucvtf_v16i32_v16f16:
354 ; VBITS_GE_512: ptrue [[PG1:p[0-9]+]].s, vl16
355 ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
356 ; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].s
357 ; VBITS_GE_512-NEXT: ucvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].s
358 ; VBITS_GE_512-NEXT: uzp1 [[RES:z[0-9]+]].h, [[CVT]].h, [[CVT]].h
359 ; VBITS_GE_512-NEXT: ptrue [[PG3:p[0-9]+]].h, vl16
360 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG3]], [x1]
361 ; VBITS_GE_512-NEXT: ret
363 ; Ensure sensible type legalisation.
364 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].s, vl8
365 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
366 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG1]]/z, [x0]
367 ; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG1]]/z, [x0, x[[NUMELTS]], lsl #2]
368 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s
369 ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].h, vl8
370 ; VBITS_EQ_256-DAG: ucvtf [[CVT_LO:z[0-9]+]].h, [[PG2]]/m, [[LO]].s
371 ; VBITS_EQ_256-DAG: ucvtf [[CVT_HI:z[0-9]+]].h, [[PG2]]/m, [[HI]].s
372 ; VBITS_EQ_256-DAG: uzp1 [[RES_LO:z[0-9]+]].h, [[CVT_LO]].h, [[CVT_LO]].h
373 ; VBITS_EQ_256-DAG: uzp1 [[RES_HI:z[0-9]+]].h, [[CVT_HI]].h, [[CVT_HI]].h
374 ; VBITS_EQ_256-DAG: splice [[RES:z[0-9]+]].h, [[PG3]], [[RES_LO]].h, [[RES_HI]].h
375 ; VBITS_EQ_256-DAG: ptrue [[PG4:p[0-9]+]].h, vl16
376 ; VBITS_EQ_256-NEXT: st1h { [[RES]].h }, [[PG4]], [x1]
377 ; VBITS_EQ_256-NEXT: ret
378 %op1 = load <16 x i32>, <16 x i32>* %a
379 %res = uitofp <16 x i32> %op1 to <16 x half>
380 store <16 x half> %res, <16 x half>* %b
384 define void @ucvtf_v32i32_v32f16(<32 x i32>* %a, <32 x half>* %b) #0 {
385 ; CHECK-LABEL: ucvtf_v32i32_v32f16:
386 ; VBITS_GE_1024: ptrue [[PG1:p[0-9]+]].s, vl32
387 ; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
388 ; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].s
389 ; VBITS_GE_1024-NEXT: ucvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].s
390 ; VBITS_GE_1024-NEXT: uzp1 [[RES:z[0-9]+]].h, [[CVT]].h, [[CVT]].h
391 ; VBITS_GE_1024-NEXT: ptrue [[PG3:p[0-9]+]].h, vl32
392 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG3]], [x1]
393 ; VBITS_GE_1024-NEXT: ret
394 %op1 = load <32 x i32>, <32 x i32>* %a
395 %res = uitofp <32 x i32> %op1 to <32 x half>
396 store <32 x half> %res, <32 x half>* %b
400 define void @ucvtf_v64i32_v64f16(<64 x i32>* %a, <64 x half>* %b) #0 {
401 ; CHECK-LABEL: ucvtf_v64i32_v64f16:
402 ; VBITS_GE_2048: ptrue [[PG1:p[0-9]+]].s, vl64
403 ; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
404 ; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].s
405 ; VBITS_GE_2048-NEXT: ucvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].s
406 ; VBITS_GE_2048-NEXT: uzp1 [[RES:z[0-9]+]].h, [[CVT]].h, [[CVT]].h
407 ; VBITS_GE_2048-NEXT: ptrue [[PG3:p[0-9]+]].h, vl64
408 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG3]], [x1]
409 ; VBITS_GE_2048-NEXT: ret
410 %op1 = load <64 x i32>, <64 x i32>* %a
411 %res = uitofp <64 x i32> %op1 to <64 x half>
412 store <64 x half> %res, <64 x half>* %b
420 ; Don't use SVE for 64-bit vectors.
421 define <2 x float> @ucvtf_v2i32_v2f32(<2 x i32> %op1) #0 {
422 ; CHECK-LABEL: ucvtf_v2i32_v2f32:
423 ; CHECK: ucvtf v0.2s, v0.2s
425 %res = uitofp <2 x i32> %op1 to <2 x float>
429 ; Don't use SVE for 128-bit vectors.
430 define <4 x float> @ucvtf_v4i32_v4f32(<4 x i32> %op1) #0 {
431 ; CHECK-LABEL: ucvtf_v4i32_v4f32:
432 ; CHECK: ucvtf v0.4s, v0.4s
434 %res = uitofp <4 x i32> %op1 to <4 x float>
438 define void @ucvtf_v8i32_v8f32(<8 x i32>* %a, <8 x float>* %b) #0 {
439 ; CHECK-LABEL: ucvtf_v8i32_v8f32:
440 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
441 ; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
442 ; CHECK-NEXT: ucvtf [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
443 ; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
445 %op1 = load <8 x i32>, <8 x i32>* %a
446 %res = uitofp <8 x i32> %op1 to <8 x float>
447 store <8 x float> %res, <8 x float>* %b
451 define void @ucvtf_v16i32_v16f32(<16 x i32>* %a, <16 x float>* %b) #0 {
452 ; CHECK-LABEL: ucvtf_v16i32_v16f32:
453 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
454 ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
455 ; VBITS_GE_512-NEXT: ucvtf [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
456 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
457 ; VBITS_GE_512-NEXT: ret
459 ; Ensure sensible type legalisation.
460 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
461 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
462 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
463 ; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
464 ; VBITS_EQ_256-DAG: ucvtf [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[LO]].s
465 ; VBITS_EQ_256-DAG: ucvtf [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[HI]].s
466 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x1]
467 ; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x1, x[[NUMELTS]], lsl #2]
468 ; VBITS_EQ_256-NEXT: ret
469 %op1 = load <16 x i32>, <16 x i32>* %a
470 %res = uitofp <16 x i32> %op1 to <16 x float>
471 store <16 x float> %res, <16 x float>* %b
475 define void @ucvtf_v32i32_v32f32(<32 x i32>* %a, <32 x float>* %b) #0 {
476 ; CHECK-LABEL: ucvtf_v32i32_v32f32:
477 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
478 ; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
479 ; VBITS_GE_1024-NEXT: ucvtf [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
480 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
481 ; VBITS_GE_1024-NEXT: ret
482 %op1 = load <32 x i32>, <32 x i32>* %a
483 %res = uitofp <32 x i32> %op1 to <32 x float>
484 store <32 x float> %res, <32 x float>* %b
488 define void @ucvtf_v64i32_v64f32(<64 x i32>* %a, <64 x float>* %b) #0 {
489 ; CHECK-LABEL: ucvtf_v64i32_v64f32:
490 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
491 ; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
492 ; VBITS_GE_2048-NEXT: ucvtf [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
493 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
494 ; VBITS_GE_2048-NEXT: ret
495 %op1 = load <64 x i32>, <64 x i32>* %a
496 %res = uitofp <64 x i32> %op1 to <64 x float>
497 store <64 x float> %res, <64 x float>* %b
505 ; Don't use SVE for 64-bit vectors.
506 define <1 x double> @ucvtf_v1i32_v1f64(<1 x i32> %op1) #0 {
507 ; CHECK-LABEL: ucvtf_v1i32_v1f64:
508 ; CHECK: ushll v0.2d, v0.2s, #0
509 ; CHECK-NEXT: ucvtf v0.2d, v0.2d
511 %res = uitofp <1 x i32> %op1 to <1 x double>
512 ret <1 x double> %res
515 ; Don't use SVE for 128-bit vectors.
516 define <2 x double> @ucvtf_v2i32_v2f64(<2 x i32> %op1) #0 {
517 ; CHECK-LABEL: ucvtf_v2i32_v2f64:
518 ; CHECK: ushll v0.2d, v0.2s, #0
519 ; CHECK-NEXT: ucvtf v0.2d, v0.2d
521 %res = uitofp <2 x i32> %op1 to <2 x double>
522 ret <2 x double> %res
525 define void @ucvtf_v4i32_v4f64(<4 x i32>* %a, <4 x double>* %b) #0 {
526 ; CHECK-LABEL: ucvtf_v4i32_v4f64:
527 ; CHECK: ldr q[[OP:[0-9]+]], [x0]
528 ; CHECK-NEXT: ptrue [[PG:p[0-9]+]].d, vl4
529 ; CHECK-NEXT: uunpklo [[UPK:z[0-9]+]].d, z[[OP]].s
530 ; CHECK-NEXT: ucvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[UPK]].d
531 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
533 %op1 = load <4 x i32>, <4 x i32>* %a
534 %res = uitofp <4 x i32> %op1 to <4 x double>
535 store <4 x double> %res, <4 x double>* %b
539 define void @ucvtf_v8i32_v8f64(<8 x i32>* %a, <8 x double>* %b) #0 {
540 ; CHECK-LABEL: ucvtf_v8i32_v8f64:
541 ; VBITS_GE_512: ptrue [[PG1:p[0-9]+]].s, vl8
542 ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
543 ; VBITS_GE_512-NEXT: ptrue [[PG:p[0-9]+]].d, vl8
544 ; VBITS_GE_512-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[OP]].s
545 ; VBITS_GE_512-NEXT: ucvtf [[RES:z[0-9]+]].d, [[PG1]]/m, [[UPK]].d
546 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG1]], [x1]
547 ; VBITS_GE_512-NEXT: ret
549 ; Ensure sensible type legalisation - fixed type extract_subvector codegen is poor currently.
550 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].s, vl8
551 ; VBITS_EQ_256-DAG: ld1w { [[VEC:z[0-9]+]].s }, [[PG1]]/z, [x0]
552 ; VBITS_EQ_256-DAG: mov x8, sp
553 ; VBITS_EQ_256-DAG: st1w { [[VEC]].s }, [[PG1]], [x8]
554 ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp]
555 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d, vl4
556 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
557 ; VBITS_EQ_256-DAG: uunpklo [[UPK_LO:z[0-9]+]].d, z[[LO]].s
558 ; VBITS_EQ_256-DAG: uunpklo [[UPK_HI:z[0-9]+]].d, z[[HI]].s
559 ; VBITS_EQ_256-DAG: ucvtf [[RES_LO:z[0-9]+]].d, [[PG2]]/m, [[UPK_LO]].d
560 ; VBITS_EQ_256-DAG: ucvtf [[RES_HI:z[0-9]+]].d, [[PG2]]/m, [[UPK_HI]].d
561 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG2]], [x1]
562 ; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x1, x[[NUMELTS]], lsl #3]
563 %op1 = load <8 x i32>, <8 x i32>* %a
564 %res = uitofp <8 x i32> %op1 to <8 x double>
565 store <8 x double> %res, <8 x double>* %b
569 define void @ucvtf_v16i32_v16f64(<16 x i32>* %a, <16 x double>* %b) #0 {
570 ; CHECK-LABEL: ucvtf_v16i32_v16f64:
571 ; VBITS_GE_1024: ptrue [[PG1:p[0-9]+]].s, vl16
572 ; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
573 ; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].d, vl16
574 ; VBITS_GE_1024-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[OP]].s
575 ; VBITS_GE_1024-NEXT: ucvtf [[RES:z[0-9]+]].d, [[PG2]]/m, [[UPK]].d
576 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG1]], [x1]
577 ; VBITS_GE_1024-NEXT: ret
578 %op1 = load <16 x i32>, <16 x i32>* %a
579 %res = uitofp <16 x i32> %op1 to <16 x double>
580 store <16 x double> %res, <16 x double>* %b
584 define void @ucvtf_v32i32_v32f64(<32 x i32>* %a, <32 x double>* %b) #0 {
585 ; CHECK-LABEL: ucvtf_v32i32_v32f64:
586 ; VBITS_GE_2048: ptrue [[PG1:p[0-9]+]].s, vl32
587 ; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
588 ; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].d, vl32
589 ; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[OP]].s
590 ; VBITS_GE_2048-NEXT: ucvtf [[RES:z[0-9]+]].d, [[PG2]]/m, [[UPK]].d
591 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG1]], [x1]
592 ; VBITS_GE_2048-NEXT: ret
593 %op1 = load <32 x i32>, <32 x i32>* %a
594 %res = uitofp <32 x i32> %op1 to <32 x double>
595 store <32 x double> %res, <32 x double>* %b
604 ; Don't use SVE for 64-bit vectors.
605 define <1 x half> @ucvtf_v1i64_v1f16(<1 x i64> %op1) #0 {
606 ; CHECK-LABEL: ucvtf_v1i64_v1f16:
608 ; CHECK-NEXT: ucvtf h0, x8
610 %res = uitofp <1 x i64> %op1 to <1 x half>
614 ; v2f16 is not legal for NEON, so use SVE
615 define <2 x half> @ucvtf_v2i64_v2f16(<2 x i64> %op1) #0 {
616 ; CHECK-LABEL: ucvtf_v2i64_v2f16:
617 ; CHECK: ptrue [[PG:p[0-9]+]].d
618 ; CHECK-NEXT: ucvtf [[CVT:z[0-9]+]].h, [[PG]]/m, z0.d
619 ; CHECK-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
620 ; CHECK-NEXT: uzp1 z0.h, [[UZP]].h, [[UZP]].h
622 %res = uitofp <2 x i64> %op1 to <2 x half>
626 define <4 x half> @ucvtf_v4i64_v4f16(<4 x i64>* %a) #0 {
627 ; CHECK-LABEL: ucvtf_v4i64_v4f16:
628 ; CHECK: ptrue [[PG1:p[0-9]+]].d, vl4
629 ; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
630 ; CHECK-NEXT: ptrue [[PG2:p[0-9]+]].d
631 ; CHECK-NEXT: ucvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].d
632 ; CHECK-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
633 ; CHECK-NEXT: uzp1 z0.h, [[UZP]].h, [[UZP]].h
635 %op1 = load <4 x i64>, <4 x i64>* %a
636 %res = uitofp <4 x i64> %op1 to <4 x half>
640 define <8 x half> @ucvtf_v8i64_v8f16(<8 x i64>* %a) #0 {
641 ; CHECK-LABEL: ucvtf_v8i64_v8f16:
642 ; VBITS_GE_512: ptrue [[PG1:p[0-9]+]].d, vl8
643 ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
644 ; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].d
645 ; VBITS_GE_512-NEXT: ucvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].d
646 ; VBITS_GE_512-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
647 ; VBITS_GE_512-NEXT: uzp1 z0.h, [[UZP]].h, [[UZP]].h
648 ; VBITS_GE_512-NEXT: ret
650 ; Ensure sensible type legalisation.
651 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
652 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
653 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG1]]/z, [x0]
654 ; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG1]]/z, [x0, x[[NUMELTS]], lsl #3]
655 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d
656 ; VBITS_EQ_256-DAG: ucvtf [[CVT_LO:z[0-9]+]].h, [[PG2]]/m, [[LO]].d
657 ; VBITS_EQ_256-DAG: ucvtf [[CVT_HI:z[0-9]+]].h, [[PG2]]/m, [[HI]].d
658 ; VBITS_EQ_256-DAG: uzp1 [[UZP_LO:z[0-9]+]].s, [[CVT_LO]].s, [[CVT_LO]].s
659 ; VBITS_EQ_256-DAG: uzp1 [[UZP_HI:z[0-9]+]].s, [[CVT_HI]].s, [[CVT_HI]].s
660 ; VBITS_EQ_256-DAG: uzp1 z0.h, [[UZP_LO]].h, [[UZP_LO]].h
661 ; VBITS_EQ_256-DAG: uzp1 z[[RES_HI:[0-9]+]].h, [[UZP_HI]].h, [[UZP_HI]].h
662 ; VBITS_EQ_256-NEXT: mov v0.d[1], v[[RES_HI]].d[0]
663 ; VBITS_EQ_256-NEXT: ret
664 %op1 = load <8 x i64>, <8 x i64>* %a
665 %res = uitofp <8 x i64> %op1 to <8 x half>
669 define void @ucvtf_v16i64_v16f16(<16 x i64>* %a, <16 x half>* %b) #0 {
670 ; CHECK-LABEL: ucvtf_v16i64_v16f16:
671 ; VBITS_GE_1024: ptrue [[PG1:p[0-9]+]].d, vl16
672 ; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
673 ; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].d
674 ; VBITS_GE_1024-NEXT: ucvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].d
675 ; VBITS_GE_1024-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
676 ; VBITS_GE_1024-NEXT: uzp1 [[RES:z[0-9]+]].h, [[UZP]].h, [[UZP]].h
677 ; VBITS_GE_1024-NEXT: ptrue [[PG3:p[0-9]+]].h, vl16
678 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG3]], [x1]
679 ; VBITS_GE_1024-NEXT: ret
680 %op1 = load <16 x i64>, <16 x i64>* %a
681 %res = uitofp <16 x i64> %op1 to <16 x half>
682 store <16 x half> %res, <16 x half>* %b
686 define void @ucvtf_v32i64_v32f16(<32 x i64>* %a, <32 x half>* %b) #0 {
687 ; CHECK-LABEL: ucvtf_v32i64_v32f16:
688 ; VBITS_GE_2048: ptrue [[PG1:p[0-9]+]].d, vl32
689 ; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
690 ; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].d
691 ; VBITS_GE_2048-NEXT: ucvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].d
692 ; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
693 ; VBITS_GE_2048-NEXT: uzp1 [[RES:z[0-9]+]].h, [[UZP]].h, [[UZP]].h
694 ; VBITS_GE_2048-NEXT: ptrue [[PG3:p[0-9]+]].h, vl32
695 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG3]], [x1]
696 ; VBITS_GE_2048-NEXT: ret
697 %op1 = load <32 x i64>, <32 x i64>* %a
698 %res = uitofp <32 x i64> %op1 to <32 x half>
699 store <32 x half> %res, <32 x half>* %b
707 ; Don't use SVE for 64-bit vectors.
708 define <1 x float> @ucvtf_v1i64_v1f32(<1 x i64> %op1) #0 {
709 ; CHECK-LABEL: ucvtf_v1i64_v1f32:
710 ; CHECK: ucvtf v0.2d, v0.2d
711 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
713 %res = uitofp <1 x i64> %op1 to <1 x float>
717 ; Don't use SVE for 128-bit vectors.
718 define <2 x float> @ucvtf_v2i64_v2f32(<2 x i64> %op1) #0 {
719 ; CHECK-LABEL: ucvtf_v2i64_v2f32:
720 ; CHECK: ucvtf v0.2d, v0.2d
721 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
723 %res = uitofp <2 x i64> %op1 to <2 x float>
727 define <4 x float> @ucvtf_v4i64_v4f32(<4 x i64>* %a) #0 {
728 ; CHECK-LABEL: ucvtf_v4i64_v4f32:
729 ; CHECK: ptrue [[PG1:p[0-9]+]].d, vl4
730 ; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
731 ; CHECK-NEXT: ptrue [[PG2:p[0-9]+]].d
732 ; CHECK-NEXT: ucvtf [[CVT:z[0-9]+]].s, [[PG2]]/m, [[OP]].d
733 ; CHECK-NEXT: uzp1 z0.s, [[CVT]].s, [[CVT]].s
735 %op1 = load <4 x i64>, <4 x i64>* %a
736 %res = uitofp <4 x i64> %op1 to <4 x float>
740 define void @ucvtf_v8i64_v8f32(<8 x i64>* %a, <8 x float>* %b) #0 {
741 ; CHECK-LABEL: ucvtf_v8i64_v8f32:
742 ; VBITS_GE_512: ptrue [[PG1:p[0-9]+]].d, vl8
743 ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
744 ; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].d
745 ; VBITS_GE_512-NEXT: ucvtf [[CVT:z[0-9]+]].s, [[PG2]]/m, [[OP]].d
746 ; VBITS_GE_512-NEXT: uzp1 [[RES:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
747 ; VBITS_GE_512-NEXT: ptrue [[PG3:p[0-9]+]].s, vl8
748 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG3]], [x1]
749 ; VBITS_GE_512-NEXT: ret
751 ; Ensure sensible type legalisation.
752 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
753 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
754 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG1]]/z, [x0]
755 ; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG1]]/z, [x0, x[[NUMELTS]], lsl #3]
756 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d
757 ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].s, vl4
758 ; VBITS_EQ_256-DAG: ucvtf [[CVT_LO:z[0-9]+]].s, [[PG2]]/m, [[LO]].d
759 ; VBITS_EQ_256-DAG: ucvtf [[CVT_HI:z[0-9]+]].s, [[PG2]]/m, [[HI]].d
760 ; VBITS_EQ_256-DAG: uzp1 [[RES_LO:z[0-9]+]].s, [[CVT_LO]].s, [[CVT_LO]].s
761 ; VBITS_EQ_256-DAG: uzp1 [[RES_HI:z[0-9]+]].s, [[CVT_HI]].s, [[CVT_HI]].s
762 ; VBITS_EQ_256-DAG: splice [[RES:z[0-9]+]].s, [[PG3]], [[RES_LO]].s, [[RES_HI]].s
763 ; VBITS_EQ_256-DAG: ptrue [[PG4:p[0-9]+]].s, vl8
764 ; VBITS_EQ_256-NEXT: st1w { [[RES]].s }, [[PG4]], [x1]
765 ; VBITS_EQ_256-NEXT: ret
766 %op1 = load <8 x i64>, <8 x i64>* %a
767 %res = uitofp <8 x i64> %op1 to <8 x float>
768 store <8 x float> %res, <8 x float>* %b
772 define void @ucvtf_v16i64_v16f32(<16 x i64>* %a, <16 x float>* %b) #0 {
773 ; CHECK-LABEL: ucvtf_v16i64_v16f32:
774 ; VBITS_GE_1024: ptrue [[PG1:p[0-9]+]].d, vl16
775 ; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
776 ; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].d
777 ; VBITS_GE_1024-NEXT: ucvtf [[CVT:z[0-9]+]].s, [[PG2]]/m, [[OP]].d
778 ; VBITS_GE_1024-NEXT: uzp1 [[RES:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
779 ; VBITS_GE_1024-NEXT: ptrue [[PG3:p[0-9]+]].s, vl16
780 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG3]], [x1]
781 ; VBITS_GE_1024-NEXT: ret
782 %op1 = load <16 x i64>, <16 x i64>* %a
783 %res = uitofp <16 x i64> %op1 to <16 x float>
784 store <16 x float> %res, <16 x float>* %b
788 define void @ucvtf_v32i64_v32f32(<32 x i64>* %a, <32 x float>* %b) #0 {
789 ; CHECK-LABEL: ucvtf_v32i64_v32f32:
790 ; VBITS_GE_2048: ptrue [[PG1:p[0-9]+]].d, vl32
791 ; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
792 ; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].d
793 ; VBITS_GE_2048-NEXT: ucvtf [[CVT:z[0-9]+]].s, [[PG2]]/m, [[OP]].d
794 ; VBITS_GE_2048-NEXT: uzp1 [[RES:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
795 ; VBITS_GE_2048-NEXT: ptrue [[PG3:p[0-9]+]].s, vl32
796 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG3]], [x1]
797 ; VBITS_GE_2048-NEXT: ret
798 %op1 = load <32 x i64>, <32 x i64>* %a
799 %res = uitofp <32 x i64> %op1 to <32 x float>
800 store <32 x float> %res, <32 x float>* %b
808 ; Don't use SVE for 64-bit vectors.
809 define <1 x double> @ucvtf_v1i64_v1f64(<1 x i64> %op1) #0 {
810 ; CHECK-LABEL: ucvtf_v1i64_v1f64:
812 ; CHECK-NEXT: ucvtf d0, x8
814 %res = uitofp <1 x i64> %op1 to <1 x double>
815 ret <1 x double> %res
818 ; Don't use SVE for 128-bit vectors.
819 define <2 x double> @ucvtf_v2i64_v2f64(<2 x i64> %op1) #0 {
820 ; CHECK-LABEL: ucvtf_v2i64_v2f64:
821 ; CHECK: ucvtf v0.2d, v0.2d
823 %res = uitofp <2 x i64> %op1 to <2 x double>
824 ret <2 x double> %res
827 define void @ucvtf_v4i64_v4f64(<4 x i64>* %a, <4 x double>* %b) #0 {
828 ; CHECK-LABEL: ucvtf_v4i64_v4f64:
829 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
830 ; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
831 ; CHECK-NEXT: ucvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
832 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
834 %op1 = load <4 x i64>, <4 x i64>* %a
835 %res = uitofp <4 x i64> %op1 to <4 x double>
836 store <4 x double> %res, <4 x double>* %b
840 define void @ucvtf_v8i64_v8f64(<8 x i64>* %a, <8 x double>* %b) #0 {
841 ; CHECK-LABEL: ucvtf_v8i64_v8f64:
842 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
843 ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
844 ; VBITS_GE_512-NEXT: ucvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
845 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
846 ; VBITS_GE_512-NEXT: ret
848 ; Ensure sensible type legalisation.
849 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
850 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
851 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
852 ; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
853 ; VBITS_EQ_256-DAG: ucvtf [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[LO]].d
854 ; VBITS_EQ_256-DAG: ucvtf [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[HI]].d
855 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x1]
856 ; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x1, x[[NUMELTS]], lsl #3]
857 ; VBITS_EQ_256-NEXT: ret
858 %op1 = load <8 x i64>, <8 x i64>* %a
859 %res = uitofp <8 x i64> %op1 to <8 x double>
860 store <8 x double> %res, <8 x double>* %b
864 define void @ucvtf_v16i64_v16f64(<16 x i64>* %a, <16 x double>* %b) #0 {
865 ; CHECK-LABEL: ucvtf_v16i64_v16f64:
866 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
867 ; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
868 ; VBITS_GE_1024-NEXT: ucvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
869 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
870 ; VBITS_GE_1024-NEXT: ret
871 %op1 = load <16 x i64>, <16 x i64>* %a
872 %res = uitofp <16 x i64> %op1 to <16 x double>
873 store <16 x double> %res, <16 x double>* %b
877 define void @ucvtf_v32i64_v32f64(<32 x i64>* %a, <32 x double>* %b) #0 {
878 ; CHECK-LABEL: ucvtf_v32i64_v32f64:
879 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
880 ; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
881 ; VBITS_GE_2048-NEXT: ucvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
882 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
883 ; VBITS_GE_2048-NEXT: ret
884 %op1 = load <32 x i64>, <32 x i64>* %a
885 %res = uitofp <32 x i64> %op1 to <32 x double>
886 store <32 x double> %res, <32 x double>* %b
894 ; Don't use SVE for 64-bit vectors.
895 define <4 x half> @scvtf_v4i16_v4f16(<4 x i16> %op1) #0 {
896 ; CHECK-LABEL: scvtf_v4i16_v4f16:
897 ; CHECK: scvtf v0.4h, v0.4h
899 %res = sitofp <4 x i16> %op1 to <4 x half>
903 ; Don't use SVE for 128-bit vectors.
904 define void @scvtf_v8i16_v8f16(<8 x i16>* %a, <8 x half>* %b) #0 {
905 ; CHECK-LABEL: scvtf_v8i16_v8f16:
906 ; CHECK: ldr q0, [x0]
907 ; CHECK-NEXT: scvtf v0.8h, v0.8h
908 ; CHECK-NEXT: str q0, [x1]
910 %op1 = load <8 x i16>, <8 x i16>* %a
911 %res = sitofp <8 x i16> %op1 to <8 x half>
912 store <8 x half> %res, <8 x half>* %b
916 define void @scvtf_v16i16_v16f16(<16 x i16>* %a, <16 x half>* %b) #0 {
917 ; CHECK-LABEL: scvtf_v16i16_v16f16:
918 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
919 ; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
920 ; CHECK-NEXT: scvtf [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
921 ; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
923 %op1 = load <16 x i16>, <16 x i16>* %a
924 %res = sitofp <16 x i16> %op1 to <16 x half>
925 store <16 x half> %res, <16 x half>* %b
929 define void @scvtf_v32i16_v32f16(<32 x i16>* %a, <32 x half>* %b) #0 {
930 ; CHECK-LABEL: scvtf_v32i16_v32f16:
931 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
932 ; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
933 ; VBITS_GE_512-NEXT: scvtf [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
934 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
935 ; VBITS_GE_512-NEXT: ret
937 ; Ensure sensible type legalisation.
938 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
939 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
940 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
941 ; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
942 ; VBITS_EQ_256-DAG: scvtf [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[LO]].h
943 ; VBITS_EQ_256-DAG: scvtf [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[HI]].h
944 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x1]
945 ; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x1, x[[NUMELTS]], lsl #1]
946 ; VBITS_EQ_256-NEXT: ret
947 %op1 = load <32 x i16>, <32 x i16>* %a
948 %res = sitofp <32 x i16> %op1 to <32 x half>
949 store <32 x half> %res, <32 x half>* %b
953 define void @scvtf_v64i16_v64f16(<64 x i16>* %a, <64 x half>* %b) #0 {
954 ; CHECK-LABEL: scvtf_v64i16_v64f16:
955 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
956 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
957 ; VBITS_GE_1024-NEXT: scvtf [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
958 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
959 ; VBITS_GE_1024-NEXT: ret
960 %op1 = load <64 x i16>, <64 x i16>* %a
961 %res = sitofp <64 x i16> %op1 to <64 x half>
962 store <64 x half> %res, <64 x half>* %b
966 define void @scvtf_v128i16_v128f16(<128 x i16>* %a, <128 x half>* %b) #0 {
967 ; CHECK-LABEL: scvtf_v128i16_v128f16:
968 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
969 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
970 ; VBITS_GE_2048-NEXT: scvtf [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
971 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
972 ; VBITS_GE_2048-NEXT: ret
973 %op1 = load <128 x i16>, <128 x i16>* %a
974 %res = sitofp <128 x i16> %op1 to <128 x half>
975 store <128 x half> %res, <128 x half>* %b
983 ; Don't use SVE for 64-bit vectors.
984 define <2 x float> @scvtf_v2i16_v2f32(<2 x i16> %op1) #0 {
985 ; CHECK-LABEL: scvtf_v2i16_v2f32:
986 ; CHECK: shl v0.2s, v0.2s, #16
987 ; CHECK-NEXT: sshr v0.2s, v0.2s, #16
988 ; CHECK-NEXT: scvtf v0.2s, v0.2s
990 %res = sitofp <2 x i16> %op1 to <2 x float>
994 ; Don't use SVE for 128-bit vectors.
995 define <4 x float> @scvtf_v4i16_v4f32(<4 x i16> %op1) #0 {
996 ; CHECK-LABEL: scvtf_v4i16_v4f32:
997 ; CHECK: scvtf v0.4s, v0.4s
999 %res = sitofp <4 x i16> %op1 to <4 x float>
1000 ret <4 x float> %res
1003 define void @scvtf_v8i16_v8f32(<8 x i16>* %a, <8 x float>* %b) #0 {
1004 ; CHECK-LABEL: scvtf_v8i16_v8f32:
1005 ; CHECK: ldr q[[OP:[0-9]+]], [x0]
1006 ; CHECK-NEXT: ptrue [[PG:p[0-9]+]].s, vl8
1007 ; CHECK-NEXT: sunpklo [[UPK:z[0-9]+]].s, z[[OP]].h
1008 ; CHECK-NEXT: scvtf [[RES:z[0-9]+]].s, [[PG]]/m, [[UPK]].s
1009 ; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
1011 %op1 = load <8 x i16>, <8 x i16>* %a
1012 %res = sitofp <8 x i16> %op1 to <8 x float>
1013 store <8 x float> %res, <8 x float>* %b
1017 define void @scvtf_v16i16_v16f32(<16 x i16>* %a, <16 x float>* %b) #0 {
1018 ; CHECK-LABEL: scvtf_v16i16_v16f32:
1019 ; VBITS_GE_512: ptrue [[PG1:p[0-9]+]].h, vl16
1020 ; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG1]]/z, [x0]
1021 ; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].s, vl16
1022 ; VBITS_GE_512-NEXT: sunpklo [[UPK:z[0-9]+]].s, [[OP]].h
1023 ; VBITS_GE_512-NEXT: scvtf [[RES:z[0-9]+]].s, [[PG2]]/m, [[UPK]].s
1024 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG1]], [x1]
1025 ; VBITS_GE_512-NEXT: ret
1027 ; Ensure sensible type legalisation - fixed type extract_subvector codegen is poor currently.
1028 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].h, vl16
1029 ; VBITS_EQ_256-DAG: ld1h { [[VEC:z[0-9]+]].h }, [[PG1]]/z, [x0]
1030 ; VBITS_EQ_256-DAG: mov x8, sp
1031 ; VBITS_EQ_256-DAG: st1h { [[VEC:z[0-9]+]].h }, [[PG1]], [x8]
1032 ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp]
1033 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s, vl8
1034 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
1035 ; VBITS_EQ_256-DAG: sunpklo [[UPK_LO:z[0-9]+]].s, z[[LO]].h
1036 ; VBITS_EQ_256-DAG: sunpklo [[UPK_HI:z[0-9]+]].s, z[[HI]].h
1037 ; VBITS_EQ_256-DAG: scvtf [[RES_LO:z[0-9]+]].s, [[PG2]]/m, [[UPK_LO]].s
1038 ; VBITS_EQ_256-DAG: scvtf [[RES_HI:z[0-9]+]].s, [[PG2]]/m, [[UPK_HI]].s
1039 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG2]], [x1]
1040 ; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG2]], [x1, x[[NUMELTS]], lsl #2]
1041 %op1 = load <16 x i16>, <16 x i16>* %a
1042 %res = sitofp <16 x i16> %op1 to <16 x float>
1043 store <16 x float> %res, <16 x float>* %b
1047 define void @scvtf_v32i16_v32f32(<32 x i16>* %a, <32 x float>* %b) #0 {
1048 ; CHECK-LABEL: scvtf_v32i16_v32f32:
1049 ; VBITS_GE_1024: ptrue [[PG1:p[0-9]+]].h, vl32
1050 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG1]]/z, [x0]
1051 ; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].s, vl32
1052 ; VBITS_GE_1024-NEXT: sunpklo [[UPK:z[0-9]+]].s, [[OP]].h
1053 ; VBITS_GE_1024-NEXT: scvtf [[RES:z[0-9]+]].s, [[PG2]]/m, [[UPK]].s
1054 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG1]], [x1]
1055 ; VBITS_GE_1024-NEXT: ret
1056 %op1 = load <32 x i16>, <32 x i16>* %a
1057 %res = sitofp <32 x i16> %op1 to <32 x float>
1058 store <32 x float> %res, <32 x float>* %b
1062 define void @scvtf_v64i16_v64f32(<64 x i16>* %a, <64 x float>* %b) #0 {
1063 ; CHECK-LABEL: scvtf_v64i16_v64f32:
1064 ; VBITS_GE_2048: ptrue [[PG1:p[0-9]+]].h, vl64
1065 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG1]]/z, [x0]
1066 ; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].s, vl64
1067 ; VBITS_GE_2048-NEXT: sunpklo [[UPK:z[0-9]+]].s, [[OP]].h
1068 ; VBITS_GE_2048-NEXT: scvtf [[RES:z[0-9]+]].s, [[PG2]]/m, [[UPK]].s
1069 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG1]], [x1]
1070 ; VBITS_GE_2048-NEXT: ret
1071 %op1 = load <64 x i16>, <64 x i16>* %a
1072 %res = sitofp <64 x i16> %op1 to <64 x float>
1073 store <64 x float> %res, <64 x float>* %b
1081 ; v1i16 is preferred to be widened to v4i16, which pushes the output into SVE types, so use SVE
1082 define <1 x double> @scvtf_v1i16_v1f64(<1 x i16> %op1) #0 {
1083 ; CHECK-LABEL: scvtf_v1i16_v1f64:
1084 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
1085 ; CHECK-NEXT: sunpklo [[UPK1:z[0-9]+]].s, z0.h
1086 ; CHECK-NEXT: sunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s
1087 ; CHECK-NEXT: scvtf z0.d, [[PG]]/m, [[UPK2]].d
1089 %res = sitofp <1 x i16> %op1 to <1 x double>
1090 ret <1 x double> %res
1093 ; Don't use SVE for 128-bit vectors.
1094 define <2 x double> @scvtf_v2i16_v2f64(<2 x i16> %op1) #0 {
1095 ; CHECK-LABEL: scvtf_v2i16_v2f64:
1096 ; CHECK: shl v0.2s, v0.2s, #16
1097 ; CHECK-NEXT: sshr v0.2s, v0.2s, #16
1098 ; CHECK-NEXT: sshll v0.2d, v0.2s, #0
1099 ; CHECK-NEXT: scvtf v0.2d, v0.2d
1101 %res = sitofp <2 x i16> %op1 to <2 x double>
1102 ret <2 x double> %res
1105 define void @scvtf_v4i16_v4f64(<4 x i16>* %a, <4 x double>* %b) #0 {
1106 ; CHECK-LABEL: scvtf_v4i16_v4f64:
1107 ; CHECK: ldr d[[OP:[0-9]+]], [x0]
1108 ; CHECK-NEXT: ptrue [[PG:p[0-9]+]].d, vl4
1109 ; CHECK-NEXT: sunpklo [[UPK1:z[0-9]+]].s, z[[OP]].h
1110 ; CHECK-NEXT: sunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s
1111 ; CHECK-NEXT: scvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[UPK2]].d
1112 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
1114 %op1 = load <4 x i16>, <4 x i16>* %a
1115 %res = sitofp <4 x i16> %op1 to <4 x double>
1116 store <4 x double> %res, <4 x double>* %b
1120 define void @scvtf_v8i16_v8f64(<8 x i16>* %a, <8 x double>* %b) #0 {
1121 ; CHECK-LABEL: scvtf_v8i16_v8f64:
1122 ; VBITS_GE_512: ldr q[[OP:[0-9]+]], [x0]
1123 ; VBITS_GE_512-NEXT: ptrue [[PG:p[0-9]+]].d, vl8
1124 ; VBITS_GE_512-NEXT: sunpklo [[UPK1:z[0-9]+]].s, z[[OP]].h
1125 ; VBITS_GE_512-NEXT: sunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s
1126 ; VBITS_GE_512-NEXT: scvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[UPK2]].d
1127 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
1128 ; VBITS_GE_512-NEXT: ret
1130 ; Ensure sensible type legalisation.
1131 ; VBITS_EQ_256-DAG: ldr q[[OP:[0-9]+]], [x0]
1132 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d, vl4
1133 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
1134 ; VBITS_EQ_256-DAG: ext v[[HI:[0-9]+]].16b, v[[LO:[0-9]+]].16b, v[[OP]].16b, #8
1135 ; VBITS_EQ_256-DAG: sunpklo [[UPK1_LO:z[0-9]+]].s, z[[LO]].h
1136 ; VBITS_EQ_256-DAG: sunpklo [[UPK1_HI:z[0-9]+]].s, z[[HI]].h
1137 ; VBITS_EQ_256-DAG: sunpklo [[UPK2_LO:z[0-9]+]].d, [[UPK1_LO]].s
1138 ; VBITS_EQ_256-DAG: sunpklo [[UPK2_HI:z[0-9]+]].d, [[UPK1_HI]].s
1139 ; VBITS_EQ_256-DAG: scvtf [[RES_LO:z[0-9]+]].d, [[PG2]]/m, [[UPK2_LO]].d
1140 ; VBITS_EQ_256-DAG: scvtf [[RES_HI:z[0-9]+]].d, [[PG2]]/m, [[UPK2_HI]].d
1141 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG2]], [x1]
1142 ; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x1, x[[NUMELTS]], lsl #3]
1143 ; VBITS_EQ_256-NEXT: ret
1144 %op1 = load <8 x i16>, <8 x i16>* %a
1145 %res = sitofp <8 x i16> %op1 to <8 x double>
1146 store <8 x double> %res, <8 x double>* %b
1150 define void @scvtf_v16i16_v16f64(<16 x i16>* %a, <16 x double>* %b) #0 {
1151 ; CHECK-LABEL: scvtf_v16i16_v16f64:
1152 ; VBITS_GE_1024: ptrue [[PG1:p[0-9]+]].h, vl16
1153 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG1]]/z, [x0]
1154 ; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].d, vl16
1155 ; VBITS_GE_1024-NEXT: sunpklo [[UPK1:z[0-9]+]].s, [[OP]].h
1156 ; VBITS_GE_1024-NEXT: sunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s
1157 ; VBITS_GE_1024-NEXT: scvtf [[RES:z[0-9]+]].d, [[PG2]]/m, [[UPK2]].d
1158 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG1]], [x1]
1159 ; VBITS_GE_1024-NEXT: ret
1160 %op1 = load <16 x i16>, <16 x i16>* %a
1161 %res = sitofp <16 x i16> %op1 to <16 x double>
1162 store <16 x double> %res, <16 x double>* %b
1166 define void @scvtf_v32i16_v32f64(<32 x i16>* %a, <32 x double>* %b) #0 {
1167 ; CHECK-LABEL: scvtf_v32i16_v32f64:
1168 ; VBITS_GE_2048: ptrue [[PG1:p[0-9]+]].h, vl32
1169 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG1]]/z, [x0]
1170 ; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].d, vl32
1171 ; VBITS_GE_2048-NEXT: sunpklo [[UPK1:z[0-9]+]].s, [[OP]].h
1172 ; VBITS_GE_2048-NEXT: sunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s
1173 ; VBITS_GE_2048-NEXT: scvtf [[RES:z[0-9]+]].d, [[PG2]]/m, [[UPK2]].d
1174 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG1]], [x1]
1175 ; VBITS_GE_2048-NEXT: ret
1176 %op1 = load <32 x i16>, <32 x i16>* %a
1177 %res = sitofp <32 x i16> %op1 to <32 x double>
1178 store <32 x double> %res, <32 x double>* %b
1186 ; Don't use SVE for 64-bit vectors.
1187 define <2 x half> @scvtf_v2i32_v2f16(<2 x i32> %op1) #0 {
1188 ; CHECK-LABEL: scvtf_v2i32_v2f16:
1189 ; CHECK: scvtf v0.4s, v0.4s
1190 ; CHECK-NEXT: fcvtn v0.4h, v0.4s
1192 %res = sitofp <2 x i32> %op1 to <2 x half>
1196 ; Don't use SVE for 128-bit vectors.
1197 define <4 x half> @scvtf_v4i32_v4f16(<4 x i32> %op1) #0 {
1198 ; CHECK-LABEL: scvtf_v4i32_v4f16:
1199 ; CHECK: scvtf v0.4s, v0.4s
1200 ; CHECK-NEXT: fcvtn v0.4h, v0.4s
1202 %res = sitofp <4 x i32> %op1 to <4 x half>
1206 define <8 x half> @scvtf_v8i32_v8f16(<8 x i32>* %a) #0 {
1207 ; CHECK-LABEL: scvtf_v8i32_v8f16:
1208 ; CHECK: ptrue [[PG1:p[0-9]+]].s, vl8
1209 ; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
1210 ; CHECK-NEXT: ptrue [[PG2:p[0-9]+]].s
1211 ; CHECK-NEXT: scvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].s
1212 ; CHECK-NEXT: uzp1 z0.h, [[CVT]].h, [[CVT]].h
1214 %op1 = load <8 x i32>, <8 x i32>* %a
1215 %res = sitofp <8 x i32> %op1 to <8 x half>
1219 define void @scvtf_v16i32_v16f16(<16 x i32>* %a, <16 x half>* %b) #0 {
1220 ; CHECK-LABEL: scvtf_v16i32_v16f16:
1221 ; VBITS_GE_512: ptrue [[PG1:p[0-9]+]].s, vl16
1222 ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
1223 ; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].s
1224 ; VBITS_GE_512-NEXT: scvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].s
1225 ; VBITS_GE_512-NEXT: uzp1 [[RES:z[0-9]+]].h, [[CVT]].h, [[CVT]].h
1226 ; VBITS_GE_512-NEXT: ptrue [[PG3:p[0-9]+]].h, vl16
1227 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG3]], [x1]
1228 ; VBITS_GE_512-NEXT: ret
1230 ; Ensure sensible type legalisation.
1231 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].s, vl8
1232 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
1233 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
1234 ; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
1235 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s
1236 ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].h, vl8
1237 ; VBITS_EQ_256-DAG: scvtf [[CVT_LO:z[0-9]+]].h, [[PG2]]/m, [[LO]].s
1238 ; VBITS_EQ_256-DAG: scvtf [[CVT_HI:z[0-9]+]].h, [[PG2]]/m, [[HI]].s
1239 ; VBITS_EQ_256-DAG: uzp1 [[RES_LO:z[0-9]+]].h, [[CVT_LO]].h, [[CVT_LO]].h
1240 ; VBITS_EQ_256-DAG: uzp1 [[RES_HI:z[0-9]+]].h, [[CVT_HI]].h, [[CVT_HI]].h
1241 ; VBITS_EQ_256-DAG: splice [[RES:z[0-9]+]].h, [[PG3]], [[RES_LO]].h, [[RES_HI]].h
1242 ; VBITS_EQ_256-DAG: ptrue [[PG4:p[0-9]+]].h, vl16
1243 ; VBITS_EQ_256-NEXT: st1h { [[RES]].h }, [[PG4]], [x1]
1244 ; VBITS_EQ_256-NEXT: ret
1245 %op1 = load <16 x i32>, <16 x i32>* %a
1246 %res = sitofp <16 x i32> %op1 to <16 x half>
1247 store <16 x half> %res, <16 x half>* %b
1251 define void @scvtf_v32i32_v32f16(<32 x i32>* %a, <32 x half>* %b) #0 {
1252 ; CHECK-LABEL: scvtf_v32i32_v32f16:
1253 ; VBITS_GE_1024: ptrue [[PG1:p[0-9]+]].s, vl32
1254 ; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
1255 ; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].s
1256 ; VBITS_GE_1024-NEXT: scvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].s
1257 ; VBITS_GE_1024-NEXT: uzp1 [[RES:z[0-9]+]].h, [[CVT]].h, [[CVT]].h
1258 ; VBITS_GE_1024-NEXT: ptrue [[PG3:p[0-9]+]].h, vl32
1259 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG3]], [x1]
1260 ; VBITS_GE_1024-NEXT: ret
1261 %op1 = load <32 x i32>, <32 x i32>* %a
1262 %res = sitofp <32 x i32> %op1 to <32 x half>
1263 store <32 x half> %res, <32 x half>* %b
1267 define void @scvtf_v64i32_v64f16(<64 x i32>* %a, <64 x half>* %b) #0 {
1268 ; CHECK-LABEL: scvtf_v64i32_v64f16:
1269 ; VBITS_GE_2048: ptrue [[PG1:p[0-9]+]].s, vl64
1270 ; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
1271 ; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].s
1272 ; VBITS_GE_2048-NEXT: scvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].s
1273 ; VBITS_GE_2048-NEXT: uzp1 [[RES:z[0-9]+]].h, [[CVT]].h, [[CVT]].h
1274 ; VBITS_GE_2048-NEXT: ptrue [[PG3:p[0-9]+]].h, vl64
1275 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG3]], [x1]
1276 ; VBITS_GE_2048-NEXT: ret
1277 %op1 = load <64 x i32>, <64 x i32>* %a
1278 %res = sitofp <64 x i32> %op1 to <64 x half>
1279 store <64 x half> %res, <64 x half>* %b
1287 ; Don't use SVE for 64-bit vectors.
1288 define <2 x float> @scvtf_v2i32_v2f32(<2 x i32> %op1) #0 {
1289 ; CHECK-LABEL: scvtf_v2i32_v2f32:
1290 ; CHECK: scvtf v0.2s, v0.2s
1292 %res = sitofp <2 x i32> %op1 to <2 x float>
1293 ret <2 x float> %res
1296 ; Don't use SVE for 128-bit vectors.
1297 define <4 x float> @scvtf_v4i32_v4f32(<4 x i32> %op1) #0 {
1298 ; CHECK-LABEL: scvtf_v4i32_v4f32:
1299 ; CHECK: scvtf v0.4s, v0.4s
1301 %res = sitofp <4 x i32> %op1 to <4 x float>
1302 ret <4 x float> %res
1305 define void @scvtf_v8i32_v8f32(<8 x i32>* %a, <8 x float>* %b) #0 {
1306 ; CHECK-LABEL: scvtf_v8i32_v8f32:
1307 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
1308 ; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1309 ; CHECK-NEXT: scvtf [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1310 ; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
1312 %op1 = load <8 x i32>, <8 x i32>* %a
1313 %res = sitofp <8 x i32> %op1 to <8 x float>
1314 store <8 x float> %res, <8 x float>* %b
1318 define void @scvtf_v16i32_v16f32(<16 x i32>* %a, <16 x float>* %b) #0 {
1319 ; CHECK-LABEL: scvtf_v16i32_v16f32:
1320 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
1321 ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1322 ; VBITS_GE_512-NEXT: scvtf [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1323 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
1324 ; VBITS_GE_512-NEXT: ret
1326 ; Ensure sensible type legalisation.
1327 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
1328 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
1329 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
1330 ; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
1331 ; VBITS_EQ_256-DAG: scvtf [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[LO]].s
1332 ; VBITS_EQ_256-DAG: scvtf [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[HI]].s
1333 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x1]
1334 ; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x1, x[[NUMELTS]], lsl #2]
1335 ; VBITS_EQ_256-NEXT: ret
1336 %op1 = load <16 x i32>, <16 x i32>* %a
1337 %res = sitofp <16 x i32> %op1 to <16 x float>
1338 store <16 x float> %res, <16 x float>* %b
1342 define void @scvtf_v32i32_v32f32(<32 x i32>* %a, <32 x float>* %b) #0 {
1343 ; CHECK-LABEL: scvtf_v32i32_v32f32:
1344 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
1345 ; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1346 ; VBITS_GE_1024-NEXT: scvtf [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1347 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
1348 ; VBITS_GE_1024-NEXT: ret
1349 %op1 = load <32 x i32>, <32 x i32>* %a
1350 %res = sitofp <32 x i32> %op1 to <32 x float>
1351 store <32 x float> %res, <32 x float>* %b
1355 define void @scvtf_v64i32_v64f32(<64 x i32>* %a, <64 x float>* %b) #0 {
1356 ; CHECK-LABEL: scvtf_v64i32_v64f32:
1357 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
1358 ; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1359 ; VBITS_GE_2048-NEXT: scvtf [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1360 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
1361 ; VBITS_GE_2048-NEXT: ret
1362 %op1 = load <64 x i32>, <64 x i32>* %a
1363 %res = sitofp <64 x i32> %op1 to <64 x float>
1364 store <64 x float> %res, <64 x float>* %b
1372 ; Don't use SVE for 64-bit vectors.
1373 define <1 x double> @scvtf_v1i32_v1f64(<1 x i32> %op1) #0 {
1374 ; CHECK-LABEL: scvtf_v1i32_v1f64:
1375 ; CHECK: sshll v0.2d, v0.2s, #0
1376 ; CHECK-NEXT: scvtf v0.2d, v0.2d
1378 %res = sitofp <1 x i32> %op1 to <1 x double>
1379 ret <1 x double> %res
1382 ; Don't use SVE for 128-bit vectors.
1383 define <2 x double> @scvtf_v2i32_v2f64(<2 x i32> %op1) #0 {
1384 ; CHECK-LABEL: scvtf_v2i32_v2f64:
1385 ; CHECK: sshll v0.2d, v0.2s, #0
1386 ; CHECK-NEXT: scvtf v0.2d, v0.2d
1388 %res = sitofp <2 x i32> %op1 to <2 x double>
1389 ret <2 x double> %res
1392 define void @scvtf_v4i32_v4f64(<4 x i32>* %a, <4 x double>* %b) #0 {
1393 ; CHECK-LABEL: scvtf_v4i32_v4f64:
1394 ; CHECK: ldr q[[OP:[0-9]+]], [x0]
1395 ; CHECK-NEXT: ptrue [[PG:p[0-9]+]].d, vl4
1396 ; CHECK-NEXT: sunpklo [[UPK:z[0-9]+]].d, z[[OP]].s
1397 ; CHECK-NEXT: scvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[UPK]].d
1398 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
1400 %op1 = load <4 x i32>, <4 x i32>* %a
1401 %res = sitofp <4 x i32> %op1 to <4 x double>
1402 store <4 x double> %res, <4 x double>* %b
1406 define void @scvtf_v8i32_v8f64(<8 x i32>* %a, <8 x double>* %b) #0 {
1407 ; CHECK-LABEL: scvtf_v8i32_v8f64:
1408 ; VBITS_GE_512: ptrue [[PG1:p[0-9]+]].s, vl8
1409 ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
1410 ; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].d, vl8
1411 ; VBITS_GE_512-NEXT: sunpklo [[UPK:z[0-9]+]].d, [[OP]].s
1412 ; VBITS_GE_512-NEXT: scvtf [[RES:z[0-9]+]].d, [[PG2]]/m, [[UPK]].d
1413 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG1]], [x1]
1414 ; VBITS_GE_512-NEXT: ret
1416 ; Ensure sensible type legalisation - fixed type extract_subvector codegen is poor currently.
1417 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].s, vl8
1418 ; VBITS_EQ_256-DAG: ld1w { [[VEC:z[0-9]+]].s }, [[PG1]]/z, [x0]
1419 ; VBITS_EQ_256-DAG: mov x8, sp
1420 ; VBITS_EQ_256-DAG: st1w { [[VEC:z[0-9]+]].s }, [[PG1]], [x8]
1421 ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp]
1422 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d, vl4
1423 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
1424 ; VBITS_EQ_256-DAG: sunpklo [[UPK_LO:z[0-9]+]].d, z[[LO]].s
1425 ; VBITS_EQ_256-DAG: sunpklo [[UPK_HI:z[0-9]+]].d, z[[HI]].s
1426 ; VBITS_EQ_256-DAG: scvtf [[RES_LO:z[0-9]+]].d, [[PG2]]/m, [[UPK_LO]].d
1427 ; VBITS_EQ_256-DAG: scvtf [[RES_HI:z[0-9]+]].d, [[PG2]]/m, [[UPK_HI]].d
1428 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG2]], [x1]
1429 ; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x1, x[[NUMELTS]], lsl #3]
1430 %op1 = load <8 x i32>, <8 x i32>* %a
1431 %res = sitofp <8 x i32> %op1 to <8 x double>
1432 store <8 x double> %res, <8 x double>* %b
1436 define void @scvtf_v16i32_v16f64(<16 x i32>* %a, <16 x double>* %b) #0 {
1437 ; CHECK-LABEL: scvtf_v16i32_v16f64:
1438 ; VBITS_GE_1024: ptrue [[PG1:p[0-9]+]].s, vl16
1439 ; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
1440 ; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].d, vl16
1441 ; VBITS_GE_1024-NEXT: sunpklo [[UPK:z[0-9]+]].d, [[OP]].s
1442 ; VBITS_GE_1024-NEXT: scvtf [[RES:z[0-9]+]].d, [[PG2]]/m, [[UPK]].d
1443 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG1]], [x1]
1444 ; VBITS_GE_1024-NEXT: ret
1445 %op1 = load <16 x i32>, <16 x i32>* %a
1446 %res = sitofp <16 x i32> %op1 to <16 x double>
1447 store <16 x double> %res, <16 x double>* %b
1451 define void @scvtf_v32i32_v32f64(<32 x i32>* %a, <32 x double>* %b) #0 {
1452 ; CHECK-LABEL: scvtf_v32i32_v32f64:
1453 ; VBITS_GE_2048: ptrue [[PG1:p[0-9]+]].s, vl32
1454 ; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
1455 ; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].d, vl32
1456 ; VBITS_GE_2048-NEXT: sunpklo [[UPK:z[0-9]+]].d, [[OP]].s
1457 ; VBITS_GE_2048-NEXT: scvtf [[RES:z[0-9]+]].d, [[PG2]]/m, [[UPK]].d
1458 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG1]], [x1]
1459 ; VBITS_GE_2048-NEXT: ret
1460 %op1 = load <32 x i32>, <32 x i32>* %a
1461 %res = sitofp <32 x i32> %op1 to <32 x double>
1462 store <32 x double> %res, <32 x double>* %b
1471 ; Don't use SVE for 64-bit vectors.
1472 define <1 x half> @scvtf_v1i64_v1f16(<1 x i64> %op1) #0 {
1473 ; CHECK-LABEL: scvtf_v1i64_v1f16:
1474 ; CHECK: fmov x8, d0
1475 ; CHECK-NEXT: scvtf h0, x8
1477 %res = sitofp <1 x i64> %op1 to <1 x half>
1481 ; v2f16 is not legal for NEON, so use SVE
1482 define <2 x half> @scvtf_v2i64_v2f16(<2 x i64> %op1) #0 {
1483 ; CHECK-LABEL: scvtf_v2i64_v2f16:
1484 ; CHECK: ptrue [[PG:p[0-9]+]].d
1485 ; CHECK-NEXT: scvtf [[CVT:z[0-9]+]].h, [[PG]]/m, z0.d
1486 ; CHECK-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
1487 ; CHECK-NEXT: uzp1 z0.h, [[UZP]].h, [[UZP]].h
1489 %res = sitofp <2 x i64> %op1 to <2 x half>
1493 define <4 x half> @scvtf_v4i64_v4f16(<4 x i64>* %a) #0 {
1494 ; CHECK-LABEL: scvtf_v4i64_v4f16:
1495 ; CHECK: ptrue [[PG1:p[0-9]+]].d, vl4
1496 ; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
1497 ; CHECK-NEXT: ptrue [[PG2:p[0-9]+]].d
1498 ; CHECK-NEXT: scvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].d
1499 ; CHECK-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
1500 ; CHECK-NEXT: uzp1 z0.h, [[UZP]].h, [[UZP]].h
1502 %op1 = load <4 x i64>, <4 x i64>* %a
1503 %res = sitofp <4 x i64> %op1 to <4 x half>
1507 define <8 x half> @scvtf_v8i64_v8f16(<8 x i64>* %a) #0 {
1508 ; CHECK-LABEL: scvtf_v8i64_v8f16:
1509 ; VBITS_GE_512: ptrue [[PG1:p[0-9]+]].d, vl8
1510 ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
1511 ; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].d
1512 ; VBITS_GE_512-NEXT: scvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].d
1513 ; VBITS_GE_512-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
1514 ; VBITS_GE_512-NEXT: uzp1 z0.h, [[UZP]].h, [[UZP]].h
1515 ; VBITS_GE_512-NEXT: ret
1517 ; Ensure sensible type legalisation.
1518 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
1519 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
1520 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
1521 ; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
1522 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d
1523 ; VBITS_EQ_256-DAG: scvtf [[CVT_LO:z[0-9]+]].h, [[PG2]]/m, [[LO]].d
1524 ; VBITS_EQ_256-DAG: scvtf [[CVT_HI:z[0-9]+]].h, [[PG2]]/m, [[HI]].d
1525 ; VBITS_EQ_256-DAG: uzp1 [[UZP_LO:z[0-9]+]].s, [[CVT_LO]].s, [[CVT_LO]].s
1526 ; VBITS_EQ_256-DAG: uzp1 [[UZP_HI:z[0-9]+]].s, [[CVT_HI]].s, [[CVT_HI]].s
1527 ; VBITS_EQ_256-DAG: uzp1 z[[RES_LO:[0-9]+]].h, [[UZP_LO]].h, [[UZP_LO]].h
1528 ; VBITS_EQ_256-DAG: uzp1 z[[RES_HI:[0-9]+]].h, [[UZP_HI]].h, [[UZP_HI]].h
1529 ; VBITS_EQ_256-NEXT: mov v[[RES_LO]].d[1], v[[RES_HI]].d[0]
1530 ; VBITS_EQ_256-NEXT: ret
1531 %op1 = load <8 x i64>, <8 x i64>* %a
1532 %res = sitofp <8 x i64> %op1 to <8 x half>
1536 define void @scvtf_v16i64_v16f16(<16 x i64>* %a, <16 x half>* %b) #0 {
1537 ; CHECK-LABEL: scvtf_v16i64_v16f16:
1538 ; VBITS_GE_1024: ptrue [[PG1:p[0-9]+]].d, vl16
1539 ; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
1540 ; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].d
1541 ; VBITS_GE_1024-NEXT: scvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].d
1542 ; VBITS_GE_1024-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
1543 ; VBITS_GE_1024-NEXT: uzp1 [[RES:z[0-9]+]].h, [[UZP]].h, [[UZP]].h
1544 ; VBITS_GE_1024-NEXT: ptrue [[PG3:p[0-9]+]].h, vl16
1545 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG3]], [x1]
1546 ; VBITS_GE_1024-NEXT: ret
1547 %op1 = load <16 x i64>, <16 x i64>* %a
1548 %res = sitofp <16 x i64> %op1 to <16 x half>
1549 store <16 x half> %res, <16 x half>* %b
1553 define void @scvtf_v32i64_v32f16(<32 x i64>* %a, <32 x half>* %b) #0 {
1554 ; CHECK-LABEL: scvtf_v32i64_v32f16:
1555 ; VBITS_GE_2048: ptrue [[PG1:p[0-9]+]].d, vl32
1556 ; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
1557 ; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].d
1558 ; VBITS_GE_2048-NEXT: scvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].d
1559 ; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
1560 ; VBITS_GE_2048-NEXT: uzp1 [[RES:z[0-9]+]].h, [[UZP]].h, [[UZP]].h
1561 ; VBITS_GE_2048-NEXT: ptrue [[PG3:p[0-9]+]].h, vl32
1562 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG3]], [x1]
1563 ; VBITS_GE_2048-NEXT: ret
1564 %op1 = load <32 x i64>, <32 x i64>* %a
1565 %res = sitofp <32 x i64> %op1 to <32 x half>
1566 store <32 x half> %res, <32 x half>* %b
1574 ; Don't use SVE for 64-bit vectors.
1575 define <1 x float> @scvtf_v1i64_v1f32(<1 x i64> %op1) #0 {
1576 ; CHECK-LABEL: scvtf_v1i64_v1f32:
1577 ; CHECK: scvtf v0.2d, v0.2d
1578 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
1580 %res = sitofp <1 x i64> %op1 to <1 x float>
1581 ret <1 x float> %res
1584 ; Don't use SVE for 128-bit vectors.
1585 define <2 x float> @scvtf_v2i64_v2f32(<2 x i64> %op1) #0 {
1586 ; CHECK-LABEL: scvtf_v2i64_v2f32:
1587 ; CHECK: scvtf v0.2d, v0.2d
1588 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
1590 %res = sitofp <2 x i64> %op1 to <2 x float>
1591 ret <2 x float> %res
; Smallest SVE case: <4 x i64> -> <4 x float>. Result returned by value,
; so uzp1 packs directly into z0 (v0), which is the NEON/SVE return register.
1594 define <4 x float> @scvtf_v4i64_v4f32(<4 x i64>* %a) #0 {
1595 ; CHECK-LABEL: scvtf_v4i64_v4f32:
1596 ; CHECK: ptrue [[PG1:p[0-9]+]].d, vl4
1597 ; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
1598 ; CHECK-NEXT: ptrue [[PG2:p[0-9]+]].d
1599 ; CHECK-NEXT: scvtf [[CVT:z[0-9]+]].s, [[PG2]]/m, [[OP]].d
1600 ; CHECK-NEXT: uzp1 z0.s, [[CVT]].s, [[CVT]].s
1602 %op1 = load <4 x i64>, <4 x i64>* %a
1603 %res = sitofp <4 x i64> %op1 to <4 x float>
1604 ret <4 x float> %res
; <8 x i64> -> <8 x float>: single full-width op at >=512 bits; at exactly
; 256 bits the input legalises to two halves that are converted, packed,
; and re-joined with splice.
1607 define void @scvtf_v8i64_v8f32(<8 x i64>* %a, <8 x float>* %b) #0 {
1608 ; CHECK-LABEL: scvtf_v8i64_v8f32:
1609 ; VBITS_GE_512: ptrue [[PG1:p[0-9]+]].d, vl8
1610 ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
1611 ; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].d
1612 ; VBITS_GE_512-NEXT: scvtf [[CVT:z[0-9]+]].s, [[PG2]]/m, [[OP]].d
1613 ; VBITS_GE_512-NEXT: uzp1 [[RES:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
1614 ; VBITS_GE_512-NEXT: ptrue [[PG3:p[0-9]+]].s, vl8
1615 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG3]], [x1]
1616 ; VBITS_GE_512-NEXT: ret
1618 ; Ensure sensible type legalisation.
1619 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
1620 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
; FIX(review): the two loads below previously referenced [[PG]], which is
; never defined in this function and would bind to a stale definition from an
; earlier test. Use [[PG1]] defined two lines above (matches scvtf_v8i64_v8f64).
1621 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG1]]/z, [x0]
1622 ; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG1]]/z, [x0, x[[NUMELTS]], lsl #3]
1623 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d
1624 ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].s, vl4
1625 ; VBITS_EQ_256-DAG: scvtf [[CVT_LO:z[0-9]+]].s, [[PG2]]/m, [[LO]].d
1626 ; VBITS_EQ_256-DAG: scvtf [[CVT_HI:z[0-9]+]].s, [[PG2]]/m, [[HI]].d
1627 ; VBITS_EQ_256-DAG: uzp1 [[RES_LO:z[0-9]+]].s, [[CVT_LO]].s, [[CVT_LO]].s
1628 ; VBITS_EQ_256-DAG: uzp1 [[RES_HI:z[0-9]+]].s, [[CVT_HI]].s, [[CVT_HI]].s
1629 ; VBITS_EQ_256-DAG: splice [[RES:z[0-9]+]].s, [[PG3]], [[RES_LO]].s, [[RES_HI]].s
1630 ; VBITS_EQ_256-DAG: ptrue [[PG4:p[0-9]+]].s, vl8
1631 ; VBITS_EQ_256-NEXT: st1w { [[RES]].s }, [[PG4]], [x1]
1632 ; VBITS_EQ_256-NEXT: ret
1633 %op1 = load <8 x i64>, <8 x i64>* %a
1634 %res = sitofp <8 x i64> %op1 to <8 x float>
1635 store <8 x float> %res, <8 x float>* %b
; <16 x i64> -> <16 x float>: single full-width predicated convert at
; >=1024 bits; result compacted with one uzp1 and stored via vl16 store.
1639 define void @scvtf_v16i64_v16f32(<16 x i64>* %a, <16 x float>* %b) #0 {
1640 ; CHECK-LABEL: scvtf_v16i64_v16f32:
1641 ; VBITS_GE_1024: ptrue [[PG1:p[0-9]+]].d, vl16
1642 ; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
1643 ; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].d
1644 ; VBITS_GE_1024-NEXT: scvtf [[CVT:z[0-9]+]].s, [[PG2]]/m, [[OP]].d
1645 ; VBITS_GE_1024-NEXT: uzp1 [[RES:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
1646 ; VBITS_GE_1024-NEXT: ptrue [[PG3:p[0-9]+]].s, vl16
1647 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG3]], [x1]
1648 ; VBITS_GE_1024-NEXT: ret
1649 %op1 = load <16 x i64>, <16 x i64>* %a
1650 %res = sitofp <16 x i64> %op1 to <16 x float>
1651 store <16 x float> %res, <16 x float>* %b
; <32 x i64> -> <32 x float>: same shape as the v16 case, but requires a
; 2048-bit vector so the whole input fits in one register (vl32 of .d).
1655 define void @scvtf_v32i64_v32f32(<32 x i64>* %a, <32 x float>* %b) #0 {
1656 ; CHECK-LABEL: scvtf_v32i64_v32f32:
1657 ; VBITS_GE_2048: ptrue [[PG1:p[0-9]+]].d, vl32
1658 ; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
1659 ; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].d
1660 ; VBITS_GE_2048-NEXT: scvtf [[CVT:z[0-9]+]].s, [[PG2]]/m, [[OP]].d
1661 ; VBITS_GE_2048-NEXT: uzp1 [[RES:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
1662 ; VBITS_GE_2048-NEXT: ptrue [[PG3:p[0-9]+]].s, vl32
1663 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG3]], [x1]
1664 ; VBITS_GE_2048-NEXT: ret
1665 %op1 = load <32 x i64>, <32 x i64>* %a
1666 %res = sitofp <32 x i64> %op1 to <32 x float>
1667 store <32 x float> %res, <32 x float>* %b
1675 ; Don't use SVE for 64-bit vectors.
; Scalar path: the single i64 lane is moved to a GPR and converted with
; the scalar scvtf.
1676 define <1 x double> @scvtf_v1i64_v1f64(<1 x i64> %op1) #0 {
1677 ; CHECK-LABEL: scvtf_v1i64_v1f64:
1678 ; CHECK: fmov x8, d0
1679 ; CHECK-NEXT: scvtf d0, x8
1681 %res = sitofp <1 x i64> %op1 to <1 x double>
1682 ret <1 x double> %res
1685 ; Don't use SVE for 128-bit vectors.
; i64 and f64 lanes are the same width, so a single NEON scvtf suffices.
1686 define <2 x double> @scvtf_v2i64_v2f64(<2 x i64> %op1) #0 {
1687 ; CHECK-LABEL: scvtf_v2i64_v2f64:
1688 ; CHECK: scvtf v0.2d, v0.2d
1690 %res = sitofp <2 x i64> %op1 to <2 x double>
1691 ret <2 x double> %res
; Same-width convert (i64 -> f64): no uzp1 packing needed, and one
; predicate register serves the load, convert, and store.
1694 define void @scvtf_v4i64_v4f64(<4 x i64>* %a, <4 x double>* %b) #0 {
1695 ; CHECK-LABEL: scvtf_v4i64_v4f64:
1696 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
1697 ; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1698 ; CHECK-NEXT: scvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1699 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
1701 %op1 = load <4 x i64>, <4 x i64>* %a
1702 %res = sitofp <4 x i64> %op1 to <4 x double>
1703 store <4 x double> %res, <4 x double>* %b
; <8 x i64> -> <8 x double>: full-width at >=512 bits; at exactly 256 bits
; the vector splits into two vl4 halves, each converted and stored
; independently (no splice needed since widths match).
1707 define void @scvtf_v8i64_v8f64(<8 x i64>* %a, <8 x double>* %b) #0 {
1708 ; CHECK-LABEL: scvtf_v8i64_v8f64:
1709 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
1710 ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1711 ; VBITS_GE_512-NEXT: scvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1712 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
1713 ; VBITS_GE_512-NEXT: ret
1715 ; Ensure sensible type legalisation.
1716 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
1717 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
1718 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
1719 ; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
1720 ; VBITS_EQ_256-DAG: scvtf [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[LO]].d
1721 ; VBITS_EQ_256-DAG: scvtf [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[HI]].d
1722 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x1]
1723 ; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x1, x[[NUMELTS]], lsl #3]
1724 ; VBITS_EQ_256-NEXT: ret
1725 %op1 = load <8 x i64>, <8 x i64>* %a
1726 %res = sitofp <8 x i64> %op1 to <8 x double>
1727 store <8 x double> %res, <8 x double>* %b
; <16 x i64> -> <16 x double>: same-width convert, single vl16 op at
; >=1024 bits.
1731 define void @scvtf_v16i64_v16f64(<16 x i64>* %a, <16 x double>* %b) #0 {
1732 ; CHECK-LABEL: scvtf_v16i64_v16f64:
1733 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
1734 ; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1735 ; VBITS_GE_1024-NEXT: scvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1736 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
1737 ; VBITS_GE_1024-NEXT: ret
1738 %op1 = load <16 x i64>, <16 x i64>* %a
1739 %res = sitofp <16 x i64> %op1 to <16 x double>
1740 store <16 x double> %res, <16 x double>* %b
; <32 x i64> -> <32 x double>: same-width convert, single vl32 op at
; >=2048 bits.
1744 define void @scvtf_v32i64_v32f64(<32 x i64>* %a, <32 x double>* %b) #0 {
1745 ; CHECK-LABEL: scvtf_v32i64_v32f64:
1746 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
1747 ; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1748 ; VBITS_GE_2048-NEXT: scvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1749 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
1750 ; VBITS_GE_2048-NEXT: ret
1751 %op1 = load <32 x i64>, <32 x i64>* %a
1752 %res = sitofp <32 x i64> %op1 to <32 x double>
1753 store <32 x double> %res, <32 x double>* %b
; Every function above carries #0, so all of them are compiled with SVE enabled.
1757 attributes #0 = { "target-features"="+sve" }