; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
; fpext <vscale x 8 x half> -> <vscale x 8 x float>.
; Expected code: the .h input is unpacked into low/high .s halves
; (uunpklo/uunpkhi) and each half is converted by its own predicated fcvt,
; leaving the result in z0/z1.
define <vscale x 8 x float> @fcvts_nxv8f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: fcvts_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z1.s, z0.h
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    uunpkhi z2.s, z0.h
; CHECK-NEXT:    fcvt z0.s, p0/m, z1.h
; CHECK-NEXT:    fcvt z1.s, p0/m, z2.h
; CHECK-NEXT:    ret
  %res = fpext <vscale x 8 x half> %a to <vscale x 8 x float>
  ret <vscale x 8 x float> %res
}
; fpext <vscale x 4 x half> -> <vscale x 4 x double>.
; Expected code: the input is unpacked from .s lanes into low/high .d halves,
; then each half is converted half->double with a predicated fcvt (z0/z1).
define <vscale x 4 x double> @fcvtd_nxv4f16(<vscale x 4 x half> %a) {
; CHECK-LABEL: fcvtd_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z1.d, z0.s
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    uunpkhi z2.d, z0.s
; CHECK-NEXT:    fcvt z0.d, p0/m, z1.h
; CHECK-NEXT:    fcvt z1.d, p0/m, z2.h
; CHECK-NEXT:    ret
  %res = fpext <vscale x 4 x half> %a to <vscale x 4 x double>
  ret <vscale x 4 x double> %res
}
; fpext <vscale x 8 x half> -> <vscale x 8 x double>.
; Expected code: two levels of unpacking (.h -> .s, then .s -> .d) produce
; four .d vectors, each converted half->double by a predicated fcvt; the
; result occupies z0-z3.
define <vscale x 8 x double> @fcvtd_nxv8f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: fcvtd_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z1.s, z0.h
; CHECK-NEXT:    uunpkhi z0.s, z0.h
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    uunpklo z2.d, z1.s
; CHECK-NEXT:    uunpkhi z1.d, z1.s
; CHECK-NEXT:    uunpklo z3.d, z0.s
; CHECK-NEXT:    uunpkhi z4.d, z0.s
; CHECK-NEXT:    fcvt z0.d, p0/m, z2.h
; CHECK-NEXT:    fcvt z1.d, p0/m, z1.h
; CHECK-NEXT:    fcvt z2.d, p0/m, z3.h
; CHECK-NEXT:    fcvt z3.d, p0/m, z4.h
; CHECK-NEXT:    ret
  %res = fpext <vscale x 8 x half> %a to <vscale x 8 x double>
  ret <vscale x 8 x double> %res
}
; fpext <vscale x 4 x float> -> <vscale x 4 x double>.
; Expected code: the .s input is unpacked into low/high .d halves and each
; half is converted float->double by a predicated fcvt (z0/z1).
define <vscale x 4 x double> @fcvtd_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fcvtd_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z1.d, z0.s
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    uunpkhi z2.d, z0.s
; CHECK-NEXT:    fcvt z0.d, p0/m, z1.s
; CHECK-NEXT:    fcvt z1.d, p0/m, z2.s
; CHECK-NEXT:    ret
  %res = fpext <vscale x 4 x float> %a to <vscale x 4 x double>
  ret <vscale x 4 x double> %res
}
; fpext <vscale x 8 x float> -> <vscale x 8 x double>.
; Expected code: the input arrives in z0/z1; each register is unpacked into
; low/high .d halves and converted float->double, giving a result in z0-z3.
define <vscale x 8 x double> @fcvtd_nxv8f32(<vscale x 8 x float> %a) {
; CHECK-LABEL: fcvtd_nxv8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z2.d, z0.s
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    uunpkhi z3.d, z0.s
; CHECK-NEXT:    uunpklo z4.d, z1.s
; CHECK-NEXT:    uunpkhi z5.d, z1.s
; CHECK-NEXT:    fcvt z0.d, p0/m, z2.s
; CHECK-NEXT:    fcvt z1.d, p0/m, z3.s
; CHECK-NEXT:    fcvt z2.d, p0/m, z4.s
; CHECK-NEXT:    fcvt z3.d, p0/m, z5.s
; CHECK-NEXT:    ret
  %res = fpext <vscale x 8 x float> %a to <vscale x 8 x double>
  ret <vscale x 8 x double> %res
}
; fptrunc <vscale x 8 x float> -> <vscale x 8 x half>.
; Expected code: both input registers (z0/z1) are narrowed float->half with a
; predicated fcvt, then uzp1 on .h lanes merges them into the single result z0.
define <vscale x 8 x half> @fcvth_nxv8f32(<vscale x 8 x float> %a) {
; CHECK-LABEL: fcvth_nxv8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fcvt z1.h, p0/m, z1.s
; CHECK-NEXT:    fcvt z0.h, p0/m, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = fptrunc <vscale x 8 x float> %a to <vscale x 8 x half>
  ret <vscale x 8 x half> %res
}
; fptrunc <vscale x 8 x double> -> <vscale x 8 x half>.
; Expected code: four input registers (z0-z3) are each narrowed double->half,
; then merged pairwise with uzp1 on .s lanes and finally with uzp1 on .h
; lanes into z0.
define <vscale x 8 x half> @fcvth_nxv8f64(<vscale x 8 x double> %a) {
; CHECK-LABEL: fcvth_nxv8f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvt z3.h, p0/m, z3.d
; CHECK-NEXT:    fcvt z2.h, p0/m, z2.d
; CHECK-NEXT:    fcvt z1.h, p0/m, z1.d
; CHECK-NEXT:    fcvt z0.h, p0/m, z0.d
; CHECK-NEXT:    uzp1 z2.s, z2.s, z3.s
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
; CHECK-NEXT:    ret
  %res = fptrunc <vscale x 8 x double> %a to <vscale x 8 x half>
  ret <vscale x 8 x half> %res
}
; fptrunc <vscale x 4 x double> -> <vscale x 4 x half>.
; Expected code: z0/z1 are each narrowed double->half with a predicated fcvt
; and merged with uzp1 on .s lanes into z0.
define <vscale x 4 x half> @fcvth_nxv4f64(<vscale x 4 x double> %a) {
; CHECK-LABEL: fcvth_nxv4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvt z1.h, p0/m, z1.d
; CHECK-NEXT:    fcvt z0.h, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fptrunc <vscale x 4 x double> %a to <vscale x 4 x half>
  ret <vscale x 4 x half> %res
}
; fptrunc <vscale x 4 x double> -> <vscale x 4 x float>.
; Expected code: z0/z1 are each narrowed double->float with a predicated fcvt
; and merged with uzp1 on .s lanes into z0.
define <vscale x 4 x float> @fcvts_nxv4f64(<vscale x 4 x double> %a) {
; CHECK-LABEL: fcvts_nxv4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvt z1.s, p0/m, z1.d
; CHECK-NEXT:    fcvt z0.s, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fptrunc <vscale x 4 x double> %a to <vscale x 4 x float>
  ret <vscale x 4 x float> %res
}
; fptrunc <vscale x 8 x double> -> <vscale x 8 x float>.
; Expected code: z0-z3 are each narrowed double->float, then uzp1 on .s lanes
; merges (z0,z1) into z0 and (z2,z3) into z1.
define <vscale x 8 x float> @fcvts_nxv8f64(<vscale x 8 x double> %a) {
; CHECK-LABEL: fcvts_nxv8f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvt z1.s, p0/m, z1.d
; CHECK-NEXT:    fcvt z0.s, p0/m, z0.d
; CHECK-NEXT:    fcvt z3.s, p0/m, z3.d
; CHECK-NEXT:    fcvt z2.s, p0/m, z2.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    uzp1 z1.s, z2.s, z3.s
; CHECK-NEXT:    ret
  %res = fptrunc <vscale x 8 x double> %a to <vscale x 8 x float>
  ret <vscale x 8 x float> %res
}
; fptosi <vscale x 4 x double> -> <vscale x 4 x i32>.
; Expected code: z0/z1 are converted with fcvtzs on .d lanes, then uzp1 on
; .s lanes merges the two results into z0.
define <vscale x 4 x i32> @fcvtzs_s_nxv4f64(<vscale x 4 x double> %a) {
; CHECK-LABEL: fcvtzs_s_nxv4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvtzs z1.d, p0/m, z1.d
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fptosi <vscale x 4 x double> %a to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}
; fptosi <vscale x 8 x double> -> <vscale x 8 x i16>.
; Expected code: z0-z3 are converted with fcvtzs on .d lanes, merged pairwise
; with uzp1 on .s lanes, and finally merged with uzp1 on .h lanes into z0.
define <vscale x 8 x i16> @fcvtzs_h_nxv8f64(<vscale x 8 x double> %a) {
; CHECK-LABEL: fcvtzs_h_nxv8f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvtzs z3.d, p0/m, z3.d
; CHECK-NEXT:    fcvtzs z2.d, p0/m, z2.d
; CHECK-NEXT:    fcvtzs z1.d, p0/m, z1.d
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT:    uzp1 z2.s, z2.s, z3.s
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
; CHECK-NEXT:    ret
  %res = fptosi <vscale x 8 x double> %a to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}
; fptosi <vscale x 4 x float> -> <vscale x 4 x i64>.
; Expected code: the .s input is unpacked into low/high .d halves and each
; half is converted float->i64 by a predicated fcvtzs (z0/z1).
define <vscale x 4 x i64> @fcvtzs_d_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fcvtzs_d_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z1.d, z0.s
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    uunpkhi z2.d, z0.s
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z1.s
; CHECK-NEXT:    fcvtzs z1.d, p0/m, z2.s
; CHECK-NEXT:    ret
  %res = fptosi <vscale x 4 x float> %a to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %res
}
; fptosi <vscale x 16 x half> -> <vscale x 16 x i32>.
; Expected code: the input arrives in z0/z1; each register is unpacked into
; low/high .s halves and converted half->i32 by a predicated fcvtzs, giving a
; result in z0-z3.
define <vscale x 16 x i32> @fcvtzs_s_nxv16f16(<vscale x 16 x half> %a) {
; CHECK-LABEL: fcvtzs_s_nxv16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z2.s, z0.h
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    uunpkhi z3.s, z0.h
; CHECK-NEXT:    uunpklo z4.s, z1.h
; CHECK-NEXT:    uunpkhi z5.s, z1.h
; CHECK-NEXT:    fcvtzs z0.s, p0/m, z2.h
; CHECK-NEXT:    fcvtzs z1.s, p0/m, z3.h
; CHECK-NEXT:    fcvtzs z2.s, p0/m, z4.h
; CHECK-NEXT:    fcvtzs z3.s, p0/m, z5.h
; CHECK-NEXT:    ret
  %res = fptosi <vscale x 16 x half> %a to <vscale x 16 x i32>
  ret <vscale x 16 x i32> %res
}
; fptoui <vscale x 4 x double> -> <vscale x 4 x i32>.
; Expected code: z0/z1 are converted on .d lanes and merged with uzp1 on .s
; lanes into z0.
; NOTE(review): the autogenerated expectations use the signed fcvtzs here even
; though the IR is fptoui. Presumably the conversion is done in wider i64
; lanes, where the signed form covers every in-range u32 value -- confirm
; against the legalizer before changing; regenerate with
; utils/update_llc_test_checks.py rather than editing by hand.
define <vscale x 4 x i32> @fcvtzu_s_nxv4f64(<vscale x 4 x double> %a) {
; CHECK-LABEL: fcvtzu_s_nxv4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvtzs z1.d, p0/m, z1.d
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fptoui <vscale x 4 x double> %a to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}
; fptoui <vscale x 4 x float> -> <vscale x 4 x i64>.
; Expected code: the .s input is unpacked into low/high .d halves and each
; half is converted float->u64 by a predicated fcvtzu (z0/z1).
define <vscale x 4 x i64> @fcvtzu_d_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fcvtzu_d_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z1.d, z0.s
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    uunpkhi z2.d, z0.s
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z1.s
; CHECK-NEXT:    fcvtzu z1.d, p0/m, z2.s
; CHECK-NEXT:    ret
  %res = fptoui <vscale x 4 x float> %a to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %res
}
; sitofp <vscale x 4 x i64> -> <vscale x 4 x float>.
; Expected code: z0/z1 are each converted i64->float with a predicated scvtf
; and merged with uzp1 on .s lanes into z0.
define <vscale x 4 x float> @scvtf_s_nxv4i64(<vscale x 4 x i64> %a) {
; CHECK-LABEL: scvtf_s_nxv4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    scvtf z1.s, p0/m, z1.d
; CHECK-NEXT:    scvtf z0.s, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = sitofp <vscale x 4 x i64> %a to <vscale x 4 x float>
  ret <vscale x 4 x float> %res
}
; sitofp <vscale x 8 x i64> -> <vscale x 8 x half>.
; Expected code: z0-z3 are each converted i64->half with a predicated scvtf,
; merged pairwise with uzp1 on .s lanes, then merged with uzp1 on .h lanes
; into z0.
define <vscale x 8 x half> @scvtf_h_nxv8i64(<vscale x 8 x i64> %a) {
; CHECK-LABEL: scvtf_h_nxv8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    scvtf z3.h, p0/m, z3.d
; CHECK-NEXT:    scvtf z2.h, p0/m, z2.d
; CHECK-NEXT:    scvtf z1.h, p0/m, z1.d
; CHECK-NEXT:    scvtf z0.h, p0/m, z0.d
; CHECK-NEXT:    uzp1 z2.s, z2.s, z3.s
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
; CHECK-NEXT:    ret
  %res = sitofp <vscale x 8 x i64> %a to <vscale x 8 x half>
  ret <vscale x 8 x half> %res
}
; sitofp <vscale x 16 x i8> -> <vscale x 16 x float>.
; Expected code: the .b input is sign-unpacked twice (.b -> .h, then
; .h -> .s) into four .s vectors, each converted i32->float by a predicated
; scvtf; the result occupies z0-z3.
define <vscale x 16 x float> @scvtf_s_nxv16i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: scvtf_s_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sunpklo z1.h, z0.b
; CHECK-NEXT:    sunpkhi z0.h, z0.b
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    sunpklo z2.s, z1.h
; CHECK-NEXT:    sunpkhi z1.s, z1.h
; CHECK-NEXT:    sunpklo z3.s, z0.h
; CHECK-NEXT:    sunpkhi z4.s, z0.h
; CHECK-NEXT:    scvtf z0.s, p0/m, z2.s
; CHECK-NEXT:    scvtf z1.s, p0/m, z1.s
; CHECK-NEXT:    scvtf z2.s, p0/m, z3.s
; CHECK-NEXT:    scvtf z3.s, p0/m, z4.s
; CHECK-NEXT:    ret
  %res = sitofp <vscale x 16 x i8> %a to <vscale x 16 x float>
  ret <vscale x 16 x float> %res
}
; sitofp <vscale x 4 x i32> -> <vscale x 4 x double>.
; Expected code: the .s input is sign-unpacked (sunpklo/sunpkhi) into two .d
; halves, each converted i64->double by a predicated scvtf (z0/z1).
define <vscale x 4 x double> @scvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: scvtf_d_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sunpklo z1.d, z0.s
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    sunpkhi z2.d, z0.s
; CHECK-NEXT:    scvtf z0.d, p0/m, z1.d
; CHECK-NEXT:    scvtf z1.d, p0/m, z2.d
; CHECK-NEXT:    ret
  %res = sitofp <vscale x 4 x i32> %a to <vscale x 4 x double>
  ret <vscale x 4 x double> %res
}
; sitofp <vscale x 4 x i1> -> <vscale x 4 x double>.
; Expected code: the predicate input p0 is split in two by zip1/zip2 against
; an all-false predicate; each half selects #-1 (zeroing inactive lanes) into
; a .d vector, which is then converted by scvtf under an all-true predicate.
define <vscale x 4 x double> @scvtf_d_nxv4i1(<vscale x 4 x i1> %a) {
; CHECK-LABEL: scvtf_d_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    pfalse p1.b
; CHECK-NEXT:    zip1 p3.s, p0.s, p1.s
; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
; CHECK-NEXT:    ptrue p2.d
; CHECK-NEXT:    mov z0.d, p3/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z1.d, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    scvtf z0.d, p2/m, z0.d
; CHECK-NEXT:    scvtf z1.d, p2/m, z1.d
; CHECK-NEXT:    ret
  %res = sitofp <vscale x 4 x i1> %a to <vscale x 4 x double>
  ret <vscale x 4 x double> %res
}
; uitofp <vscale x 4 x i64> -> <vscale x 4 x float>.
; Expected code: z0/z1 are each converted u64->float with a predicated ucvtf
; and merged with uzp1 on .s lanes into z0.
define <vscale x 4 x float> @ucvtf_s_nxv4i64(<vscale x 4 x i64> %a) {
; CHECK-LABEL: ucvtf_s_nxv4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ucvtf z1.s, p0/m, z1.d
; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = uitofp <vscale x 4 x i64> %a to <vscale x 4 x float>
  ret <vscale x 4 x float> %res
}
; uitofp <vscale x 8 x i64> -> <vscale x 8 x half>.
; Expected code: z0-z3 are each converted u64->half with a predicated ucvtf,
; merged pairwise with uzp1 on .s lanes, then merged with uzp1 on .h lanes
; into z0.
define <vscale x 8 x half> @ucvtf_h_nxv8i64(<vscale x 8 x i64> %a) {
; CHECK-LABEL: ucvtf_h_nxv8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ucvtf z3.h, p0/m, z3.d
; CHECK-NEXT:    ucvtf z2.h, p0/m, z2.d
; CHECK-NEXT:    ucvtf z1.h, p0/m, z1.d
; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.d
; CHECK-NEXT:    uzp1 z2.s, z2.s, z3.s
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
; CHECK-NEXT:    ret
  %res = uitofp <vscale x 8 x i64> %a to <vscale x 8 x half>
  ret <vscale x 8 x half> %res
}
; uitofp <vscale x 4 x i32> -> <vscale x 4 x double>.
; Expected code: the .s input is zero-unpacked (uunpklo/uunpkhi) into two .d
; halves, each converted u64->double by a predicated ucvtf (z0/z1).
define <vscale x 4 x double> @ucvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: ucvtf_d_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z1.d, z0.s
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    uunpkhi z2.d, z0.s
; CHECK-NEXT:    ucvtf z0.d, p0/m, z1.d
; CHECK-NEXT:    ucvtf z1.d, p0/m, z2.d
; CHECK-NEXT:    ret
  %res = uitofp <vscale x 4 x i32> %a to <vscale x 4 x double>
  ret <vscale x 4 x double> %res
}
; uitofp <vscale x 4 x i1> -> <vscale x 4 x double>.
; Expected code: the predicate input p0 is split in two by zip1/zip2 against
; an all-false predicate; each half selects #1 (zeroing inactive lanes) into
; a .d vector, which is then converted by ucvtf under an all-true predicate.
define <vscale x 4 x double> @ucvtf_d_nxv4i1(<vscale x 4 x i1> %a) {
; CHECK-LABEL: ucvtf_d_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    pfalse p1.b
; CHECK-NEXT:    zip1 p3.s, p0.s, p1.s
; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
; CHECK-NEXT:    ptrue p2.d
; CHECK-NEXT:    mov z0.d, p3/z, #1 // =0x1
; CHECK-NEXT:    mov z1.d, p0/z, #1 // =0x1
; CHECK-NEXT:    ucvtf z0.d, p2/m, z0.d
; CHECK-NEXT:    ucvtf z1.d, p2/m, z1.d
; CHECK-NEXT:    ret
  %res = uitofp <vscale x 4 x i1> %a to <vscale x 4 x double>
  ret <vscale x 4 x double> %res
}