; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s
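
; Lowering of the llvm.fptoui.sat.* intrinsics (saturating float-to-unsigned
; conversion) for scalable vectors, covering f32, f64 and f16 sources and
; i16/i32/i64 results.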

declare <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f32.nxv2i32(<vscale x 2 x float>)
declare <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f32.nxv4i32(<vscale x 4 x float>)
declare <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f32.nxv8i32(<vscale x 8 x float>)
declare <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f32.nxv4i16(<vscale x 4 x float>)
declare <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f32.nxv8i16(<vscale x 8 x float>)
declare <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f32.nxv2i64(<vscale x 2 x float>)
declare <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f32.nxv4i64(<vscale x 4 x float>)

define <vscale x 2 x i32> @test_signed_v2f32_v2i32(<vscale x 2 x float> %f) {
; CHECK-LABEL: test_signed_v2f32_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov w8, #1333788671 // =0x4f7fffff
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.s
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: mov z0.d, #0xffffffff
; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0
; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d
; CHECK-NEXT: ret
  %x = call <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f32.nxv2i32(<vscale x 2 x float> %f)
  ret <vscale x 2 x i32> %x
}

define <vscale x 4 x i32> @test_signed_v4f32_v4i32(<vscale x 4 x float> %f) {
; CHECK-LABEL: test_signed_v4f32_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov w8, #1333788671 // =0x4f7fffff
; CHECK-NEXT: mov z2.s, w8
; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: fcvtzu z1.s, p0/m, z0.s
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z2.s
; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0
; CHECK-NEXT: mov z1.s, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
  %x = call <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f32.nxv4i32(<vscale x 4 x float> %f)
  ret <vscale x 4 x i32> %x
}

define <vscale x 8 x i32> @test_signed_v8f32_v8i32(<vscale x 8 x float> %f) {
; CHECK-LABEL: test_signed_v8f32_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov w8, #1333788671 // =0x4f7fffff
; CHECK-NEXT: mov z4.s, w8
; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT: fcmge p2.s, p0/z, z1.s, #0.0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: fcvtzu z2.s, p0/m, z0.s
; CHECK-NEXT: movprfx z3, z1
; CHECK-NEXT: fcvtzu z3.s, p0/m, z1.s
; CHECK-NEXT: fcmgt p3.s, p0/z, z0.s, z4.s
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: not p2.b, p0/z, p2.b
; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z4.s
; CHECK-NEXT: mov z2.s, p1/m, #0 // =0x0
; CHECK-NEXT: mov z3.s, p2/m, #0 // =0x0
; CHECK-NEXT: mov z2.s, p3/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z3.s, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z0.d, z2.d
; CHECK-NEXT: mov z1.d, z3.d
; CHECK-NEXT: ret
  %x = call <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f32.nxv8i32(<vscale x 8 x float> %f)
  ret <vscale x 8 x i32> %x
}

define <vscale x 4 x i16> @test_signed_v4f32_v4i16(<vscale x 4 x float> %f) {
; CHECK-LABEL: test_signed_v4f32_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov w8, #65280 // =0xff00
; CHECK-NEXT: movk w8, #18303, lsl #16
; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: fcvtzu z2.s, p0/m, z0.s
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: mov z0.s, #65535 // =0xffff
; CHECK-NEXT: mov z2.s, p1/m, #0 // =0x0
; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s
; CHECK-NEXT: ret
  %x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f32.nxv4i16(<vscale x 4 x float> %f)
  ret <vscale x 4 x i16> %x
}

define <vscale x 8 x i16> @test_signed_v8f32_v8i16(<vscale x 8 x float> %f) {
; CHECK-LABEL: test_signed_v8f32_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov w8, #65280 // =0xff00
; CHECK-NEXT: movk w8, #18303, lsl #16
; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, #0.0
; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, #0.0
; CHECK-NEXT: mov z2.s, w8
; CHECK-NEXT: movprfx z3, z1
; CHECK-NEXT: fcvtzu z3.s, p0/m, z1.s
; CHECK-NEXT: movprfx z4, z0
; CHECK-NEXT: fcvtzu z4.s, p0/m, z0.s
; CHECK-NEXT: fcmgt p3.s, p0/z, z1.s, z2.s
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: not p2.b, p0/z, p2.b
; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z2.s
; CHECK-NEXT: mov z0.s, #65535 // =0xffff
; CHECK-NEXT: mov z3.s, p1/m, #0 // =0x0
; CHECK-NEXT: mov z4.s, p2/m, #0 // =0x0
; CHECK-NEXT: sel z1.s, p3, z0.s, z3.s
; CHECK-NEXT: sel z0.s, p0, z0.s, z4.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %x = call <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f32.nxv8i16(<vscale x 8 x float> %f)
  ret <vscale x 8 x i16> %x
}

define <vscale x 2 x i64> @test_signed_v2f32_v2i64(<vscale x 2 x float> %f) {
; CHECK-LABEL: test_signed_v2f32_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov w8, #1602224127 // =0x5f7fffff
; CHECK-NEXT: mov z2.s, w8
; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.s
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z2.s
; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0
; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
  %x = call <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f32.nxv2i64(<vscale x 2 x float> %f)
  ret <vscale x 2 x i64> %x
}

define <vscale x 4 x i64> @test_signed_v4f32_v4i64(<vscale x 4 x float> %f) {
; CHECK-LABEL: test_signed_v4f32_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z2.d, z0.s
; CHECK-NEXT: uunpkhi z3.d, z0.s
; CHECK-NEXT: mov w8, #1602224127 // =0x5f7fffff
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z4.s, w8
; CHECK-NEXT: fcmge p1.s, p0/z, z2.s, #0.0
; CHECK-NEXT: fcmge p2.s, p0/z, z3.s, #0.0
; CHECK-NEXT: movprfx z0, z2
; CHECK-NEXT: fcvtzu z0.d, p0/m, z2.s
; CHECK-NEXT: movprfx z1, z3
; CHECK-NEXT: fcvtzu z1.d, p0/m, z3.s
; CHECK-NEXT: fcmgt p3.s, p0/z, z2.s, z4.s
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: not p2.b, p0/z, p2.b
; CHECK-NEXT: fcmgt p0.s, p0/z, z3.s, z4.s
; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0
; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0
; CHECK-NEXT: mov z0.d, p3/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
  %x = call <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f32.nxv4i64(<vscale x 4 x float> %f)
  ret <vscale x 4 x i64> %x
}
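
; Conversions from double (f64) sources.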

declare <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f64.nxv2i32(<vscale x 2 x double>)
declare <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f64.nxv4i32(<vscale x 4 x double>)
declare <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f64.nxv8i32(<vscale x 8 x double>)
declare <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f64.nxv4i16(<vscale x 4 x double>)
declare <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f64.nxv8i16(<vscale x 8 x double>)
declare <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f64.nxv2i64(<vscale x 2 x double>)
declare <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f64.nxv4i64(<vscale x 4 x double>)

define <vscale x 2 x i32> @test_signed_v2f64_v2i32(<vscale x 2 x double> %f) {
; CHECK-LABEL: test_signed_v2f64_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, #281474974613504 // =0xffffffe00000
; CHECK-NEXT: movk x8, #16879, lsl #48
; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.d
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z1.d
; CHECK-NEXT: mov z0.d, #0xffffffff
; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0
; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d
; CHECK-NEXT: ret
  %x = call <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f64.nxv2i32(<vscale x 2 x double> %f)
  ret <vscale x 2 x i32> %x
}

define <vscale x 4 x i32> @test_signed_v4f64_v4i32(<vscale x 4 x double> %f) {
; CHECK-LABEL: test_signed_v4f64_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, #281474974613504 // =0xffffffe00000
; CHECK-NEXT: movk x8, #16879, lsl #48
; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0
; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, #0.0
; CHECK-NEXT: mov z2.d, x8
; CHECK-NEXT: movprfx z3, z1
; CHECK-NEXT: fcvtzu z3.d, p0/m, z1.d
; CHECK-NEXT: movprfx z4, z0
; CHECK-NEXT: fcvtzu z4.d, p0/m, z0.d
; CHECK-NEXT: fcmgt p3.d, p0/z, z1.d, z2.d
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: not p2.b, p0/z, p2.b
; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z2.d
; CHECK-NEXT: mov z0.d, #0xffffffff
; CHECK-NEXT: mov z3.d, p1/m, #0 // =0x0
; CHECK-NEXT: mov z4.d, p2/m, #0 // =0x0
; CHECK-NEXT: sel z1.d, p3, z0.d, z3.d
; CHECK-NEXT: sel z0.d, p0, z0.d, z4.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %x = call <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f64.nxv4i32(<vscale x 4 x double> %f)
  ret <vscale x 4 x i32> %x
}

define <vscale x 8 x i32> @test_signed_v8f64_v8i32(<vscale x 8 x double> %f) {
; CHECK-LABEL: test_signed_v8f64_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, #281474974613504 // =0xffffffe00000
; CHECK-NEXT: movk x8, #16879, lsl #48
; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0
; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, #0.0
; CHECK-NEXT: fcmge p3.d, p0/z, z3.d, #0.0
; CHECK-NEXT: fcmge p4.d, p0/z, z2.d, #0.0
; CHECK-NEXT: movprfx z5, z1
; CHECK-NEXT: fcvtzu z5.d, p0/m, z1.d
; CHECK-NEXT: mov z4.d, x8
; CHECK-NEXT: movprfx z6, z0
; CHECK-NEXT: fcvtzu z6.d, p0/m, z0.d
; CHECK-NEXT: movprfx z7, z3
; CHECK-NEXT: fcvtzu z7.d, p0/m, z3.d
; CHECK-NEXT: movprfx z24, z2
; CHECK-NEXT: fcvtzu z24.d, p0/m, z2.d
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: fcmgt p5.d, p0/z, z1.d, z4.d
; CHECK-NEXT: fcmgt p6.d, p0/z, z0.d, z4.d
; CHECK-NEXT: not p2.b, p0/z, p2.b
; CHECK-NEXT: mov z0.d, #0xffffffff
; CHECK-NEXT: not p3.b, p0/z, p3.b
; CHECK-NEXT: mov z5.d, p1/m, #0 // =0x0
; CHECK-NEXT: fcmgt p1.d, p0/z, z3.d, z4.d
; CHECK-NEXT: not p4.b, p0/z, p4.b
; CHECK-NEXT: fcmgt p0.d, p0/z, z2.d, z4.d
; CHECK-NEXT: mov z6.d, p2/m, #0 // =0x0
; CHECK-NEXT: mov z7.d, p3/m, #0 // =0x0
; CHECK-NEXT: mov z24.d, p4/m, #0 // =0x0
; CHECK-NEXT: sel z1.d, p5, z0.d, z5.d
; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: sel z2.d, p6, z0.d, z6.d
; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: sel z3.d, p1, z0.d, z7.d
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: sel z4.d, p0, z0.d, z24.d
; CHECK-NEXT: uzp1 z0.s, z2.s, z1.s
; CHECK-NEXT: uzp1 z1.s, z4.s, z3.s
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %x = call <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f64.nxv8i32(<vscale x 8 x double> %f)
  ret <vscale x 8 x i32> %x
}

define <vscale x 4 x i16> @test_signed_v4f64_v4i16(<vscale x 4 x double> %f) {
; CHECK-LABEL: test_signed_v4f64_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, #281337537757184 // =0xffe000000000
; CHECK-NEXT: movk x8, #16623, lsl #48
; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0
; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, #0.0
; CHECK-NEXT: mov z2.d, x8
; CHECK-NEXT: movprfx z3, z1
; CHECK-NEXT: fcvtzu z3.d, p0/m, z1.d
; CHECK-NEXT: movprfx z4, z0
; CHECK-NEXT: fcvtzu z4.d, p0/m, z0.d
; CHECK-NEXT: fcmgt p3.d, p0/z, z1.d, z2.d
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: not p2.b, p0/z, p2.b
; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z2.d
; CHECK-NEXT: mov z0.d, #65535 // =0xffff
; CHECK-NEXT: mov z3.d, p1/m, #0 // =0x0
; CHECK-NEXT: mov z4.d, p2/m, #0 // =0x0
; CHECK-NEXT: sel z1.d, p3, z0.d, z3.d
; CHECK-NEXT: sel z0.d, p0, z0.d, z4.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f64.nxv4i16(<vscale x 4 x double> %f)
  ret <vscale x 4 x i16> %x
}

define <vscale x 8 x i16> @test_signed_v8f64_v8i16(<vscale x 8 x double> %f) {
; CHECK-LABEL: test_signed_v8f64_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, #281337537757184 // =0xffe000000000
; CHECK-NEXT: movk x8, #16623, lsl #48
; CHECK-NEXT: fcmge p1.d, p0/z, z3.d, #0.0
; CHECK-NEXT: fcmge p2.d, p0/z, z2.d, #0.0
; CHECK-NEXT: fcmge p3.d, p0/z, z1.d, #0.0
; CHECK-NEXT: fcmge p4.d, p0/z, z0.d, #0.0
; CHECK-NEXT: movprfx z5, z3
; CHECK-NEXT: fcvtzu z5.d, p0/m, z3.d
; CHECK-NEXT: mov z4.d, x8
; CHECK-NEXT: movprfx z6, z2
; CHECK-NEXT: fcvtzu z6.d, p0/m, z2.d
; CHECK-NEXT: movprfx z7, z1
; CHECK-NEXT: fcvtzu z7.d, p0/m, z1.d
; CHECK-NEXT: movprfx z24, z0
; CHECK-NEXT: fcvtzu z24.d, p0/m, z0.d
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: fcmgt p5.d, p0/z, z3.d, z4.d
; CHECK-NEXT: fcmgt p6.d, p0/z, z2.d, z4.d
; CHECK-NEXT: not p2.b, p0/z, p2.b
; CHECK-NEXT: mov z2.d, #65535 // =0xffff
; CHECK-NEXT: not p3.b, p0/z, p3.b
; CHECK-NEXT: mov z5.d, p1/m, #0 // =0x0
; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z4.d
; CHECK-NEXT: not p4.b, p0/z, p4.b
; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z4.d
; CHECK-NEXT: mov z6.d, p2/m, #0 // =0x0
; CHECK-NEXT: mov z7.d, p3/m, #0 // =0x0
; CHECK-NEXT: mov z24.d, p4/m, #0 // =0x0
; CHECK-NEXT: sel z0.d, p5, z2.d, z5.d
; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: sel z1.d, p6, z2.d, z6.d
; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: sel z3.d, p1, z2.d, z7.d
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: sel z2.d, p0, z2.d, z24.d
; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s
; CHECK-NEXT: uzp1 z1.s, z2.s, z3.s
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %x = call <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f64.nxv8i16(<vscale x 8 x double> %f)
  ret <vscale x 8 x i16> %x
}

define <vscale x 2 x i64> @test_signed_v2f64_v2i64(<vscale x 2 x double> %f) {
; CHECK-LABEL: test_signed_v2f64_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, #4895412794951729151 // =0x43efffffffffffff
; CHECK-NEXT: mov z2.d, x8
; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.d
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z2.d
; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0
; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
  %x = call <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f64.nxv2i64(<vscale x 2 x double> %f)
  ret <vscale x 2 x i64> %x
}

define <vscale x 4 x i64> @test_signed_v4f64_v4i64(<vscale x 4 x double> %f) {
; CHECK-LABEL: test_signed_v4f64_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, #4895412794951729151 // =0x43efffffffffffff
; CHECK-NEXT: mov z4.d, x8
; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0
; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, #0.0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.d
; CHECK-NEXT: movprfx z3, z1
; CHECK-NEXT: fcvtzu z3.d, p0/m, z1.d
; CHECK-NEXT: fcmgt p3.d, p0/z, z0.d, z4.d
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: not p2.b, p0/z, p2.b
; CHECK-NEXT: fcmgt p0.d, p0/z, z1.d, z4.d
; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0
; CHECK-NEXT: mov z3.d, p2/m, #0 // =0x0
; CHECK-NEXT: mov z2.d, p3/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z3.d, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z0.d, z2.d
; CHECK-NEXT: mov z1.d, z3.d
; CHECK-NEXT: ret
  %x = call <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f64.nxv4i64(<vscale x 4 x double> %f)
  ret <vscale x 4 x i64> %x
}
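
; Conversions from half (f16) sources.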

declare <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f16.nxv2i32(<vscale x 2 x half>)
declare <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f16.nxv4i32(<vscale x 4 x half>)
declare <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f16.nxv8i32(<vscale x 8 x half>)
declare <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f16.nxv4i16(<vscale x 4 x half>)
declare <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f16.nxv8i16(<vscale x 8 x half>)
declare <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f16.nxv2i64(<vscale x 2 x half>)
declare <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f16.nxv4i64(<vscale x 4 x half>)

define <vscale x 2 x i32> @test_signed_v2f16_v2i32(<vscale x 2 x half> %f) {
; CHECK-LABEL: test_signed_v2f16_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov w8, #31743 // =0x7bff
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.h
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT: mov z0.d, #0xffffffff
; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0
; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d
; CHECK-NEXT: ret
  %x = call <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f16.nxv2i32(<vscale x 2 x half> %f)
  ret <vscale x 2 x i32> %x
}

define <vscale x 4 x i32> @test_signed_v4f16_v4i32(<vscale x 4 x half> %f) {
; CHECK-LABEL: test_signed_v4f16_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov w8, #31743 // =0x7bff
; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: fcvtzu z1.s, p0/m, z0.h
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h
; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0
; CHECK-NEXT: mov z1.s, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
  %x = call <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f16.nxv4i32(<vscale x 4 x half> %f)
  ret <vscale x 4 x i32> %x
}

define <vscale x 8 x i32> @test_signed_v8f16_v8i32(<vscale x 8 x half> %f) {
; CHECK-LABEL: test_signed_v8f16_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z2.s, z0.h
; CHECK-NEXT: uunpkhi z3.s, z0.h
; CHECK-NEXT: mov w8, #31743 // =0x7bff
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z4.h, w8
; CHECK-NEXT: fcmge p1.h, p0/z, z2.h, #0.0
; CHECK-NEXT: fcmge p2.h, p0/z, z3.h, #0.0
; CHECK-NEXT: movprfx z0, z2
; CHECK-NEXT: fcvtzu z0.s, p0/m, z2.h
; CHECK-NEXT: movprfx z1, z3
; CHECK-NEXT: fcvtzu z1.s, p0/m, z3.h
; CHECK-NEXT: fcmgt p3.h, p0/z, z2.h, z4.h
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: not p2.b, p0/z, p2.b
; CHECK-NEXT: fcmgt p0.h, p0/z, z3.h, z4.h
; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0
; CHECK-NEXT: mov z1.s, p2/m, #0 // =0x0
; CHECK-NEXT: mov z0.s, p3/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z1.s, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
  %x = call <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f16.nxv8i32(<vscale x 8 x half> %f)
  ret <vscale x 8 x i32> %x
}

define <vscale x 4 x i16> @test_signed_v4f16_v4i16(<vscale x 4 x half> %f) {
; CHECK-LABEL: test_signed_v4f16_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov w8, #31743 // =0x7bff
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: fcvtzu z2.s, p0/m, z0.h
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT: mov z0.s, #65535 // =0xffff
; CHECK-NEXT: mov z2.s, p1/m, #0 // =0x0
; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s
; CHECK-NEXT: ret
  %x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f16.nxv4i16(<vscale x 4 x half> %f)
  ret <vscale x 4 x i16> %x
}

define <vscale x 8 x i16> @test_signed_v8f16_v8i16(<vscale x 8 x half> %f) {
; CHECK-LABEL: test_signed_v8f16_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov w8, #31743 // =0x7bff
; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: fcvtzu z1.h, p0/m, z0.h
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h
; CHECK-NEXT: mov z1.h, p1/m, #0 // =0x0
; CHECK-NEXT: mov z1.h, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
  %x = call <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f16.nxv8i16(<vscale x 8 x half> %f)
  ret <vscale x 8 x i16> %x
}

define <vscale x 2 x i64> @test_signed_v2f16_v2i64(<vscale x 2 x half> %f) {
; CHECK-LABEL: test_signed_v2f16_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov w8, #31743 // =0x7bff
; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.h
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h
; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0
; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
  %x = call <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f16.nxv2i64(<vscale x 2 x half> %f)
  ret <vscale x 2 x i64> %x
}

define <vscale x 4 x i64> @test_signed_v4f16_v4i64(<vscale x 4 x half> %f) {
; CHECK-LABEL: test_signed_v4f16_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z2.d, z0.s
; CHECK-NEXT: uunpkhi z3.d, z0.s
; CHECK-NEXT: mov w8, #31743 // =0x7bff
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z4.h, w8
; CHECK-NEXT: fcmge p1.h, p0/z, z2.h, #0.0
; CHECK-NEXT: fcmge p2.h, p0/z, z3.h, #0.0
; CHECK-NEXT: movprfx z0, z2
; CHECK-NEXT: fcvtzu z0.d, p0/m, z2.h
; CHECK-NEXT: movprfx z1, z3
; CHECK-NEXT: fcvtzu z1.d, p0/m, z3.h
; CHECK-NEXT: fcmgt p3.h, p0/z, z2.h, z4.h
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: not p2.b, p0/z, p2.b
; CHECK-NEXT: fcmgt p0.h, p0/z, z3.h, z4.h
; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0
; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0
; CHECK-NEXT: mov z0.d, p3/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ret
  %x = call <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f16.nxv4i64(<vscale x 4 x half> %f)
  ret <vscale x 4 x i64> %x
}