1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s
; Declarations of the saturating float -> unsigned-integer intrinsics used by
; the scalable-vector tests with a `float` element source.
; NOTE(review): the name suffixes here list the source type before the result
; type (e.g. `.nxv2f32.nxv2i32`); the LangRef examples put the result type
; first -- confirm the parser's intrinsic remangling is being relied upon.
6 declare <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f32.nxv2i32(<vscale x 2 x float>)
7 declare <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f32.nxv4i32(<vscale x 4 x float>)
8 declare <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f32.nxv8i32(<vscale x 8 x float>)
9 declare <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f32.nxv4i16(<vscale x 4 x float>)
10 declare <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f32.nxv8i16(<vscale x 8 x float>)
11 declare <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f32.nxv2i64(<vscale x 2 x float>)
12 declare <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f32.nxv4i64(<vscale x 4 x float>)
; <vscale x 2 x float> -> <vscale x 2 x i32> through llvm.fptoui.sat.
; Expected lowering: fcvtzu into .d lanes; lanes for which `fcmge ..., #0.0`
; does not hold are zeroed; lanes comparing greater than the splatted bound
; 0x4f7fffff are replaced with 0xffffffff via `sel`.
; NOTE(review): the name says "signed" but the intrinsic called is the
; unsigned fptoui.sat; the name is left alone because CHECK-LABEL depends on it.
14 define <vscale x 2 x i32> @test_signed_v2f32_v2i32(<vscale x 2 x float> %f) {
15 ; CHECK-LABEL: test_signed_v2f32_v2i32:
17 ; CHECK-NEXT: ptrue p0.d
18 ; CHECK-NEXT: mov w8, #1333788671 // =0x4f7fffff
19 ; CHECK-NEXT: movprfx z2, z0
20 ; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.s
21 ; CHECK-NEXT: mov z1.s, w8
22 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0
23 ; CHECK-NEXT: not p1.b, p0/z, p1.b
24 ; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s
25 ; CHECK-NEXT: mov z0.d, #0xffffffff
26 ; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0
27 ; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d
29 %x = call <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f32.nxv2i32(<vscale x 2 x float> %f)
30 ret <vscale x 2 x i32> %x
; <vscale x 4 x float> -> <vscale x 4 x i32> through llvm.fptoui.sat.
; Expected lowering: fcvtzu on .s lanes into z1; lanes for which
; `fcmge ..., #0.0` does not hold are zeroed; lanes greater than the splatted
; bound 0x4f7fffff are overwritten with #-1 (all ones); z1 is then copied to z0.
; NOTE(review): the name says "signed" but the intrinsic called is the
; unsigned fptoui.sat; the name is left alone because CHECK-LABEL depends on it.
33 define <vscale x 4 x i32> @test_signed_v4f32_v4i32(<vscale x 4 x float> %f) {
34 ; CHECK-LABEL: test_signed_v4f32_v4i32:
36 ; CHECK-NEXT: ptrue p0.s
37 ; CHECK-NEXT: mov w8, #1333788671 // =0x4f7fffff
38 ; CHECK-NEXT: movprfx z1, z0
39 ; CHECK-NEXT: fcvtzu z1.s, p0/m, z0.s
40 ; CHECK-NEXT: mov z2.s, w8
41 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0
42 ; CHECK-NEXT: not p1.b, p0/z, p1.b
43 ; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z2.s
44 ; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0
45 ; CHECK-NEXT: mov z1.s, p0/m, #-1 // =0xffffffffffffffff
46 ; CHECK-NEXT: mov z0.d, z1.d
48 %x = call <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f32.nxv4i32(<vscale x 4 x float> %f)
49 ret <vscale x 4 x i32> %x
; <vscale x 8 x float> -> <vscale x 8 x i32> through llvm.fptoui.sat.
; The checks show the input handled as two registers (z0 and z1): each is
; converted with fcvtzu, zeroed where `fcmge ..., #0.0` does not hold, and set
; to #-1 where it exceeds the shared splatted bound 0x4f7fffff (held in z4).
; Results are copied back into z0 and z1.
; NOTE(review): the name says "signed" but the intrinsic called is the
; unsigned fptoui.sat; the name is left alone because CHECK-LABEL depends on it.
52 define <vscale x 8 x i32> @test_signed_v8f32_v8i32(<vscale x 8 x float> %f) {
53 ; CHECK-LABEL: test_signed_v8f32_v8i32:
55 ; CHECK-NEXT: ptrue p0.s
56 ; CHECK-NEXT: mov w8, #1333788671 // =0x4f7fffff
57 ; CHECK-NEXT: movprfx z2, z0
58 ; CHECK-NEXT: fcvtzu z2.s, p0/m, z0.s
59 ; CHECK-NEXT: mov z4.s, w8
60 ; CHECK-NEXT: movprfx z3, z1
61 ; CHECK-NEXT: fcvtzu z3.s, p0/m, z1.s
62 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0
63 ; CHECK-NEXT: fcmge p2.s, p0/z, z1.s, #0.0
64 ; CHECK-NEXT: fcmgt p3.s, p0/z, z0.s, z4.s
65 ; CHECK-NEXT: not p1.b, p0/z, p1.b
66 ; CHECK-NEXT: not p2.b, p0/z, p2.b
67 ; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z4.s
68 ; CHECK-NEXT: mov z2.s, p1/m, #0 // =0x0
69 ; CHECK-NEXT: mov z3.s, p2/m, #0 // =0x0
70 ; CHECK-NEXT: mov z2.s, p3/m, #-1 // =0xffffffffffffffff
71 ; CHECK-NEXT: mov z3.s, p0/m, #-1 // =0xffffffffffffffff
72 ; CHECK-NEXT: mov z0.d, z2.d
73 ; CHECK-NEXT: mov z1.d, z3.d
75 %x = call <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f32.nxv8i32(<vscale x 8 x float> %f)
76 ret <vscale x 8 x i32> %x
; <vscale x 4 x float> -> <vscale x 4 x i16> through llvm.fptoui.sat.
; Expected lowering: fcvtzu on .s lanes; the upper bound is materialised in w8
; with mov #0xff00 + movk #18303, lsl #16 (bit pattern 0x477fff00) and splatted;
; lanes for which `fcmge ..., #0.0` does not hold are zeroed; lanes above the
; bound are replaced with 65535 via `sel`.
; NOTE(review): the name says "signed" but the intrinsic called is the
; unsigned fptoui.sat; the name is left alone because CHECK-LABEL depends on it.
79 define <vscale x 4 x i16> @test_signed_v4f32_v4i16(<vscale x 4 x float> %f) {
80 ; CHECK-LABEL: test_signed_v4f32_v4i16:
82 ; CHECK-NEXT: ptrue p0.s
83 ; CHECK-NEXT: mov w8, #65280 // =0xff00
84 ; CHECK-NEXT: movprfx z2, z0
85 ; CHECK-NEXT: fcvtzu z2.s, p0/m, z0.s
86 ; CHECK-NEXT: movk w8, #18303, lsl #16
87 ; CHECK-NEXT: mov z1.s, w8
88 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0
89 ; CHECK-NEXT: not p1.b, p0/z, p1.b
90 ; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s
91 ; CHECK-NEXT: mov z0.s, #65535 // =0xffff
92 ; CHECK-NEXT: mov z2.s, p1/m, #0 // =0x0
93 ; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s
95 %x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f32.nxv4i16(<vscale x 4 x float> %f)
96 ret <vscale x 4 x i16> %x
; <vscale x 8 x float> -> <vscale x 8 x i16> through llvm.fptoui.sat.
; The checks show the input handled as two registers (z0 and z1): each is
; converted with fcvtzu on .s lanes, zeroed where `fcmge ..., #0.0` does not
; hold, and replaced with 65535 where it exceeds the splatted bound built by
; mov #0xff00 + movk #18303, lsl #16. The two .s results are then packed into
; one .h vector with uzp1.
; NOTE(review): the name says "signed" but the intrinsic called is the
; unsigned fptoui.sat; the name is left alone because CHECK-LABEL depends on it.
99 define <vscale x 8 x i16> @test_signed_v8f32_v8i16(<vscale x 8 x float> %f) {
100 ; CHECK-LABEL: test_signed_v8f32_v8i16:
102 ; CHECK-NEXT: ptrue p0.s
103 ; CHECK-NEXT: mov w8, #65280 // =0xff00
104 ; CHECK-NEXT: movprfx z3, z1
105 ; CHECK-NEXT: fcvtzu z3.s, p0/m, z1.s
106 ; CHECK-NEXT: movk w8, #18303, lsl #16
107 ; CHECK-NEXT: movprfx z4, z0
108 ; CHECK-NEXT: fcvtzu z4.s, p0/m, z0.s
109 ; CHECK-NEXT: mov z2.s, w8
110 ; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, #0.0
111 ; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, #0.0
112 ; CHECK-NEXT: fcmgt p3.s, p0/z, z1.s, z2.s
113 ; CHECK-NEXT: not p1.b, p0/z, p1.b
114 ; CHECK-NEXT: not p2.b, p0/z, p2.b
115 ; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z2.s
116 ; CHECK-NEXT: mov z0.s, #65535 // =0xffff
117 ; CHECK-NEXT: mov z3.s, p1/m, #0 // =0x0
118 ; CHECK-NEXT: mov z4.s, p2/m, #0 // =0x0
119 ; CHECK-NEXT: sel z1.s, p3, z0.s, z3.s
120 ; CHECK-NEXT: sel z0.s, p0, z0.s, z4.s
121 ; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h
123 %x = call <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f32.nxv8i16(<vscale x 8 x float> %f)
124 ret <vscale x 8 x i16> %x
; <vscale x 2 x float> -> <vscale x 2 x i64> through llvm.fptoui.sat.
; Expected lowering: fcvtzu from .s source into .d lanes; lanes for which
; `fcmge ..., #0.0` does not hold are zeroed; lanes greater than the splatted
; bound 0x5f7fffff are overwritten with #-1 (all ones); z1 is copied to z0.
; NOTE(review): the name says "signed" but the intrinsic called is the
; unsigned fptoui.sat; the name is left alone because CHECK-LABEL depends on it.
127 define <vscale x 2 x i64> @test_signed_v2f32_v2i64(<vscale x 2 x float> %f) {
128 ; CHECK-LABEL: test_signed_v2f32_v2i64:
130 ; CHECK-NEXT: ptrue p0.d
131 ; CHECK-NEXT: mov w8, #1602224127 // =0x5f7fffff
132 ; CHECK-NEXT: movprfx z1, z0
133 ; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.s
134 ; CHECK-NEXT: mov z2.s, w8
135 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0
136 ; CHECK-NEXT: not p1.b, p0/z, p1.b
137 ; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z2.s
138 ; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0
139 ; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff
140 ; CHECK-NEXT: mov z0.d, z1.d
142 %x = call <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f32.nxv2i64(<vscale x 2 x float> %f)
143 ret <vscale x 2 x i64> %x
; <vscale x 4 x float> -> <vscale x 4 x i64> through llvm.fptoui.sat.
; The checks show z0 first split into two .d-element vectors with
; uunpklo/uunpkhi (z2, z3); each half is converted with fcvtzu, zeroed where
; `fcmge ..., #0.0` does not hold, and set to #-1 where it exceeds the splatted
; bound 0x5f7fffff. Results are produced directly in z0 and z1.
; NOTE(review): the name says "signed" but the intrinsic called is the
; unsigned fptoui.sat; the name is left alone because CHECK-LABEL depends on it.
146 define <vscale x 4 x i64> @test_signed_v4f32_v4i64(<vscale x 4 x float> %f) {
147 ; CHECK-LABEL: test_signed_v4f32_v4i64:
149 ; CHECK-NEXT: ptrue p0.d
150 ; CHECK-NEXT: uunpklo z2.d, z0.s
151 ; CHECK-NEXT: uunpkhi z3.d, z0.s
152 ; CHECK-NEXT: mov w8, #1602224127 // =0x5f7fffff
153 ; CHECK-NEXT: movprfx z0, z2
154 ; CHECK-NEXT: fcvtzu z0.d, p0/m, z2.s
155 ; CHECK-NEXT: movprfx z1, z3
156 ; CHECK-NEXT: fcvtzu z1.d, p0/m, z3.s
157 ; CHECK-NEXT: mov z4.s, w8
158 ; CHECK-NEXT: fcmge p1.s, p0/z, z2.s, #0.0
159 ; CHECK-NEXT: fcmge p2.s, p0/z, z3.s, #0.0
160 ; CHECK-NEXT: fcmgt p3.s, p0/z, z2.s, z4.s
161 ; CHECK-NEXT: not p1.b, p0/z, p1.b
162 ; CHECK-NEXT: not p2.b, p0/z, p2.b
163 ; CHECK-NEXT: fcmgt p0.s, p0/z, z3.s, z4.s
164 ; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0
165 ; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0
166 ; CHECK-NEXT: mov z0.d, p3/m, #-1 // =0xffffffffffffffff
167 ; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff
169 %x = call <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f32.nxv4i64(<vscale x 4 x float> %f)
170 ret <vscale x 4 x i64> %x
; Declarations of the saturating float -> unsigned-integer intrinsics used by
; the scalable-vector tests with a `double` element source.
; NOTE(review): the name suffixes here list the source type before the result
; type (e.g. `.nxv2f64.nxv2i32`); the LangRef examples put the result type
; first -- confirm the parser's intrinsic remangling is being relied upon.
175 declare <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f64.nxv2i32(<vscale x 2 x double>)
176 declare <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f64.nxv4i32(<vscale x 4 x double>)
177 declare <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f64.nxv8i32(<vscale x 8 x double>)
178 declare <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f64.nxv4i16(<vscale x 4 x double>)
179 declare <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f64.nxv8i16(<vscale x 8 x double>)
180 declare <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f64.nxv2i64(<vscale x 2 x double>)
181 declare <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f64.nxv4i64(<vscale x 4 x double>)
; <vscale x 2 x double> -> <vscale x 2 x i32> through llvm.fptoui.sat.
; Expected lowering: fcvtzu on .d lanes; the upper bound is materialised in x8
; with mov #0xffffffe00000 + movk #16879, lsl #48 and splatted; lanes for which
; `fcmge ..., #0.0` does not hold are zeroed; lanes above the bound are
; replaced with 0xffffffff via `sel`.
; NOTE(review): the name says "signed" but the intrinsic called is the
; unsigned fptoui.sat; the name is left alone because CHECK-LABEL depends on it.
183 define <vscale x 2 x i32> @test_signed_v2f64_v2i32(<vscale x 2 x double> %f) {
184 ; CHECK-LABEL: test_signed_v2f64_v2i32:
186 ; CHECK-NEXT: ptrue p0.d
187 ; CHECK-NEXT: mov x8, #281474974613504 // =0xffffffe00000
188 ; CHECK-NEXT: movprfx z2, z0
189 ; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.d
190 ; CHECK-NEXT: movk x8, #16879, lsl #48
191 ; CHECK-NEXT: mov z1.d, x8
192 ; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0
193 ; CHECK-NEXT: not p1.b, p0/z, p1.b
194 ; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z1.d
195 ; CHECK-NEXT: mov z0.d, #0xffffffff
196 ; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0
197 ; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d
199 %x = call <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f64.nxv2i32(<vscale x 2 x double> %f)
200 ret <vscale x 2 x i32> %x
; <vscale x 4 x double> -> <vscale x 4 x i32> through llvm.fptoui.sat.
; The checks show the input handled as two registers (z0 and z1): each is
; converted with fcvtzu on .d lanes, zeroed where `fcmge ..., #0.0` does not
; hold, and replaced with 0xffffffff where it exceeds the splatted bound built
; by mov #0xffffffe00000 + movk #16879, lsl #48. The two .d results are packed
; into one .s vector with uzp1.
; NOTE(review): the name says "signed" but the intrinsic called is the
; unsigned fptoui.sat; the name is left alone because CHECK-LABEL depends on it.
203 define <vscale x 4 x i32> @test_signed_v4f64_v4i32(<vscale x 4 x double> %f) {
204 ; CHECK-LABEL: test_signed_v4f64_v4i32:
206 ; CHECK-NEXT: ptrue p0.d
207 ; CHECK-NEXT: mov x8, #281474974613504 // =0xffffffe00000
208 ; CHECK-NEXT: movprfx z3, z1
209 ; CHECK-NEXT: fcvtzu z3.d, p0/m, z1.d
210 ; CHECK-NEXT: movk x8, #16879, lsl #48
211 ; CHECK-NEXT: movprfx z4, z0
212 ; CHECK-NEXT: fcvtzu z4.d, p0/m, z0.d
213 ; CHECK-NEXT: mov z2.d, x8
214 ; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0
215 ; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, #0.0
216 ; CHECK-NEXT: fcmgt p3.d, p0/z, z1.d, z2.d
217 ; CHECK-NEXT: not p1.b, p0/z, p1.b
218 ; CHECK-NEXT: not p2.b, p0/z, p2.b
219 ; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z2.d
220 ; CHECK-NEXT: mov z0.d, #0xffffffff
221 ; CHECK-NEXT: mov z3.d, p1/m, #0 // =0x0
222 ; CHECK-NEXT: mov z4.d, p2/m, #0 // =0x0
223 ; CHECK-NEXT: sel z1.d, p3, z0.d, z3.d
224 ; CHECK-NEXT: sel z0.d, p0, z0.d, z4.d
225 ; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
227 %x = call <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f64.nxv4i32(<vscale x 4 x double> %f)
228 ret <vscale x 4 x i32> %x
; <vscale x 8 x double> -> <vscale x 8 x i32> through llvm.fptoui.sat.
; The checks show the input handled as four registers (z0..z3). Because
; predicate registers p4-p6 are used, the expected code spills them (and x29)
; in the prologue and reloads them before returning. Each input register is
; converted with fcvtzu on .d lanes, zeroed where `fcmge ..., #0.0` does not
; hold, and replaced with 0xffffffff where it exceeds the splatted bound built
; by mov #0xffffffe00000 + movk #16879, lsl #48. Pairs of .d results are packed
; into the two .s result registers with uzp1.
; NOTE(review): the name says "signed" but the intrinsic called is the
; unsigned fptoui.sat; the name is left alone because CHECK-LABEL depends on it.
231 define <vscale x 8 x i32> @test_signed_v8f64_v8i32(<vscale x 8 x double> %f) {
232 ; CHECK-LABEL: test_signed_v8f64_v8i32:
234 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
235 ; CHECK-NEXT: addvl sp, sp, #-1
236 ; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill
237 ; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill
238 ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
239 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
240 ; CHECK-NEXT: .cfi_offset w29, -16
241 ; CHECK-NEXT: ptrue p0.d
242 ; CHECK-NEXT: mov x8, #281474974613504 // =0xffffffe00000
243 ; CHECK-NEXT: movprfx z5, z1
244 ; CHECK-NEXT: fcvtzu z5.d, p0/m, z1.d
245 ; CHECK-NEXT: movk x8, #16879, lsl #48
246 ; CHECK-NEXT: movprfx z6, z0
247 ; CHECK-NEXT: fcvtzu z6.d, p0/m, z0.d
248 ; CHECK-NEXT: movprfx z7, z3
249 ; CHECK-NEXT: fcvtzu z7.d, p0/m, z3.d
250 ; CHECK-NEXT: mov z4.d, x8
251 ; CHECK-NEXT: movprfx z24, z2
252 ; CHECK-NEXT: fcvtzu z24.d, p0/m, z2.d
253 ; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0
254 ; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, #0.0
255 ; CHECK-NEXT: fcmge p3.d, p0/z, z3.d, #0.0
256 ; CHECK-NEXT: fcmge p4.d, p0/z, z2.d, #0.0
257 ; CHECK-NEXT: fcmgt p5.d, p0/z, z1.d, z4.d
258 ; CHECK-NEXT: fcmgt p6.d, p0/z, z0.d, z4.d
259 ; CHECK-NEXT: mov z0.d, #0xffffffff
260 ; CHECK-NEXT: not p1.b, p0/z, p1.b
261 ; CHECK-NEXT: not p2.b, p0/z, p2.b
262 ; CHECK-NEXT: not p3.b, p0/z, p3.b
263 ; CHECK-NEXT: not p4.b, p0/z, p4.b
264 ; CHECK-NEXT: mov z5.d, p1/m, #0 // =0x0
265 ; CHECK-NEXT: fcmgt p1.d, p0/z, z3.d, z4.d
266 ; CHECK-NEXT: fcmgt p0.d, p0/z, z2.d, z4.d
267 ; CHECK-NEXT: mov z6.d, p2/m, #0 // =0x0
268 ; CHECK-NEXT: mov z7.d, p3/m, #0 // =0x0
269 ; CHECK-NEXT: mov z24.d, p4/m, #0 // =0x0
270 ; CHECK-NEXT: sel z1.d, p5, z0.d, z5.d
271 ; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
272 ; CHECK-NEXT: sel z2.d, p6, z0.d, z6.d
273 ; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
274 ; CHECK-NEXT: sel z3.d, p1, z0.d, z7.d
275 ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
276 ; CHECK-NEXT: sel z4.d, p0, z0.d, z24.d
277 ; CHECK-NEXT: uzp1 z0.s, z2.s, z1.s
278 ; CHECK-NEXT: uzp1 z1.s, z4.s, z3.s
279 ; CHECK-NEXT: addvl sp, sp, #1
280 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
282 %x = call <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f64.nxv8i32(<vscale x 8 x double> %f)
283 ret <vscale x 8 x i32> %x
; <vscale x 4 x double> -> <vscale x 4 x i16> through llvm.fptoui.sat.
; The checks show the input handled as two registers (z0 and z1): each is
; converted with fcvtzu on .d lanes, zeroed where `fcmge ..., #0.0` does not
; hold, and replaced with 65535 where it exceeds the splatted bound built by
; mov #0xffe000000000 + movk #16623, lsl #48. The two .d results are packed
; into one .s vector with uzp1.
; NOTE(review): the name says "signed" but the intrinsic called is the
; unsigned fptoui.sat; the name is left alone because CHECK-LABEL depends on it.
286 define <vscale x 4 x i16> @test_signed_v4f64_v4i16(<vscale x 4 x double> %f) {
287 ; CHECK-LABEL: test_signed_v4f64_v4i16:
289 ; CHECK-NEXT: ptrue p0.d
290 ; CHECK-NEXT: mov x8, #281337537757184 // =0xffe000000000
291 ; CHECK-NEXT: movprfx z3, z1
292 ; CHECK-NEXT: fcvtzu z3.d, p0/m, z1.d
293 ; CHECK-NEXT: movk x8, #16623, lsl #48
294 ; CHECK-NEXT: movprfx z4, z0
295 ; CHECK-NEXT: fcvtzu z4.d, p0/m, z0.d
296 ; CHECK-NEXT: mov z2.d, x8
297 ; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0
298 ; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, #0.0
299 ; CHECK-NEXT: fcmgt p3.d, p0/z, z1.d, z2.d
300 ; CHECK-NEXT: not p1.b, p0/z, p1.b
301 ; CHECK-NEXT: not p2.b, p0/z, p2.b
302 ; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z2.d
303 ; CHECK-NEXT: mov z0.d, #65535 // =0xffff
304 ; CHECK-NEXT: mov z3.d, p1/m, #0 // =0x0
305 ; CHECK-NEXT: mov z4.d, p2/m, #0 // =0x0
306 ; CHECK-NEXT: sel z1.d, p3, z0.d, z3.d
307 ; CHECK-NEXT: sel z0.d, p0, z0.d, z4.d
308 ; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
310 %x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f64.nxv4i16(<vscale x 4 x double> %f)
311 ret <vscale x 4 x i16> %x
; <vscale x 8 x double> -> <vscale x 8 x i16> through llvm.fptoui.sat.
; The checks show the input handled as four registers (z0..z3). Because
; predicate registers p4-p6 are used, the expected code spills them (and x29)
; in the prologue and reloads them before returning. Each input register is
; converted with fcvtzu on .d lanes, zeroed where `fcmge ..., #0.0` does not
; hold, and replaced with 65535 where it exceeds the splatted bound built by
; mov #0xffe000000000 + movk #16623, lsl #48. The four .d results are narrowed
; with two uzp1 .s steps followed by one uzp1 .h step into z0.
; NOTE(review): the name says "signed" but the intrinsic called is the
; unsigned fptoui.sat; the name is left alone because CHECK-LABEL depends on it.
314 define <vscale x 8 x i16> @test_signed_v8f64_v8i16(<vscale x 8 x double> %f) {
315 ; CHECK-LABEL: test_signed_v8f64_v8i16:
317 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
318 ; CHECK-NEXT: addvl sp, sp, #-1
319 ; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill
320 ; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill
321 ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
322 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
323 ; CHECK-NEXT: .cfi_offset w29, -16
324 ; CHECK-NEXT: ptrue p0.d
325 ; CHECK-NEXT: mov x8, #281337537757184 // =0xffe000000000
326 ; CHECK-NEXT: movprfx z5, z3
327 ; CHECK-NEXT: fcvtzu z5.d, p0/m, z3.d
328 ; CHECK-NEXT: movk x8, #16623, lsl #48
329 ; CHECK-NEXT: movprfx z6, z2
330 ; CHECK-NEXT: fcvtzu z6.d, p0/m, z2.d
331 ; CHECK-NEXT: movprfx z7, z1
332 ; CHECK-NEXT: fcvtzu z7.d, p0/m, z1.d
333 ; CHECK-NEXT: mov z4.d, x8
334 ; CHECK-NEXT: movprfx z24, z0
335 ; CHECK-NEXT: fcvtzu z24.d, p0/m, z0.d
336 ; CHECK-NEXT: fcmge p1.d, p0/z, z3.d, #0.0
337 ; CHECK-NEXT: fcmge p2.d, p0/z, z2.d, #0.0
338 ; CHECK-NEXT: fcmge p3.d, p0/z, z1.d, #0.0
339 ; CHECK-NEXT: fcmge p4.d, p0/z, z0.d, #0.0
340 ; CHECK-NEXT: fcmgt p5.d, p0/z, z3.d, z4.d
341 ; CHECK-NEXT: fcmgt p6.d, p0/z, z2.d, z4.d
342 ; CHECK-NEXT: mov z2.d, #65535 // =0xffff
343 ; CHECK-NEXT: not p1.b, p0/z, p1.b
344 ; CHECK-NEXT: not p2.b, p0/z, p2.b
345 ; CHECK-NEXT: not p3.b, p0/z, p3.b
346 ; CHECK-NEXT: not p4.b, p0/z, p4.b
347 ; CHECK-NEXT: mov z5.d, p1/m, #0 // =0x0
348 ; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z4.d
349 ; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z4.d
350 ; CHECK-NEXT: mov z6.d, p2/m, #0 // =0x0
351 ; CHECK-NEXT: mov z7.d, p3/m, #0 // =0x0
352 ; CHECK-NEXT: mov z24.d, p4/m, #0 // =0x0
353 ; CHECK-NEXT: sel z0.d, p5, z2.d, z5.d
354 ; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
355 ; CHECK-NEXT: sel z1.d, p6, z2.d, z6.d
356 ; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
357 ; CHECK-NEXT: sel z3.d, p1, z2.d, z7.d
358 ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
359 ; CHECK-NEXT: sel z2.d, p0, z2.d, z24.d
360 ; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s
361 ; CHECK-NEXT: uzp1 z1.s, z2.s, z3.s
362 ; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
363 ; CHECK-NEXT: addvl sp, sp, #1
364 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
366 %x = call <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f64.nxv8i16(<vscale x 8 x double> %f)
367 ret <vscale x 8 x i16> %x
; <vscale x 2 x double> -> <vscale x 2 x i64> through llvm.fptoui.sat.
; Expected lowering: fcvtzu on .d lanes into z1; lanes for which
; `fcmge ..., #0.0` does not hold are zeroed; lanes greater than the splatted
; bound 0x43efffffffffffff are overwritten with #-1 (all ones); z1 is then
; copied to z0.
; NOTE(review): the name says "signed" but the intrinsic called is the
; unsigned fptoui.sat; the name is left alone because CHECK-LABEL depends on it.
370 define <vscale x 2 x i64> @test_signed_v2f64_v2i64(<vscale x 2 x double> %f) {
371 ; CHECK-LABEL: test_signed_v2f64_v2i64:
373 ; CHECK-NEXT: ptrue p0.d
374 ; CHECK-NEXT: mov x8, #4895412794951729151 // =0x43efffffffffffff
375 ; CHECK-NEXT: movprfx z1, z0
376 ; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.d
377 ; CHECK-NEXT: mov z2.d, x8
378 ; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0
379 ; CHECK-NEXT: not p1.b, p0/z, p1.b
380 ; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z2.d
381 ; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0
382 ; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff
383 ; CHECK-NEXT: mov z0.d, z1.d
385 %x = call <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f64.nxv2i64(<vscale x 2 x double> %f)
386 ret <vscale x 2 x i64> %x
; <vscale x 4 x double> -> <vscale x 4 x i64> through llvm.fptoui.sat.
; The checks show the input handled as two registers (z0 and z1): each is
; converted with fcvtzu on .d lanes, zeroed where `fcmge ..., #0.0` does not
; hold, and set to #-1 where it exceeds the shared splatted bound
; 0x43efffffffffffff (held in z4). Results are copied back into z0 and z1.
; NOTE(review): the name says "signed" but the intrinsic called is the
; unsigned fptoui.sat; the name is left alone because CHECK-LABEL depends on it.
389 define <vscale x 4 x i64> @test_signed_v4f64_v4i64(<vscale x 4 x double> %f) {
390 ; CHECK-LABEL: test_signed_v4f64_v4i64:
392 ; CHECK-NEXT: ptrue p0.d
393 ; CHECK-NEXT: mov x8, #4895412794951729151 // =0x43efffffffffffff
394 ; CHECK-NEXT: movprfx z2, z0
395 ; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.d
396 ; CHECK-NEXT: mov z4.d, x8
397 ; CHECK-NEXT: movprfx z3, z1
398 ; CHECK-NEXT: fcvtzu z3.d, p0/m, z1.d
399 ; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0
400 ; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, #0.0
401 ; CHECK-NEXT: fcmgt p3.d, p0/z, z0.d, z4.d
402 ; CHECK-NEXT: not p1.b, p0/z, p1.b
403 ; CHECK-NEXT: not p2.b, p0/z, p2.b
404 ; CHECK-NEXT: fcmgt p0.d, p0/z, z1.d, z4.d
405 ; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0
406 ; CHECK-NEXT: mov z3.d, p2/m, #0 // =0x0
407 ; CHECK-NEXT: mov z2.d, p3/m, #-1 // =0xffffffffffffffff
408 ; CHECK-NEXT: mov z3.d, p0/m, #-1 // =0xffffffffffffffff
409 ; CHECK-NEXT: mov z0.d, z2.d
410 ; CHECK-NEXT: mov z1.d, z3.d
412 %x = call <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f64.nxv4i64(<vscale x 4 x double> %f)
413 ret <vscale x 4 x i64> %x
; Declarations of the saturating float -> unsigned-integer intrinsics used by
; the scalable-vector tests with a `half` element source.
; NOTE(review): the name suffixes here list the source type before the result
; type (e.g. `.nxv2f16.nxv2i32`); the LangRef examples put the result type
; first -- confirm the parser's intrinsic remangling is being relied upon.
419 declare <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f16.nxv2i32(<vscale x 2 x half>)
420 declare <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f16.nxv4i32(<vscale x 4 x half>)
421 declare <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f16.nxv8i32(<vscale x 8 x half>)
422 declare <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f16.nxv4i16(<vscale x 4 x half>)
423 declare <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f16.nxv8i16(<vscale x 8 x half>)
424 declare <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f16.nxv2i64(<vscale x 2 x half>)
425 declare <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f16.nxv4i64(<vscale x 4 x half>)
; <vscale x 2 x half> -> <vscale x 2 x i32> through llvm.fptoui.sat.
; Expected lowering: fcvtzu from .h source into .d lanes; lanes for which
; `fcmge ..., #0.0` does not hold are zeroed; lanes comparing greater than the
; splatted bound 0x7bff are replaced with 0xffffffff via `sel`.
; NOTE(review): the name says "signed" but the intrinsic called is the
; unsigned fptoui.sat; the name is left alone because CHECK-LABEL depends on it.
427 define <vscale x 2 x i32> @test_signed_v2f16_v2i32(<vscale x 2 x half> %f) {
428 ; CHECK-LABEL: test_signed_v2f16_v2i32:
430 ; CHECK-NEXT: ptrue p0.d
431 ; CHECK-NEXT: mov w8, #31743 // =0x7bff
432 ; CHECK-NEXT: movprfx z2, z0
433 ; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.h
434 ; CHECK-NEXT: mov z1.h, w8
435 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0
436 ; CHECK-NEXT: not p1.b, p0/z, p1.b
437 ; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z1.h
438 ; CHECK-NEXT: mov z0.d, #0xffffffff
439 ; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0
440 ; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d
442 %x = call <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f16.nxv2i32(<vscale x 2 x half> %f)
443 ret <vscale x 2 x i32> %x
; <vscale x 4 x half> -> <vscale x 4 x i32> through llvm.fptoui.sat.
; Expected lowering: fcvtzu from .h source into .s lanes of z1; lanes for which
; `fcmge ..., #0.0` does not hold are zeroed; lanes greater than the splatted
; bound 0x7bff are overwritten with #-1 (all ones); z1 is then copied to z0.
; NOTE(review): the name says "signed" but the intrinsic called is the
; unsigned fptoui.sat; the name is left alone because CHECK-LABEL depends on it.
446 define <vscale x 4 x i32> @test_signed_v4f16_v4i32(<vscale x 4 x half> %f) {
447 ; CHECK-LABEL: test_signed_v4f16_v4i32:
449 ; CHECK-NEXT: ptrue p0.s
450 ; CHECK-NEXT: mov w8, #31743 // =0x7bff
451 ; CHECK-NEXT: movprfx z1, z0
452 ; CHECK-NEXT: fcvtzu z1.s, p0/m, z0.h
453 ; CHECK-NEXT: mov z2.h, w8
454 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0
455 ; CHECK-NEXT: not p1.b, p0/z, p1.b
456 ; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h
457 ; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0
458 ; CHECK-NEXT: mov z1.s, p0/m, #-1 // =0xffffffffffffffff
459 ; CHECK-NEXT: mov z0.d, z1.d
461 %x = call <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f16.nxv4i32(<vscale x 4 x half> %f)
462 ret <vscale x 4 x i32> %x
; <vscale x 8 x half> -> <vscale x 8 x i32> through llvm.fptoui.sat.
; The checks show z0 first split into two .s-element vectors with
; uunpklo/uunpkhi (z2, z3); each half is converted with fcvtzu, zeroed where
; `fcmge ..., #0.0` does not hold, and set to #-1 where it exceeds the splatted
; bound 0x7bff. Results are produced directly in z0 and z1.
; NOTE(review): the name says "signed" but the intrinsic called is the
; unsigned fptoui.sat; the name is left alone because CHECK-LABEL depends on it.
465 define <vscale x 8 x i32> @test_signed_v8f16_v8i32(<vscale x 8 x half> %f) {
466 ; CHECK-LABEL: test_signed_v8f16_v8i32:
468 ; CHECK-NEXT: ptrue p0.s
469 ; CHECK-NEXT: uunpklo z2.s, z0.h
470 ; CHECK-NEXT: uunpkhi z3.s, z0.h
471 ; CHECK-NEXT: mov w8, #31743 // =0x7bff
472 ; CHECK-NEXT: movprfx z0, z2
473 ; CHECK-NEXT: fcvtzu z0.s, p0/m, z2.h
474 ; CHECK-NEXT: movprfx z1, z3
475 ; CHECK-NEXT: fcvtzu z1.s, p0/m, z3.h
476 ; CHECK-NEXT: mov z4.h, w8
477 ; CHECK-NEXT: fcmge p1.h, p0/z, z2.h, #0.0
478 ; CHECK-NEXT: fcmge p2.h, p0/z, z3.h, #0.0
479 ; CHECK-NEXT: fcmgt p3.h, p0/z, z2.h, z4.h
480 ; CHECK-NEXT: not p1.b, p0/z, p1.b
481 ; CHECK-NEXT: not p2.b, p0/z, p2.b
482 ; CHECK-NEXT: fcmgt p0.h, p0/z, z3.h, z4.h
483 ; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0
484 ; CHECK-NEXT: mov z1.s, p2/m, #0 // =0x0
485 ; CHECK-NEXT: mov z0.s, p3/m, #-1 // =0xffffffffffffffff
486 ; CHECK-NEXT: mov z1.s, p0/m, #-1 // =0xffffffffffffffff
488 %x = call <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f16.nxv8i32(<vscale x 8 x half> %f)
489 ret <vscale x 8 x i32> %x
; <vscale x 4 x half> -> <vscale x 4 x i16> through llvm.fptoui.sat.
; Expected lowering: fcvtzu from .h source into .s lanes; lanes for which
; `fcmge ..., #0.0` does not hold are zeroed; lanes comparing greater than the
; splatted bound 0x7bff are replaced with 65535 via `sel`.
; NOTE(review): the name says "signed" but the intrinsic called is the
; unsigned fptoui.sat; the name is left alone because CHECK-LABEL depends on it.
492 define <vscale x 4 x i16> @test_signed_v4f16_v4i16(<vscale x 4 x half> %f) {
493 ; CHECK-LABEL: test_signed_v4f16_v4i16:
495 ; CHECK-NEXT: ptrue p0.s
496 ; CHECK-NEXT: mov w8, #31743 // =0x7bff
497 ; CHECK-NEXT: movprfx z2, z0
498 ; CHECK-NEXT: fcvtzu z2.s, p0/m, z0.h
499 ; CHECK-NEXT: mov z1.h, w8
500 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0
501 ; CHECK-NEXT: not p1.b, p0/z, p1.b
502 ; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z1.h
503 ; CHECK-NEXT: mov z0.s, #65535 // =0xffff
504 ; CHECK-NEXT: mov z2.s, p1/m, #0 // =0x0
505 ; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s
507 %x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f16.nxv4i16(<vscale x 4 x half> %f)
508 ret <vscale x 4 x i16> %x
; <vscale x 8 x half> -> <vscale x 8 x i16> through llvm.fptoui.sat.
; Expected lowering: fcvtzu on .h lanes into z1; lanes for which
; `fcmge ..., #0.0` does not hold are zeroed; lanes greater than the splatted
; bound 0x7bff are overwritten with #-1 (all ones); z1 is then copied to z0.
; NOTE(review): the name says "signed" but the intrinsic called is the
; unsigned fptoui.sat; the name is left alone because CHECK-LABEL depends on it.
511 define <vscale x 8 x i16> @test_signed_v8f16_v8i16(<vscale x 8 x half> %f) {
512 ; CHECK-LABEL: test_signed_v8f16_v8i16:
514 ; CHECK-NEXT: ptrue p0.h
515 ; CHECK-NEXT: mov w8, #31743 // =0x7bff
516 ; CHECK-NEXT: movprfx z1, z0
517 ; CHECK-NEXT: fcvtzu z1.h, p0/m, z0.h
518 ; CHECK-NEXT: mov z2.h, w8
519 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0
520 ; CHECK-NEXT: not p1.b, p0/z, p1.b
521 ; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h
522 ; CHECK-NEXT: mov z1.h, p1/m, #0 // =0x0
523 ; CHECK-NEXT: mov z1.h, p0/m, #-1 // =0xffffffffffffffff
524 ; CHECK-NEXT: mov z0.d, z1.d
526 %x = call <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f16.nxv8i16(<vscale x 8 x half> %f)
527 ret <vscale x 8 x i16> %x
; <vscale x 2 x half> -> <vscale x 2 x i64> through llvm.fptoui.sat.
; Expected lowering: fcvtzu from .h source into .d lanes of z1; lanes for which
; `fcmge ..., #0.0` does not hold are zeroed; lanes greater than the splatted
; bound 0x7bff are overwritten with #-1 (all ones); z1 is then copied to z0.
; NOTE(review): the name says "signed" but the intrinsic called is the
; unsigned fptoui.sat; the name is left alone because CHECK-LABEL depends on it.
530 define <vscale x 2 x i64> @test_signed_v2f16_v2i64(<vscale x 2 x half> %f) {
531 ; CHECK-LABEL: test_signed_v2f16_v2i64:
533 ; CHECK-NEXT: ptrue p0.d
534 ; CHECK-NEXT: mov w8, #31743 // =0x7bff
535 ; CHECK-NEXT: movprfx z1, z0
536 ; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.h
537 ; CHECK-NEXT: mov z2.h, w8
538 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0
539 ; CHECK-NEXT: not p1.b, p0/z, p1.b
540 ; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h
541 ; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0
542 ; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff
543 ; CHECK-NEXT: mov z0.d, z1.d
545 %x = call <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f16.nxv2i64(<vscale x 2 x half> %f)
546 ret <vscale x 2 x i64> %x
; <vscale x 4 x half> -> <vscale x 4 x i64> through llvm.fptoui.sat.
; The checks show z0 first split into two .d-element vectors with
; uunpklo/uunpkhi (z2, z3); each half is converted with fcvtzu from .h source
; into .d lanes, zeroed where `fcmge ..., #0.0` does not hold, and set to #-1
; where it exceeds the splatted bound 0x7bff. Results are produced directly in
; z0 and z1.
; NOTE(review): the name says "signed" but the intrinsic called is the
; unsigned fptoui.sat; the name is left alone because CHECK-LABEL depends on it.
549 define <vscale x 4 x i64> @test_signed_v4f16_v4i64(<vscale x 4 x half> %f) {
550 ; CHECK-LABEL: test_signed_v4f16_v4i64:
552 ; CHECK-NEXT: ptrue p0.d
553 ; CHECK-NEXT: uunpklo z2.d, z0.s
554 ; CHECK-NEXT: uunpkhi z3.d, z0.s
555 ; CHECK-NEXT: mov w8, #31743 // =0x7bff
556 ; CHECK-NEXT: movprfx z0, z2
557 ; CHECK-NEXT: fcvtzu z0.d, p0/m, z2.h
558 ; CHECK-NEXT: movprfx z1, z3
559 ; CHECK-NEXT: fcvtzu z1.d, p0/m, z3.h
560 ; CHECK-NEXT: mov z4.h, w8
561 ; CHECK-NEXT: fcmge p1.h, p0/z, z2.h, #0.0
562 ; CHECK-NEXT: fcmge p2.h, p0/z, z3.h, #0.0
563 ; CHECK-NEXT: fcmgt p3.h, p0/z, z2.h, z4.h
564 ; CHECK-NEXT: not p1.b, p0/z, p1.b
565 ; CHECK-NEXT: not p2.b, p0/z, p2.b
566 ; CHECK-NEXT: fcmgt p0.h, p0/z, z3.h, z4.h
567 ; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0
568 ; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0
569 ; CHECK-NEXT: mov z0.d, p3/m, #-1 // =0xffffffffffffffff
570 ; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff
572 %x = call <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f16.nxv4i64(<vscale x 4 x half> %f)
573 ret <vscale x 4 x i64> %x