1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO16
3 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+fullfp16 -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
; fmul by a splat of 16.0 (2^4) feeding fptosi <2 x float> -> <2 x i32>.
; The expected output uses the fixed-point form of fcvtzs with #4 fractional bits.
5 define <2 x i32> @test1(<2 x float> %f) {
8 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s, #4
10 %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
11 %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
; Four-lane variant: fmul by a splat of 8.0 (2^3) feeding fptosi to <4 x i32>.
; The expected output is fcvtzs on v0.4s with #3 fractional bits.
15 define <4 x i32> @test2(<4 x float> %f) {
18 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s, #3
20 %mul.i = fmul <4 x float> %f, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
21 %vcvt.i = fptosi <4 x float> %mul.i to <4 x i32>
; Double-precision variant: fmul by a splat of 32.0 (2^5) feeding fptosi to <2 x i64>.
; The expected output is fcvtzs on v0.2d with #5 fractional bits.
25 define <2 x i64> @test3(<2 x double> %d) {
28 ; CHECK-NEXT: fcvtzs v0.2d, v0.2d, #5
30 %mul.i = fmul <2 x double> %d, <double 32.000000e+00, double 32.000000e+00>
31 %vcvt.i = fptosi <2 x double> %mul.i to <2 x i32>
35 ; Truncate double to i32
; <2 x double> scaled by 16.0 (2^4) and converted to the narrower <2 x i32>.
; The expected output converts at 64 bits with fcvtzs #4 and then narrows with xtn.
36 define <2 x i32> @test4(<2 x double> %d) {
39 ; CHECK-NEXT: fcvtzs v0.2d, v0.2d, #4
40 ; CHECK-NEXT: xtn v0.2s, v0.2d
42 %mul.i = fmul <2 x double> %d, <double 16.000000e+00, double 16.000000e+00>
43 %vcvt.i = fptosi <2 x double> %mul.i to <2 x i32>
47 ; Truncate float to i16
; <2 x float> scaled by 16.0 (2^4) and converted to <2 x i16>.
; The expected output still performs the conversion on v0.2s with fcvtzs #4.
48 define <2 x i16> @test5(<2 x float> %f) {
51 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s, #4
53 %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
54 %vcvt.i = fptosi <2 x float> %mul.i to <2 x i16>
58 ; Don't convert float to i64
; <2 x float> scaled by 16.0 and converted to the wider <2 x i64>.
; The expected output keeps the fmul, widens with fcvtl, and uses a plain
; fcvtzs (no fractional-bits operand).
59 define <2 x i64> @test6(<2 x float> %f) {
62 ; CHECK-NEXT: fmov v1.2s, #16.00000000
63 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
64 ; CHECK-NEXT: fcvtl v0.2d, v0.2s
65 ; CHECK-NEXT: fcvtzs v0.2d, v0.2d
67 %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
68 %vcvt.i = fptosi <2 x float> %mul.i to <2 x i64>
; Unsigned variant: fmul by a splat of 16.0 (2^4) feeding fptoui to <2 x i32>.
; The expected output is fcvtzu with #4 fractional bits.
72 define <2 x i32> @test7(<2 x float> %f) {
75 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s, #4
77 %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
78 %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
82 ; Test which should not fold due to non-power of 2.
; The multiplier is a splat of 17.0, which is not a power of two.
; The expected output keeps the fmul and uses a plain fcvtzu.
83 define <2 x i32> @test8(<2 x float> %f) {
86 ; CHECK-NEXT: fmov v1.2s, #17.00000000
87 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
88 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s
90 %mul.i = fmul <2 x float> %f, <float 17.000000e+00, float 17.000000e+00>
91 %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
95 ; Test which should not fold due to non-matching power of 2.
; The multiplier lanes differ (16.0 and 8.0), so there is no single scale factor.
; The expected output loads the vector constant from the constant pool
; (.LCPI8_0), keeps the fmul, and uses a plain fcvtzu.
96 define <2 x i32> @test9(<2 x float> %f) {
99 ; CHECK-NEXT: adrp x8, .LCPI8_0
100 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI8_0]
101 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
102 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s
104 %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 8.000000e+00>
105 %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
106 ret <2 x i32> %vcvt.i
109 ; Combine all undefs.
; Both multiplier lanes are undef. The expected output contains no fmul:
; it splats the bit pattern 0x7fc00000 into v0.2s and applies a plain fcvtzu
; (no fractional-bits operand).
110 define <2 x i32> @test10(<2 x float> %f) {
111 ; CHECK-LABEL: test10:
113 ; CHECK-NEXT: mov w8, #2143289344 // =0x7fc00000
114 ; CHECK-NEXT: dup v0.2s, w8
115 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s
117 %mul.i = fmul <2 x float> %f, <float undef, float undef>
118 %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
119 ret <2 x i32> %vcvt.i
122 ; Combine if mix of undef and pow2.
; One multiplier lane is undef and the other is 8.0 (2^3).
; The expected output is fcvtzu with #3 fractional bits, i.e. the undef lane
; does not prevent the fixed-point form from being used.
123 define <2 x i32> @test11(<2 x float> %f) {
124 ; CHECK-LABEL: test11:
126 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s, #3
128 %mul.i = fmul <2 x float> %f, <float undef, float 8.000000e+00>
129 %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
130 ret <2 x i32> %vcvt.i
133 ; Don't combine when multiplied by 0.0.
; The multiplier is a splat of 0.0.
; The expected output zeroes d1 with movi, keeps the fmul, and uses a plain fcvtzs.
134 define <2 x i32> @test12(<2 x float> %f) {
135 ; CHECK-LABEL: test12:
137 ; CHECK-NEXT: movi d1, #0000000000000000
138 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
139 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s
141 %mul.i = fmul <2 x float> %f, <float 0.000000e+00, float 0.000000e+00>
142 %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
143 ret <2 x i32> %vcvt.i
146 ; Test which should not fold due to power of 2 out of range (i.e., 2^33).
; The multiplier 0x4200000000000000 is 2^33, one step above the 2^32 case
; exercised by test14. The expected output materializes the constant with movi,
; keeps the fmul, and uses a plain fcvtzs.
147 define <2 x i32> @test13(<2 x float> %f) {
148 ; CHECK-LABEL: test13:
150 ; CHECK-NEXT: movi v1.2s, #80, lsl #24
151 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
152 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s
154 %mul.i = fmul <2 x float> %f, <float 0x4200000000000000, float 0x4200000000000000>
155 %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
156 ret <2 x i32> %vcvt.i
159 ; Test case where const is max power of 2 (i.e., 2^32).
; The multiplier 0x41F0000000000000 is 2^32.
; The expected output is fcvtzs with #32 fractional bits and no separate fmul.
160 define <2 x i32> @test14(<2 x float> %f) {
161 ; CHECK-LABEL: test14:
163 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s, #32
165 %mul.i = fmul <2 x float> %f, <float 0x41F0000000000000, float 0x41F0000000000000>
166 %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
167 ret <2 x i32> %vcvt.i
; Three-element vectors: <3 x float> scaled by 4.0 (2^2) and converted to <3 x i32>.
; The expected output operates on the whole v0.4s register with fcvtzs #2.
170 define <3 x i32> @test_illegal_fp_to_int(<3 x float> %in) {
171 ; CHECK-LABEL: test_illegal_fp_to_int:
173 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s, #2
175 %scale = fmul <3 x float> %in, <float 4.0, float 4.0, float 4.0>
176 %val = fptosi <3 x float> %scale to <3 x i32>
; <8 x half> scaled by 4.0 (2^2) and converted to <8 x i16> with fptosi.
; Two expected outputs are recorded, one per RUN line:
;  - without +fullfp16, each half lane is extracted, widened to single precision,
;    multiplied by 4.0, rounded back to half, and the two reassembled halves are
;    widened with fcvtl, converted with a plain fcvtzs, and packed with uzp1;
;  - with +fullfp16, a single fcvtzs on v0.8h with #2 fractional bits.
180 define <8 x i16> @test_v8f16(<8 x half> %in) {
181 ; CHECK-NO16-LABEL: test_v8f16:
182 ; CHECK-NO16: // %bb.0:
183 ; CHECK-NO16-NEXT: mov h2, v0.h[1]
184 ; CHECK-NO16-NEXT: mov h3, v0.h[4]
185 ; CHECK-NO16-NEXT: mov h4, v0.h[5]
186 ; CHECK-NO16-NEXT: mov h5, v0.h[2]
187 ; CHECK-NO16-NEXT: fcvt s6, h0
188 ; CHECK-NO16-NEXT: mov h7, v0.h[6]
189 ; CHECK-NO16-NEXT: fmov s1, #4.00000000
190 ; CHECK-NO16-NEXT: mov h16, v0.h[3]
191 ; CHECK-NO16-NEXT: mov h0, v0.h[7]
192 ; CHECK-NO16-NEXT: fcvt s2, h2
193 ; CHECK-NO16-NEXT: fcvt s3, h3
194 ; CHECK-NO16-NEXT: fcvt s4, h4
195 ; CHECK-NO16-NEXT: fmul s6, s6, s1
196 ; CHECK-NO16-NEXT: fcvt s5, h5
197 ; CHECK-NO16-NEXT: fcvt s7, h7
198 ; CHECK-NO16-NEXT: fcvt s16, h16
199 ; CHECK-NO16-NEXT: fcvt s0, h0
200 ; CHECK-NO16-NEXT: fmul s2, s2, s1
201 ; CHECK-NO16-NEXT: fmul s3, s3, s1
202 ; CHECK-NO16-NEXT: fmul s4, s4, s1
203 ; CHECK-NO16-NEXT: fmul s5, s5, s1
204 ; CHECK-NO16-NEXT: fcvt h6, s6
205 ; CHECK-NO16-NEXT: fmul s7, s7, s1
206 ; CHECK-NO16-NEXT: fmul s16, s16, s1
207 ; CHECK-NO16-NEXT: fmul s0, s0, s1
208 ; CHECK-NO16-NEXT: fcvt h2, s2
209 ; CHECK-NO16-NEXT: fcvt h3, s3
210 ; CHECK-NO16-NEXT: fcvt h4, s4
211 ; CHECK-NO16-NEXT: fcvt h5, s5
212 ; CHECK-NO16-NEXT: fcvt h1, s7
213 ; CHECK-NO16-NEXT: fcvt h0, s0
214 ; CHECK-NO16-NEXT: mov v6.h[1], v2.h[0]
215 ; CHECK-NO16-NEXT: fcvt h2, s16
216 ; CHECK-NO16-NEXT: mov v3.h[1], v4.h[0]
217 ; CHECK-NO16-NEXT: mov v6.h[2], v5.h[0]
218 ; CHECK-NO16-NEXT: mov v3.h[2], v1.h[0]
219 ; CHECK-NO16-NEXT: mov v6.h[3], v2.h[0]
220 ; CHECK-NO16-NEXT: mov v3.h[3], v0.h[0]
221 ; CHECK-NO16-NEXT: fcvtl v1.4s, v6.4h
222 ; CHECK-NO16-NEXT: fcvtl v0.4s, v3.4h
223 ; CHECK-NO16-NEXT: fcvtzs v1.4s, v1.4s
224 ; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s
225 ; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v0.8h
226 ; CHECK-NO16-NEXT: ret
228 ; CHECK-FP16-LABEL: test_v8f16:
229 ; CHECK-FP16: // %bb.0:
230 ; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h, #2
231 ; CHECK-FP16-NEXT: ret
232 %scale = fmul <8 x half> %in, <half 4.0, half 4.0, half 4.0, half 4.0, half 4.0, half 4.0, half 4.0, half 4.0>
233 %val = fptosi <8 x half> %scale to <8 x i16>
; <4 x half> scaled by 4.0 (2^2) and converted to <4 x i16> with fptoui.
; Two expected outputs are recorded, one per RUN line:
;  - without +fullfp16, the input is widened with fcvtl, multiplied at v0.4s,
;    rounded back to half with fcvtn, widened again, converted with a plain
;    fcvtzu, and narrowed with xtn;
;  - with +fullfp16, a single fcvtzu on v0.4h with #2 fractional bits.
237 define <4 x i16> @test_v4f16(<4 x half> %in) {
238 ; CHECK-NO16-LABEL: test_v4f16:
239 ; CHECK-NO16: // %bb.0:
240 ; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000
241 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
242 ; CHECK-NO16-NEXT: fmul v0.4s, v0.4s, v1.4s
243 ; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s
244 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
245 ; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s
246 ; CHECK-NO16-NEXT: xtn v0.4h, v0.4s
247 ; CHECK-NO16-NEXT: ret
249 ; CHECK-FP16-LABEL: test_v4f16:
250 ; CHECK-FP16: // %bb.0:
251 ; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h, #2
252 ; CHECK-FP16-NEXT: ret
253 %scale = fmul <4 x half> %in, <half 4.0, half 4.0, half 4.0, half 4.0>
254 %val = fptoui <4 x half> %scale to <4 x i16>
; <4 x half> scaled by 4.0 and converted to the wider <4 x i32> with fptosi.
; Neither expected output uses the fixed-point form of fcvtzs:
;  - without +fullfp16, the multiply is done at v0.4s between fcvtl/fcvtn pairs;
;  - with +fullfp16, the multiply is done at v0.4h, followed by fcvtl and a
;    plain fcvtzs.
258 define <4 x i32> @test_v4f16_i32(<4 x half> %in) {
259 ; CHECK-NO16-LABEL: test_v4f16_i32:
260 ; CHECK-NO16: // %bb.0:
261 ; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000
262 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
263 ; CHECK-NO16-NEXT: fmul v0.4s, v0.4s, v1.4s
264 ; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s
265 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
266 ; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s
267 ; CHECK-NO16-NEXT: ret
269 ; CHECK-FP16-LABEL: test_v4f16_i32:
270 ; CHECK-FP16: // %bb.0:
271 ; CHECK-FP16-NEXT: movi v1.4h, #68, lsl #8
272 ; CHECK-FP16-NEXT: fmul v0.4h, v0.4h, v1.4h
273 ; CHECK-FP16-NEXT: fcvtl v0.4s, v0.4h
274 ; CHECK-FP16-NEXT: fcvtzs v0.4s, v0.4s
275 ; CHECK-FP16-NEXT: ret
276 %scale = fmul <4 x half> %in, <half 4.0, half 4.0, half 4.0, half 4.0>
277 %val = fptosi <4 x half> %scale to <4 x i32>
; Declarations of the saturating fp-to-int intrinsics (llvm.fptosi.sat.* and
; llvm.fptoui.sat.*) called by the *_sat tests that follow. Each is overloaded
; on its result vector type and its source vector type, in that order.
282 declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float>)
283 declare <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float>)
284 declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double>)
285 declare <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double>)
286 declare <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float>)
287 declare <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float>)
288 declare <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float>)
289 declare <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float>)
290 declare <3 x i32> @llvm.fptosi.sat.v3i32.v3f32(<3 x float>)
291 declare <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half>)
292 declare <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half>)
293 declare <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half>)
294 declare <4 x i24> @llvm.fptoui.sat.v4i24.v4f32(<4 x float>)
; Saturating counterpart of test1: fmul by a splat of 16.0 (2^4) feeding
; llvm.fptosi.sat to <2 x i32>. The expected output is fcvtzs with #4 fractional bits.
296 define <2 x i32> @test1_sat(<2 x float> %f) {
297 ; CHECK-LABEL: test1_sat:
299 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s, #4
301 %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
302 %vcvt.i = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %mul.i)
303 ret <2 x i32> %vcvt.i
; Saturating counterpart of test2: fmul by a splat of 8.0 (2^3) feeding
; llvm.fptosi.sat to <4 x i32>. The expected output is fcvtzs on v0.4s with #3.
306 define <4 x i32> @test2_sat(<4 x float> %f) {
307 ; CHECK-LABEL: test2_sat:
309 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s, #3
311 %mul.i = fmul <4 x float> %f, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
312 %vcvt.i = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %mul.i)
313 ret <4 x i32> %vcvt.i
; Saturating counterpart of test3: fmul by a splat of 32.0 (2^5) feeding
; llvm.fptosi.sat to <2 x i64>. The expected output is fcvtzs on v0.2d with #5.
316 define <2 x i64> @test3_sat(<2 x double> %d) {
317 ; CHECK-LABEL: test3_sat:
319 ; CHECK-NEXT: fcvtzs v0.2d, v0.2d, #5
321 %mul.i = fmul <2 x double> %d, <double 32.000000e+00, double 32.000000e+00>
322 %vcvt.i = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %mul.i)
323 ret <2 x i64> %vcvt.i
326 ; Truncate double to i32
; Saturating conversion of <2 x double> scaled by 16.0 to the narrower <2 x i32>.
; Unlike test4, the expected output does not use the fixed-point form: it keeps
; the fmul at v0.2d, converts each lane with a scalar fcvtzs into a w register,
; and rebuilds the result vector with fmov / mov.
327 define <2 x i32> @test4_sat(<2 x double> %d) {
328 ; CHECK-LABEL: test4_sat:
330 ; CHECK-NEXT: fmov v1.2d, #16.00000000
331 ; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
332 ; CHECK-NEXT: mov d1, v0.d[1]
333 ; CHECK-NEXT: fcvtzs w8, d0
334 ; CHECK-NEXT: fcvtzs w9, d1
335 ; CHECK-NEXT: fmov s0, w8
336 ; CHECK-NEXT: mov v0.s[1], w9
337 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
339 %mul.i = fmul <2 x double> %d, <double 16.000000e+00, double 16.000000e+00>
340 %vcvt.i = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %mul.i)
341 ret <2 x i32> %vcvt.i
344 ; Truncate float to i16
; Saturating conversion of <2 x float> scaled by 16.0 (2^4) to <2 x i16>.
; The expected output converts on v0.2s with fcvtzs #4 and then clamps the
; 32-bit lanes with smin / smax against constants built by movi / mvni.
345 define <2 x i16> @test5_sat(<2 x float> %f) {
346 ; CHECK-LABEL: test5_sat:
348 ; CHECK-NEXT: movi v1.2s, #127, msl #8
349 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s, #4
350 ; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s
351 ; CHECK-NEXT: mvni v1.2s, #127, msl #8
352 ; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s
354 %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
355 %vcvt.i = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> %mul.i)
356 ret <2 x i16> %vcvt.i
359 ; Truncate float to i16
; Four-lane version of test5_sat: <4 x float> scaled by 16.0 (2^4) and
; saturated to <4 x i16>. The expected output is fcvtzs on v0.4s with #4
; followed by a saturating narrow (sqxtn) to v0.4h.
360 define <4 x i16> @test5l_sat(<4 x float> %f) {
361 ; CHECK-LABEL: test5l_sat:
363 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s, #4
364 ; CHECK-NEXT: sqxtn v0.4h, v0.4s
366 %mul.i = fmul <4 x float> %f, <float 16.000000e+00, float 16.000000e+00, float 16.000000e+00, float 16.000000e+00>
367 %vcvt.i = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float> %mul.i)
368 ret <4 x i16> %vcvt.i
371 ; Don't convert float to i64
; Saturating conversion of <2 x float> scaled by 16.0 to the wider <2 x i64>.
; The expected output keeps the fmul, converts each lane with a scalar fcvtzs
; into an x register, and rebuilds the result vector with fmov / mov.
372 define <2 x i64> @test6_sat(<2 x float> %f) {
373 ; CHECK-LABEL: test6_sat:
375 ; CHECK-NEXT: fmov v1.2s, #16.00000000
376 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
377 ; CHECK-NEXT: mov s1, v0.s[1]
378 ; CHECK-NEXT: fcvtzs x8, s0
379 ; CHECK-NEXT: fcvtzs x9, s1
380 ; CHECK-NEXT: fmov d0, x8
381 ; CHECK-NEXT: mov v0.d[1], x9
383 %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
384 %vcvt.i = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> %mul.i)
385 ret <2 x i64> %vcvt.i
; Unsigned saturating variant: fmul by a splat of 16.0 (2^4) feeding
; llvm.fptoui.sat to <2 x i32>. The expected output is fcvtzu with #4.
388 define <2 x i32> @test7_sat(<2 x float> %f) {
389 ; CHECK-LABEL: test7_sat:
391 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s, #4
393 %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
394 %vcvt.i = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %mul.i)
395 ret <2 x i32> %vcvt.i
398 ; Test which should not fold due to non-power of 2.
; Saturating counterpart of test8: the multiplier is a splat of 17.0, which is
; not a power of two. The expected output keeps the fmul and uses a plain fcvtzu.
399 define <2 x i32> @test8_sat(<2 x float> %f) {
400 ; CHECK-LABEL: test8_sat:
402 ; CHECK-NEXT: fmov v1.2s, #17.00000000
403 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
404 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s
406 %mul.i = fmul <2 x float> %f, <float 17.000000e+00, float 17.000000e+00>
407 %vcvt.i = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %mul.i)
408 ret <2 x i32> %vcvt.i
411 ; Test which should not fold due to non-matching power of 2.
; Saturating counterpart of test9: the multiplier lanes differ (16.0 and 8.0).
; The expected output loads the vector constant from the constant pool
; (.LCPI27_0), keeps the fmul, and uses a plain fcvtzu.
412 define <2 x i32> @test9_sat(<2 x float> %f) {
413 ; CHECK-LABEL: test9_sat:
415 ; CHECK-NEXT: adrp x8, .LCPI27_0
416 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI27_0]
417 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
418 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s
420 %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 8.000000e+00>
421 %vcvt.i = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %mul.i)
422 ret <2 x i32> %vcvt.i
425 ; Combine all undefs.
; Saturating counterpart of test10: both multiplier lanes are undef.
; The expected output contains no fmul: it splats the bit pattern 0x7fc00000
; into v0.2s and applies a plain fcvtzu.
426 define <2 x i32> @test10_sat(<2 x float> %f) {
427 ; CHECK-LABEL: test10_sat:
429 ; CHECK-NEXT: mov w8, #2143289344 // =0x7fc00000
430 ; CHECK-NEXT: dup v0.2s, w8
431 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s
433 %mul.i = fmul <2 x float> %f, <float undef, float undef>
434 %vcvt.i = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %mul.i)
435 ret <2 x i32> %vcvt.i
438 ; Combine if mix of undef and pow2.
; Saturating counterpart of test11: one multiplier lane is undef and the other
; is 8.0 (2^3). The expected output is fcvtzu with #3 fractional bits.
439 define <2 x i32> @test11_sat(<2 x float> %f) {
440 ; CHECK-LABEL: test11_sat:
442 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s, #3
444 %mul.i = fmul <2 x float> %f, <float undef, float 8.000000e+00>
445 %vcvt.i = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %mul.i)
446 ret <2 x i32> %vcvt.i
449 ; Don't combine when multiplied by 0.0.
; Saturating counterpart of test12: the multiplier is a splat of 0.0.
; The expected output zeroes d1 with movi, keeps the fmul, and uses a plain fcvtzs.
450 define <2 x i32> @test12_sat(<2 x float> %f) {
451 ; CHECK-LABEL: test12_sat:
453 ; CHECK-NEXT: movi d1, #0000000000000000
454 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
455 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s
457 %mul.i = fmul <2 x float> %f, <float 0.000000e+00, float 0.000000e+00>
458 %vcvt.i = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %mul.i)
459 ret <2 x i32> %vcvt.i
462 ; Test which should not fold due to power of 2 out of range (i.e., 2^33).
; Saturating counterpart of test13: the multiplier 0x4200000000000000 is 2^33.
; The expected output materializes the constant with movi, keeps the fmul, and
; uses a plain fcvtzs.
463 define <2 x i32> @test13_sat(<2 x float> %f) {
464 ; CHECK-LABEL: test13_sat:
466 ; CHECK-NEXT: movi v1.2s, #80, lsl #24
467 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
468 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s
470 %mul.i = fmul <2 x float> %f, <float 0x4200000000000000, float 0x4200000000000000>
471 %vcvt.i = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %mul.i)
472 ret <2 x i32> %vcvt.i
475 ; Test case where const is max power of 2 (i.e., 2^32).
; Saturating counterpart of test14: the multiplier 0x41F0000000000000 is 2^32.
; The expected output is fcvtzs with #32 fractional bits and no separate fmul.
476 define <2 x i32> @test14_sat(<2 x float> %f) {
477 ; CHECK-LABEL: test14_sat:
479 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s, #32
481 %mul.i = fmul <2 x float> %f, <float 0x41F0000000000000, float 0x41F0000000000000>
482 %vcvt.i = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %mul.i)
483 ret <2 x i32> %vcvt.i
; Saturating counterpart of test_illegal_fp_to_int: <3 x float> scaled by 4.0
; (2^2) and passed to llvm.fptosi.sat for <3 x i32>. The expected output
; operates on the whole v0.4s register with fcvtzs #2.
486 define <3 x i32> @test_illegal_fp_to_int_sat_sat(<3 x float> %in) {
487 ; CHECK-LABEL: test_illegal_fp_to_int_sat_sat:
489 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s, #2
491 %mul.i = fmul <3 x float> %in, <float 4.0, float 4.0, float 4.0>
492 %vcvt.i = call <3 x i32> @llvm.fptosi.sat.v3i32.v3f32(<3 x float> %mul.i)
493 ret <3 x i32> %vcvt.i
; Saturating conversion of <8 x half> scaled by 4.0 (2^2) to <8 x i16>.
; Two expected outputs are recorded, one per RUN line:
;  - without +fullfp16, each half lane is scaled in single precision and rounded
;    back to half, the reassembled halves are widened with fcvtl, every lane is
;    converted with a scalar fcvtzs, clamped with cmp/csel pairs against
;    w9 (32767) and w11 (-32768), and the results are packed with uzp1;
;  - with +fullfp16, a single fcvtzs on v0.8h with #2 fractional bits.
496 define <8 x i16> @test_v8f16_sat(<8 x half> %in) {
497 ; CHECK-NO16-LABEL: test_v8f16_sat:
498 ; CHECK-NO16: // %bb.0:
499 ; CHECK-NO16-NEXT: mov h2, v0.h[4]
500 ; CHECK-NO16-NEXT: mov h3, v0.h[5]
501 ; CHECK-NO16-NEXT: mov w9, #32767 // =0x7fff
502 ; CHECK-NO16-NEXT: mov h4, v0.h[6]
503 ; CHECK-NO16-NEXT: fmov s1, #4.00000000
504 ; CHECK-NO16-NEXT: mov w11, #-32768 // =0xffff8000
505 ; CHECK-NO16-NEXT: mov h5, v0.h[7]
506 ; CHECK-NO16-NEXT: mov h6, v0.h[1]
507 ; CHECK-NO16-NEXT: mov h7, v0.h[2]
508 ; CHECK-NO16-NEXT: fcvt s16, h0
509 ; CHECK-NO16-NEXT: mov h0, v0.h[3]
510 ; CHECK-NO16-NEXT: fcvt s2, h2
511 ; CHECK-NO16-NEXT: fcvt s3, h3
512 ; CHECK-NO16-NEXT: fcvt s4, h4
513 ; CHECK-NO16-NEXT: fcvt s5, h5
514 ; CHECK-NO16-NEXT: fcvt s6, h6
515 ; CHECK-NO16-NEXT: fcvt s0, h0
516 ; CHECK-NO16-NEXT: fmul s2, s2, s1
517 ; CHECK-NO16-NEXT: fmul s3, s3, s1
518 ; CHECK-NO16-NEXT: fmul s4, s4, s1
519 ; CHECK-NO16-NEXT: fmul s5, s5, s1
520 ; CHECK-NO16-NEXT: fmul s6, s6, s1
521 ; CHECK-NO16-NEXT: fmul s0, s0, s1
522 ; CHECK-NO16-NEXT: fcvt h2, s2
523 ; CHECK-NO16-NEXT: fcvt h3, s3
524 ; CHECK-NO16-NEXT: fcvt h4, s4
525 ; CHECK-NO16-NEXT: fcvt h5, s5
526 ; CHECK-NO16-NEXT: fcvt h6, s6
527 ; CHECK-NO16-NEXT: fcvt h0, s0
528 ; CHECK-NO16-NEXT: mov v2.h[1], v3.h[0]
529 ; CHECK-NO16-NEXT: fcvt s3, h7
530 ; CHECK-NO16-NEXT: fmul s7, s16, s1
531 ; CHECK-NO16-NEXT: mov v2.h[2], v4.h[0]
532 ; CHECK-NO16-NEXT: fmul s3, s3, s1
533 ; CHECK-NO16-NEXT: fcvt h4, s7
534 ; CHECK-NO16-NEXT: mov v2.h[3], v5.h[0]
535 ; CHECK-NO16-NEXT: fcvt h1, s3
536 ; CHECK-NO16-NEXT: mov v4.h[1], v6.h[0]
537 ; CHECK-NO16-NEXT: fcvtl v2.4s, v2.4h
538 ; CHECK-NO16-NEXT: mov v4.h[2], v1.h[0]
539 ; CHECK-NO16-NEXT: mov s3, v2.s[1]
540 ; CHECK-NO16-NEXT: mov v4.h[3], v0.h[0]
541 ; CHECK-NO16-NEXT: mov s0, v2.s[2]
542 ; CHECK-NO16-NEXT: fcvtzs w10, s2
543 ; CHECK-NO16-NEXT: mov s2, v2.s[3]
544 ; CHECK-NO16-NEXT: fcvtzs w8, s3
545 ; CHECK-NO16-NEXT: fcvtl v1.4s, v4.4h
546 ; CHECK-NO16-NEXT: fcvtzs w12, s0
547 ; CHECK-NO16-NEXT: fcvtzs w13, s2
548 ; CHECK-NO16-NEXT: cmp w8, w9
549 ; CHECK-NO16-NEXT: mov s0, v1.s[1]
550 ; CHECK-NO16-NEXT: fcvtzs w15, s1
551 ; CHECK-NO16-NEXT: csel w8, w8, w9, lt
552 ; CHECK-NO16-NEXT: cmn w8, #8, lsl #12 // =32768
553 ; CHECK-NO16-NEXT: csel w8, w8, w11, gt
554 ; CHECK-NO16-NEXT: cmp w10, w9
555 ; CHECK-NO16-NEXT: csel w10, w10, w9, lt
556 ; CHECK-NO16-NEXT: fcvtzs w14, s0
557 ; CHECK-NO16-NEXT: mov s0, v1.s[2]
558 ; CHECK-NO16-NEXT: cmn w10, #8, lsl #12 // =32768
559 ; CHECK-NO16-NEXT: csel w10, w10, w11, gt
560 ; CHECK-NO16-NEXT: cmp w12, w9
561 ; CHECK-NO16-NEXT: csel w12, w12, w9, lt
562 ; CHECK-NO16-NEXT: cmn w12, #8, lsl #12 // =32768
563 ; CHECK-NO16-NEXT: fcvtzs w16, s0
564 ; CHECK-NO16-NEXT: mov s0, v1.s[3]
565 ; CHECK-NO16-NEXT: csel w12, w12, w11, gt
566 ; CHECK-NO16-NEXT: cmp w13, w9
567 ; CHECK-NO16-NEXT: fmov s1, w10
568 ; CHECK-NO16-NEXT: csel w13, w13, w9, lt
569 ; CHECK-NO16-NEXT: cmn w13, #8, lsl #12 // =32768
570 ; CHECK-NO16-NEXT: csel w13, w13, w11, gt
571 ; CHECK-NO16-NEXT: cmp w14, w9
572 ; CHECK-NO16-NEXT: mov v1.s[1], w8
573 ; CHECK-NO16-NEXT: csel w14, w14, w9, lt
574 ; CHECK-NO16-NEXT: fcvtzs w8, s0
575 ; CHECK-NO16-NEXT: cmn w14, #8, lsl #12 // =32768
576 ; CHECK-NO16-NEXT: csel w14, w14, w11, gt
577 ; CHECK-NO16-NEXT: cmp w15, w9
578 ; CHECK-NO16-NEXT: csel w15, w15, w9, lt
579 ; CHECK-NO16-NEXT: mov v1.s[2], w12
580 ; CHECK-NO16-NEXT: cmn w15, #8, lsl #12 // =32768
581 ; CHECK-NO16-NEXT: csel w10, w15, w11, gt
582 ; CHECK-NO16-NEXT: cmp w16, w9
583 ; CHECK-NO16-NEXT: fmov s2, w10
584 ; CHECK-NO16-NEXT: csel w10, w16, w9, lt
585 ; CHECK-NO16-NEXT: cmn w10, #8, lsl #12 // =32768
586 ; CHECK-NO16-NEXT: mov v1.s[3], w13
587 ; CHECK-NO16-NEXT: csel w10, w10, w11, gt
588 ; CHECK-NO16-NEXT: cmp w8, w9
589 ; CHECK-NO16-NEXT: mov v2.s[1], w14
590 ; CHECK-NO16-NEXT: csel w8, w8, w9, lt
591 ; CHECK-NO16-NEXT: cmn w8, #8, lsl #12 // =32768
592 ; CHECK-NO16-NEXT: csel w8, w8, w11, gt
593 ; CHECK-NO16-NEXT: mov v2.s[2], w10
594 ; CHECK-NO16-NEXT: mov v2.s[3], w8
595 ; CHECK-NO16-NEXT: uzp1 v0.8h, v2.8h, v1.8h
596 ; CHECK-NO16-NEXT: ret
598 ; CHECK-FP16-LABEL: test_v8f16_sat:
599 ; CHECK-FP16: // %bb.0:
600 ; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h, #2
601 ; CHECK-FP16-NEXT: ret
602 %mul.i = fmul <8 x half> %in, <half 4.0, half 4.0, half 4.0, half 4.0, half 4.0, half 4.0, half 4.0, half 4.0>
603 %val = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> %mul.i)
; Unsigned saturating conversion of <4 x half> scaled by 4.0 (2^2) to <4 x i16>.
; Two expected outputs are recorded, one per RUN line:
;  - without +fullfp16, the multiply is done at v0.4s between fcvtl/fcvtn pairs,
;    followed by a plain fcvtzu and a saturating narrow (uqxtn);
;  - with +fullfp16, a single fcvtzu on v0.4h with #2 fractional bits.
607 define <4 x i16> @test_v4f16_sat(<4 x half> %in) {
608 ; CHECK-NO16-LABEL: test_v4f16_sat:
609 ; CHECK-NO16: // %bb.0:
610 ; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000
611 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
612 ; CHECK-NO16-NEXT: fmul v0.4s, v0.4s, v1.4s
613 ; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s
614 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
615 ; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s
616 ; CHECK-NO16-NEXT: uqxtn v0.4h, v0.4s
617 ; CHECK-NO16-NEXT: ret
619 ; CHECK-FP16-LABEL: test_v4f16_sat:
620 ; CHECK-FP16: // %bb.0:
621 ; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h, #2
622 ; CHECK-FP16-NEXT: ret
623 %mul.i = fmul <4 x half> %in, <half 4.0, half 4.0, half 4.0, half 4.0>
624 %val = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> %mul.i)
; Saturating conversion of <4 x half> scaled by 4.0 to the wider <4 x i32>.
; Neither expected output uses the fixed-point form of fcvtzs:
;  - without +fullfp16, the multiply is done at v0.4s between fcvtl/fcvtn pairs;
;  - with +fullfp16, the multiply is done at v0.4h, followed by fcvtl and a
;    plain fcvtzs.
628 define <4 x i32> @test_v4f16_i32_sat(<4 x half> %in) {
629 ; CHECK-NO16-LABEL: test_v4f16_i32_sat:
630 ; CHECK-NO16: // %bb.0:
631 ; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000
632 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
633 ; CHECK-NO16-NEXT: fmul v0.4s, v0.4s, v1.4s
634 ; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s
635 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
636 ; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s
637 ; CHECK-NO16-NEXT: ret
639 ; CHECK-FP16-LABEL: test_v4f16_i32_sat:
640 ; CHECK-FP16: // %bb.0:
641 ; CHECK-FP16-NEXT: movi v1.4h, #68, lsl #8
642 ; CHECK-FP16-NEXT: fmul v0.4h, v0.4h, v1.4h
643 ; CHECK-FP16-NEXT: fcvtl v0.4s, v0.4h
644 ; CHECK-FP16-NEXT: fcvtzs v0.4s, v0.4s
645 ; CHECK-FP16-NEXT: ret
646 %mul.i = fmul <4 x half> %in, <half 4.0, half 4.0, half 4.0, half 4.0>
647 %val = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> %mul.i)
651 define <4 x i32> @test_extrasat(<4 x float> %f) {
652 ; CHECK-LABEL: test_extrasat:
654 ; CHECK-NEXT: movi v1.2d, #0xffffff00ffffff
655 ; CHECK-NEXT: fcvtzu v0.4s, v0.4s, #3
656 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
658 %mul.i = fmul <4 x float> %f, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
659 %vcvt.i = call <4 x i24> @llvm.fptoui.sat.v4i24.v4f32(<4 x float> %mul.i)
660 %t = zext <4 x i24> %vcvt.i to <4 x i32>