1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO16
3 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+fullfp16 -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
5 define <2 x i32> @test1(<2 x float> %f) {
8 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s, #4
10 %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
11 %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
15 define <4 x i32> @test2(<4 x float> %f) {
18 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s, #3
20 %mul.i = fmul <4 x float> %f, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
21 %vcvt.i = fptosi <4 x float> %mul.i to <4 x i32>
25 define <2 x i64> @test3(<2 x double> %d) {
28 ; CHECK-NEXT: fcvtzs v0.2d, v0.2d, #5
30 %mul.i = fmul <2 x double> %d, <double 32.000000e+00, double 32.000000e+00>
31 %vcvt.i = fptosi <2 x double> %mul.i to <2 x i64>
35 ; Double to i32: fold the scale into a 64-bit fixed-point fcvtzs, then narrow the result with xtn.
36 define <2 x i32> @test4(<2 x double> %d) {
39 ; CHECK-NEXT: fcvtzs v0.2d, v0.2d, #4
40 ; CHECK-NEXT: xtn v0.2s, v0.2d
42 %mul.i = fmul <2 x double> %d, <double 16.000000e+00, double 16.000000e+00>
43 %vcvt.i = fptosi <2 x double> %mul.i to <2 x i32>
47 ; Float to i16: the scale is folded into a fixed-point fcvtzs performed on 32-bit lanes.
48 define <2 x i16> @test5(<2 x float> %f) {
51 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s, #4
53 %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
54 %vcvt.i = fptosi <2 x float> %mul.i to <2 x i16>
58 ; Don't fold when converting float to i64: the fmul is kept and the input is widened with fcvtl before the conversion.
59 define <2 x i64> @test6(<2 x float> %f) {
62 ; CHECK-NEXT: fmov v1.2s, #16.00000000
63 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
64 ; CHECK-NEXT: fcvtl v0.2d, v0.2s
65 ; CHECK-NEXT: fcvtzs v0.2d, v0.2d
67 %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
68 %vcvt.i = fptosi <2 x float> %mul.i to <2 x i64>
72 define <2 x i32> @test7(<2 x float> %f) {
75 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s, #4
77 %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
78 %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
82 ; Should not fold: the multiplier (17.0) is not a power of 2.
83 define <2 x i32> @test8(<2 x float> %f) {
86 ; CHECK-NEXT: fmov v1.2s, #17.00000000
87 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
88 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s
90 %mul.i = fmul <2 x float> %f, <float 17.000000e+00, float 17.000000e+00>
91 %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
95 ; Should not fold: the lanes are scaled by different powers of 2 (16.0 and 8.0).
96 define <2 x i32> @test9(<2 x float> %f) {
99 ; CHECK-NEXT: adrp x8, .LCPI8_0
100 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI8_0]
101 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
102 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s
104 %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 8.000000e+00>
105 %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
106 ret <2 x i32> %vcvt.i
109 ; All-undef multiplier: no fixed-point conversion is formed; a materialized constant is converted instead.
110 define <2 x i32> @test10(<2 x float> %f) {
111 ; CHECK-LABEL: test10:
113 ; CHECK-NEXT: mvni v0.2s, #63, msl #16
114 ; CHECK-NEXT: fneg v0.2s, v0.2s
115 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s
117 %mul.i = fmul <2 x float> %f, <float undef, float undef>
118 %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
119 ret <2 x i32> %vcvt.i
122 ; Fold when the multiplier mixes undef lanes with a single power of 2 (8.0 gives #3).
123 define <2 x i32> @test11(<2 x float> %f) {
124 ; CHECK-LABEL: test11:
126 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s, #3
128 %mul.i = fmul <2 x float> %f, <float undef, float 8.000000e+00>
129 %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
130 ret <2 x i32> %vcvt.i
133 ; Don't fold when the multiplier is 0.0: the fmul and a plain fcvtzs are kept.
134 define <2 x i32> @test12(<2 x float> %f) {
135 ; CHECK-LABEL: test12:
137 ; CHECK-NEXT: movi d1, #0000000000000000
138 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
139 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s
141 %mul.i = fmul <2 x float> %f, <float 0.000000e+00, float 0.000000e+00>
142 %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
143 ret <2 x i32> %vcvt.i
146 ; Should not fold: the multiplier 2^33 needs 33 fractional bits, which is out of range for 32-bit lanes.
147 define <2 x i32> @test13(<2 x float> %f) {
148 ; CHECK-LABEL: test13:
150 ; CHECK-NEXT: movi v1.2s, #80, lsl #24
151 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
152 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s
154 %mul.i = fmul <2 x float> %f, <float 0x4200000000000000, float 0x4200000000000000>
155 %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
156 ret <2 x i32> %vcvt.i
159 ; Boundary case: 2^32 is the largest power-of-2 multiplier that still folds for 32-bit lanes (#32).
160 define <2 x i32> @test14(<2 x float> %f) {
161 ; CHECK-LABEL: test14:
163 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s, #32
165 %mul.i = fmul <2 x float> %f, <float 0x41F0000000000000, float 0x41F0000000000000>
166 %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
167 ret <2 x i32> %vcvt.i
170 define <3 x i32> @test_illegal_fp_to_int(<3 x float> %in) {
171 ; CHECK-LABEL: test_illegal_fp_to_int:
173 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s, #2
175 %scale = fmul <3 x float> %in, <float 4.0, float 4.0, float 4.0>
176 %val = fptosi <3 x float> %scale to <3 x i32>
180 define <8 x i16> @test_v8f16(<8 x half> %in) {
181 ; CHECK-NO16-LABEL: test_v8f16:
182 ; CHECK-NO16: // %bb.0:
183 ; CHECK-NO16-NEXT: movi v1.8h, #68, lsl #8
184 ; CHECK-NO16-NEXT: fcvtl v2.4s, v0.4h
185 ; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h
186 ; CHECK-NO16-NEXT: fcvtl v3.4s, v1.4h
187 ; CHECK-NO16-NEXT: fcvtl2 v1.4s, v1.8h
188 ; CHECK-NO16-NEXT: fmul v2.4s, v2.4s, v3.4s
189 ; CHECK-NO16-NEXT: fmul v0.4s, v0.4s, v1.4s
190 ; CHECK-NO16-NEXT: fcvtn v1.4h, v2.4s
191 ; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s
192 ; CHECK-NO16-NEXT: fcvtl2 v0.4s, v1.8h
193 ; CHECK-NO16-NEXT: fcvtl v1.4s, v1.4h
194 ; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s
195 ; CHECK-NO16-NEXT: fcvtzs v1.4s, v1.4s
196 ; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v0.8h
197 ; CHECK-NO16-NEXT: ret
199 ; CHECK-FP16-LABEL: test_v8f16:
200 ; CHECK-FP16: // %bb.0:
201 ; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h, #2
202 ; CHECK-FP16-NEXT: ret
203 %scale = fmul <8 x half> %in, <half 4.0, half 4.0, half 4.0, half 4.0, half 4.0, half 4.0, half 4.0, half 4.0>
204 %val = fptosi <8 x half> %scale to <8 x i16>
208 define <4 x i16> @test_v4f16(<4 x half> %in) {
209 ; CHECK-NO16-LABEL: test_v4f16:
210 ; CHECK-NO16: // %bb.0:
211 ; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000
212 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
213 ; CHECK-NO16-NEXT: fmul v0.4s, v0.4s, v1.4s
214 ; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s
215 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
216 ; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s
217 ; CHECK-NO16-NEXT: xtn v0.4h, v0.4s
218 ; CHECK-NO16-NEXT: ret
220 ; CHECK-FP16-LABEL: test_v4f16:
221 ; CHECK-FP16: // %bb.0:
222 ; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h, #2
223 ; CHECK-FP16-NEXT: ret
224 %scale = fmul <4 x half> %in, <half 4.0, half 4.0, half 4.0, half 4.0>
225 %val = fptoui <4 x half> %scale to <4 x i16>
229 define <4 x i32> @test_v4f16_i32(<4 x half> %in) {
230 ; CHECK-NO16-LABEL: test_v4f16_i32:
231 ; CHECK-NO16: // %bb.0:
232 ; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000
233 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
234 ; CHECK-NO16-NEXT: fmul v0.4s, v0.4s, v1.4s
235 ; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s
236 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
237 ; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s
238 ; CHECK-NO16-NEXT: ret
240 ; CHECK-FP16-LABEL: test_v4f16_i32:
241 ; CHECK-FP16: // %bb.0:
242 ; CHECK-FP16-NEXT: movi v1.4h, #68, lsl #8
243 ; CHECK-FP16-NEXT: fmul v0.4h, v0.4h, v1.4h
244 ; CHECK-FP16-NEXT: fcvtl v0.4s, v0.4h
245 ; CHECK-FP16-NEXT: fcvtzs v0.4s, v0.4s
246 ; CHECK-FP16-NEXT: ret
247 %scale = fmul <4 x half> %in, <half 4.0, half 4.0, half 4.0, half 4.0>
248 %val = fptosi <4 x half> %scale to <4 x i32>
253 declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float>)
254 declare <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float>)
255 declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double>)
256 declare <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double>)
257 declare <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float>)
258 declare <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float>)
259 declare <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float>)
260 declare <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float>)
261 declare <3 x i32> @llvm.fptosi.sat.v3i32.v3f32(<3 x float>)
262 declare <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half>)
263 declare <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half>)
264 declare <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half>)
265 declare <4 x i24> @llvm.fptoui.sat.v4i24.v4f32(<4 x float>)
267 define <2 x i32> @test1_sat(<2 x float> %f) {
268 ; CHECK-LABEL: test1_sat:
270 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s, #4
272 %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
273 %vcvt.i = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %mul.i)
274 ret <2 x i32> %vcvt.i
277 define <4 x i32> @test2_sat(<4 x float> %f) {
278 ; CHECK-LABEL: test2_sat:
280 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s, #3
282 %mul.i = fmul <4 x float> %f, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
283 %vcvt.i = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %mul.i)
284 ret <4 x i32> %vcvt.i
287 define <2 x i64> @test3_sat(<2 x double> %d) {
288 ; CHECK-LABEL: test3_sat:
290 ; CHECK-NEXT: fcvtzs v0.2d, v0.2d, #5
292 %mul.i = fmul <2 x double> %d, <double 32.000000e+00, double 32.000000e+00>
293 %vcvt.i = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %mul.i)
294 ret <2 x i64> %vcvt.i
297 ; Saturating double to i32: not folded; the fmul is kept and each lane is converted with a scalar fcvtzs.
298 define <2 x i32> @test4_sat(<2 x double> %d) {
299 ; CHECK-LABEL: test4_sat:
301 ; CHECK-NEXT: fmov v1.2d, #16.00000000
302 ; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
303 ; CHECK-NEXT: mov d1, v0.d[1]
304 ; CHECK-NEXT: fcvtzs w8, d0
305 ; CHECK-NEXT: fcvtzs w9, d1
306 ; CHECK-NEXT: fmov s0, w8
307 ; CHECK-NEXT: mov v0.s[1], w9
308 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
310 %mul.i = fmul <2 x double> %d, <double 16.000000e+00, double 16.000000e+00>
311 %vcvt.i = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %mul.i)
312 ret <2 x i32> %vcvt.i
315 ; Saturating float to i16 (2 lanes): fold the scale, then clamp to the i16 range with smin/smax.
316 define <2 x i16> @test5_sat(<2 x float> %f) {
317 ; CHECK-LABEL: test5_sat:
319 ; CHECK-NEXT: movi v1.2s, #127, msl #8
320 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s, #4
321 ; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s
322 ; CHECK-NEXT: mvni v1.2s, #127, msl #8
323 ; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s
325 %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
326 %vcvt.i = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> %mul.i)
327 ret <2 x i16> %vcvt.i
330 ; Saturating float to i16 (4 lanes): fold the scale, then narrow with the saturating sqxtn.
331 define <4 x i16> @test5l_sat(<4 x float> %f) {
332 ; CHECK-LABEL: test5l_sat:
334 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s, #4
335 ; CHECK-NEXT: sqxtn v0.4h, v0.4s
337 %mul.i = fmul <4 x float> %f, <float 16.000000e+00, float 16.000000e+00, float 16.000000e+00, float 16.000000e+00>
338 %vcvt.i = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f32(<4 x float> %mul.i)
339 ret <4 x i16> %vcvt.i
342 ; Don't fold when saturating float to i64: the fmul is kept and the input is widened with fcvtl before the conversion.
343 define <2 x i64> @test6_sat(<2 x float> %f) {
344 ; CHECK-LABEL: test6_sat:
346 ; CHECK-NEXT: fmov v1.2s, #16.00000000
347 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
348 ; CHECK-NEXT: fcvtl v0.2d, v0.2s
349 ; CHECK-NEXT: fcvtzs v0.2d, v0.2d
351 %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
352 %vcvt.i = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> %mul.i)
353 ret <2 x i64> %vcvt.i
356 define <2 x i32> @test7_sat(<2 x float> %f) {
357 ; CHECK-LABEL: test7_sat:
359 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s, #4
361 %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
362 %vcvt.i = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %mul.i)
363 ret <2 x i32> %vcvt.i
366 ; Saturating variant; should not fold: the multiplier (17.0) is not a power of 2.
367 define <2 x i32> @test8_sat(<2 x float> %f) {
368 ; CHECK-LABEL: test8_sat:
370 ; CHECK-NEXT: fmov v1.2s, #17.00000000
371 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
372 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s
374 %mul.i = fmul <2 x float> %f, <float 17.000000e+00, float 17.000000e+00>
375 %vcvt.i = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %mul.i)
376 ret <2 x i32> %vcvt.i
379 ; Saturating variant; should not fold: the lanes are scaled by different powers of 2 (16.0 and 8.0).
380 define <2 x i32> @test9_sat(<2 x float> %f) {
381 ; CHECK-LABEL: test9_sat:
383 ; CHECK-NEXT: adrp x8, .LCPI27_0
384 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI27_0]
385 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
386 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s
388 %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 8.000000e+00>
389 %vcvt.i = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %mul.i)
390 ret <2 x i32> %vcvt.i
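393 ; Saturating variant, all-undef multiplier: no fixed-point conversion is formed; a materialized constant is converted instead.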
394 define <2 x i32> @test10_sat(<2 x float> %f) {
395 ; CHECK-LABEL: test10_sat:
397 ; CHECK-NEXT: mvni v0.2s, #63, msl #16
398 ; CHECK-NEXT: fneg v0.2s, v0.2s
399 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s
401 %mul.i = fmul <2 x float> %f, <float undef, float undef>
402 %vcvt.i = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %mul.i)
403 ret <2 x i32> %vcvt.i
406 ; Saturating variant: fold when the multiplier mixes undef lanes with a single power of 2 (8.0 gives #3).
407 define <2 x i32> @test11_sat(<2 x float> %f) {
408 ; CHECK-LABEL: test11_sat:
410 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s, #3
412 %mul.i = fmul <2 x float> %f, <float undef, float 8.000000e+00>
413 %vcvt.i = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %mul.i)
414 ret <2 x i32> %vcvt.i
417 ; Saturating variant: don't fold when the multiplier is 0.0; the fmul and a plain fcvtzs are kept.
418 define <2 x i32> @test12_sat(<2 x float> %f) {
419 ; CHECK-LABEL: test12_sat:
421 ; CHECK-NEXT: movi d1, #0000000000000000
422 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
423 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s
425 %mul.i = fmul <2 x float> %f, <float 0.000000e+00, float 0.000000e+00>
426 %vcvt.i = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %mul.i)
427 ret <2 x i32> %vcvt.i
430 ; Saturating variant; should not fold: the multiplier 2^33 needs 33 fractional bits, which is out of range for 32-bit lanes.
431 define <2 x i32> @test13_sat(<2 x float> %f) {
432 ; CHECK-LABEL: test13_sat:
434 ; CHECK-NEXT: movi v1.2s, #80, lsl #24
435 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
436 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s
438 %mul.i = fmul <2 x float> %f, <float 0x4200000000000000, float 0x4200000000000000>
439 %vcvt.i = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %mul.i)
440 ret <2 x i32> %vcvt.i
443 ; Saturating variant, boundary case: 2^32 is the largest power-of-2 multiplier that still folds for 32-bit lanes (#32).
444 define <2 x i32> @test14_sat(<2 x float> %f) {
445 ; CHECK-LABEL: test14_sat:
447 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s, #32
449 %mul.i = fmul <2 x float> %f, <float 0x41F0000000000000, float 0x41F0000000000000>
450 %vcvt.i = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %mul.i)
451 ret <2 x i32> %vcvt.i
454 define <3 x i32> @test_illegal_fp_to_int_sat_sat(<3 x float> %in) {
455 ; CHECK-LABEL: test_illegal_fp_to_int_sat_sat:
457 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s, #2
459 %mul.i = fmul <3 x float> %in, <float 4.0, float 4.0, float 4.0>
460 %vcvt.i = call <3 x i32> @llvm.fptosi.sat.v3i32.v3f32(<3 x float> %mul.i)
461 ret <3 x i32> %vcvt.i
464 define <8 x i16> @test_v8f16_sat(<8 x half> %in) {
465 ; CHECK-NO16-LABEL: test_v8f16_sat:
466 ; CHECK-NO16: // %bb.0:
467 ; CHECK-NO16-NEXT: movi v1.8h, #68, lsl #8
468 ; CHECK-NO16-NEXT: fcvtl v2.4s, v0.4h
469 ; CHECK-NO16-NEXT: mov w8, #32767 // =0x7fff
470 ; CHECK-NO16-NEXT: fcvtl2 v0.4s, v0.8h
471 ; CHECK-NO16-NEXT: mov w11, #-32768 // =0xffff8000
472 ; CHECK-NO16-NEXT: fcvtl v3.4s, v1.4h
473 ; CHECK-NO16-NEXT: fcvtl2 v1.4s, v1.8h
474 ; CHECK-NO16-NEXT: fmul v2.4s, v2.4s, v3.4s
475 ; CHECK-NO16-NEXT: fmul v0.4s, v0.4s, v1.4s
476 ; CHECK-NO16-NEXT: fcvtn v1.4h, v2.4s
477 ; CHECK-NO16-NEXT: fcvtn2 v1.8h, v0.4s
478 ; CHECK-NO16-NEXT: fcvtl2 v0.4s, v1.8h
479 ; CHECK-NO16-NEXT: fcvtl v1.4s, v1.4h
480 ; CHECK-NO16-NEXT: mov s2, v0.s[1]
481 ; CHECK-NO16-NEXT: fcvtzs w10, s0
482 ; CHECK-NO16-NEXT: fcvtzs w15, s1
483 ; CHECK-NO16-NEXT: fcvtzs w9, s2
484 ; CHECK-NO16-NEXT: mov s2, v0.s[2]
485 ; CHECK-NO16-NEXT: mov s0, v0.s[3]
486 ; CHECK-NO16-NEXT: cmp w9, w8
487 ; CHECK-NO16-NEXT: fcvtzs w12, s2
488 ; CHECK-NO16-NEXT: mov s2, v1.s[1]
489 ; CHECK-NO16-NEXT: csel w9, w9, w8, lt
490 ; CHECK-NO16-NEXT: fcvtzs w13, s0
491 ; CHECK-NO16-NEXT: mov s0, v1.s[2]
492 ; CHECK-NO16-NEXT: cmn w9, #8, lsl #12 // =32768
493 ; CHECK-NO16-NEXT: csel w9, w9, w11, gt
494 ; CHECK-NO16-NEXT: cmp w10, w8
495 ; CHECK-NO16-NEXT: csel w10, w10, w8, lt
496 ; CHECK-NO16-NEXT: fcvtzs w14, s2
497 ; CHECK-NO16-NEXT: cmn w10, #8, lsl #12 // =32768
498 ; CHECK-NO16-NEXT: fcvtzs w16, s0
499 ; CHECK-NO16-NEXT: mov s0, v1.s[3]
500 ; CHECK-NO16-NEXT: csel w10, w10, w11, gt
501 ; CHECK-NO16-NEXT: cmp w12, w8
502 ; CHECK-NO16-NEXT: csel w12, w12, w8, lt
503 ; CHECK-NO16-NEXT: fmov s1, w10
504 ; CHECK-NO16-NEXT: cmn w12, #8, lsl #12 // =32768
505 ; CHECK-NO16-NEXT: csel w12, w12, w11, gt
506 ; CHECK-NO16-NEXT: cmp w13, w8
507 ; CHECK-NO16-NEXT: csel w13, w13, w8, lt
508 ; CHECK-NO16-NEXT: mov v1.s[1], w9
509 ; CHECK-NO16-NEXT: fcvtzs w9, s0
510 ; CHECK-NO16-NEXT: cmn w13, #8, lsl #12 // =32768
511 ; CHECK-NO16-NEXT: csel w13, w13, w11, gt
512 ; CHECK-NO16-NEXT: cmp w14, w8
513 ; CHECK-NO16-NEXT: csel w14, w14, w8, lt
514 ; CHECK-NO16-NEXT: cmn w14, #8, lsl #12 // =32768
515 ; CHECK-NO16-NEXT: mov v1.s[2], w12
516 ; CHECK-NO16-NEXT: csel w14, w14, w11, gt
517 ; CHECK-NO16-NEXT: cmp w15, w8
518 ; CHECK-NO16-NEXT: csel w15, w15, w8, lt
519 ; CHECK-NO16-NEXT: cmn w15, #8, lsl #12 // =32768
520 ; CHECK-NO16-NEXT: csel w10, w15, w11, gt
521 ; CHECK-NO16-NEXT: cmp w16, w8
522 ; CHECK-NO16-NEXT: mov v1.s[3], w13
523 ; CHECK-NO16-NEXT: fmov s2, w10
524 ; CHECK-NO16-NEXT: csel w10, w16, w8, lt
525 ; CHECK-NO16-NEXT: cmn w10, #8, lsl #12 // =32768
526 ; CHECK-NO16-NEXT: csel w10, w10, w11, gt
527 ; CHECK-NO16-NEXT: cmp w9, w8
528 ; CHECK-NO16-NEXT: mov v2.s[1], w14
529 ; CHECK-NO16-NEXT: csel w8, w9, w8, lt
530 ; CHECK-NO16-NEXT: cmn w8, #8, lsl #12 // =32768
531 ; CHECK-NO16-NEXT: csel w8, w8, w11, gt
532 ; CHECK-NO16-NEXT: mov v2.s[2], w10
533 ; CHECK-NO16-NEXT: mov v2.s[3], w8
534 ; CHECK-NO16-NEXT: uzp1 v0.8h, v2.8h, v1.8h
535 ; CHECK-NO16-NEXT: ret
537 ; CHECK-FP16-LABEL: test_v8f16_sat:
538 ; CHECK-FP16: // %bb.0:
539 ; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h, #2
540 ; CHECK-FP16-NEXT: ret
541 %mul.i = fmul <8 x half> %in, <half 4.0, half 4.0, half 4.0, half 4.0, half 4.0, half 4.0, half 4.0, half 4.0>
542 %val = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> %mul.i)
546 define <4 x i16> @test_v4f16_sat(<4 x half> %in) {
547 ; CHECK-NO16-LABEL: test_v4f16_sat:
548 ; CHECK-NO16: // %bb.0:
549 ; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000
550 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
551 ; CHECK-NO16-NEXT: fmul v0.4s, v0.4s, v1.4s
552 ; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s
553 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
554 ; CHECK-NO16-NEXT: fcvtzu v0.4s, v0.4s
555 ; CHECK-NO16-NEXT: uqxtn v0.4h, v0.4s
556 ; CHECK-NO16-NEXT: ret
558 ; CHECK-FP16-LABEL: test_v4f16_sat:
559 ; CHECK-FP16: // %bb.0:
560 ; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h, #2
561 ; CHECK-FP16-NEXT: ret
562 %mul.i = fmul <4 x half> %in, <half 4.0, half 4.0, half 4.0, half 4.0>
563 %val = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> %mul.i)
567 define <4 x i32> @test_v4f16_i32_sat(<4 x half> %in) {
568 ; CHECK-NO16-LABEL: test_v4f16_i32_sat:
569 ; CHECK-NO16: // %bb.0:
570 ; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000
571 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
572 ; CHECK-NO16-NEXT: fmul v0.4s, v0.4s, v1.4s
573 ; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s
574 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h
575 ; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s
576 ; CHECK-NO16-NEXT: ret
578 ; CHECK-FP16-LABEL: test_v4f16_i32_sat:
579 ; CHECK-FP16: // %bb.0:
580 ; CHECK-FP16-NEXT: movi v1.4h, #68, lsl #8
581 ; CHECK-FP16-NEXT: fmul v0.4h, v0.4h, v1.4h
582 ; CHECK-FP16-NEXT: fcvtl v0.4s, v0.4h
583 ; CHECK-FP16-NEXT: fcvtzs v0.4s, v0.4s
584 ; CHECK-FP16-NEXT: ret
585 %mul.i = fmul <4 x half> %in, <half 4.0, half 4.0, half 4.0, half 4.0>
586 %val = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> %mul.i)
590 define <4 x i32> @test_extrasat(<4 x float> %f) {
591 ; CHECK-LABEL: test_extrasat:
593 ; CHECK-NEXT: movi v1.2d, #0xffffff00ffffff
594 ; CHECK-NEXT: fcvtzu v0.4s, v0.4s, #3
595 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
597 %mul.i = fmul <4 x float> %f, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
598 %vcvt.i = call <4 x i24> @llvm.fptoui.sat.v4i24.v4f32(<4 x float> %mul.i)
599 %t = zext <4 x i24> %vcvt.i to <4 x i32>