; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP
; Vector ceil on <4 x float>.
; For the +mve,+fullfp16 configuration the expected code rounds each lane with
; a scalar vrintp.f32 into q1 (s4-s7) and then copies q1 back to q0. For the
; +mve.fp configuration a single vector vrintp.f32 on q0 is expected.
define arm_aapcs_vfpcc <4 x float> @fceil_float32_t(<4 x float> %src) {
; CHECK-MVE-LABEL: fceil_float32_t:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vrintp.f32 s7, s3
; CHECK-MVE-NEXT: vrintp.f32 s6, s2
; CHECK-MVE-NEXT: vrintp.f32 s5, s1
; CHECK-MVE-NEXT: vrintp.f32 s4, s0
; CHECK-MVE-NEXT: vmov q0, q1
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: fceil_float32_t:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vrintp.f32 q0, q0
; CHECK-MVEFP-NEXT: bx lr
entry:
  %0 = call fast <4 x float> @llvm.ceil.v4f32(<4 x float> %src)
  ret <4 x float> %0
}
; Vector ceil on <8 x half>.
; For the +mve,+fullfp16 configuration the expected code handles the lanes one
; at a time: extract with vmov.u16, round with a scalar vrintp.f16, and insert
; into q1 with vmov.16, finishing with a copy of q1 to q0. For the +mve.fp
; configuration a single vector vrintp.f16 on q0 is expected.
define arm_aapcs_vfpcc <8 x half> @fceil_float16_t(<8 x half> %src) {
; CHECK-MVE-LABEL: fceil_float16_t:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vmov.u16 r0, q0[0]
; CHECK-MVE-NEXT: vmov.u16 r1, q0[1]
; CHECK-MVE-NEXT: vmov s4, r0
; CHECK-MVE-NEXT: vrintp.f16 s4, s4
; CHECK-MVE-NEXT: vmov r0, s4
; CHECK-MVE-NEXT: vmov s4, r1
; CHECK-MVE-NEXT: vrintp.f16 s4, s4
; CHECK-MVE-NEXT: vmov r1, s4
; CHECK-MVE-NEXT: vmov.16 q1[0], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[2]
; CHECK-MVE-NEXT: vmov.16 q1[1], r1
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vrintp.f16 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[2], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[3]
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vrintp.f16 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[3], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[4]
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vrintp.f16 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[4], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[5]
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vrintp.f16 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[5], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[6]
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vrintp.f16 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[6], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[7]
; CHECK-MVE-NEXT: vmov s0, r0
; CHECK-MVE-NEXT: vrintp.f16 s0, s0
; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmov.16 q1[7], r0
; CHECK-MVE-NEXT: vmov q0, q1
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: fceil_float16_t:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vrintp.f16 q0, q0
; CHECK-MVEFP-NEXT: bx lr
entry:
  %0 = call fast <8 x half> @llvm.ceil.v8f16(<8 x half> %src)
  ret <8 x half> %0
}
; Vector ceil on <2 x double>.
; Both llc configurations share one expected sequence: d8/d9 are saved, the
; input is kept in q4, and each double is moved to r0/r1 and passed to a call
; to ceil; the two results are reassembled in q4 and copied to q0.
define arm_aapcs_vfpcc <2 x double> @fceil_float64_t(<2 x double> %src) {
; CHECK-LABEL: fceil_float64_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: bl ceil
; CHECK-NEXT: vmov r2, r3, d8
; CHECK-NEXT: vmov d9, r0, r1
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: bl ceil
; CHECK-NEXT: vmov d8, r0, r1
; CHECK-NEXT: vmov q0, q4
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.ceil.v2f64(<2 x double> %src)
  ret <2 x double> %0
}
; Vector trunc on <4 x float>.
; For the +mve,+fullfp16 configuration the expected code rounds each lane with
; a scalar vrintz.f32 into q1 (s4-s7) and then copies q1 back to q0. For the
; +mve.fp configuration a single vector vrintz.f32 on q0 is expected.
define arm_aapcs_vfpcc <4 x float> @ftrunc_float32_t(<4 x float> %src) {
; CHECK-MVE-LABEL: ftrunc_float32_t:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vrintz.f32 s7, s3
; CHECK-MVE-NEXT: vrintz.f32 s6, s2
; CHECK-MVE-NEXT: vrintz.f32 s5, s1
; CHECK-MVE-NEXT: vrintz.f32 s4, s0
; CHECK-MVE-NEXT: vmov q0, q1
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: ftrunc_float32_t:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vrintz.f32 q0, q0
; CHECK-MVEFP-NEXT: bx lr
entry:
  %0 = call fast <4 x float> @llvm.trunc.v4f32(<4 x float> %src)
  ret <4 x float> %0
}
; Vector trunc on <8 x half>.
; For the +mve,+fullfp16 configuration the expected code handles the lanes one
; at a time: extract with vmov.u16, round with a scalar vrintz.f16, and insert
; into q1 with vmov.16, finishing with a copy of q1 to q0. For the +mve.fp
; configuration a single vector vrintz.f16 on q0 is expected.
define arm_aapcs_vfpcc <8 x half> @ftrunc_float16_t(<8 x half> %src) {
; CHECK-MVE-LABEL: ftrunc_float16_t:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vmov.u16 r0, q0[0]
; CHECK-MVE-NEXT: vmov.u16 r1, q0[1]
; CHECK-MVE-NEXT: vmov s4, r0
; CHECK-MVE-NEXT: vrintz.f16 s4, s4
; CHECK-MVE-NEXT: vmov r0, s4
; CHECK-MVE-NEXT: vmov s4, r1
; CHECK-MVE-NEXT: vrintz.f16 s4, s4
; CHECK-MVE-NEXT: vmov r1, s4
; CHECK-MVE-NEXT: vmov.16 q1[0], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[2]
; CHECK-MVE-NEXT: vmov.16 q1[1], r1
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vrintz.f16 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[2], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[3]
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vrintz.f16 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[3], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[4]
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vrintz.f16 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[4], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[5]
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vrintz.f16 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[5], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[6]
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vrintz.f16 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[6], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[7]
; CHECK-MVE-NEXT: vmov s0, r0
; CHECK-MVE-NEXT: vrintz.f16 s0, s0
; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmov.16 q1[7], r0
; CHECK-MVE-NEXT: vmov q0, q1
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: ftrunc_float16_t:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vrintz.f16 q0, q0
; CHECK-MVEFP-NEXT: bx lr
entry:
  %0 = call fast <8 x half> @llvm.trunc.v8f16(<8 x half> %src)
  ret <8 x half> %0
}
; Vector trunc on <2 x double>.
; Both llc configurations share one expected sequence: d8/d9 are saved, the
; input is kept in q4, and each double is moved to r0/r1 and passed to a call
; to trunc; the two results are reassembled in q4 and copied to q0.
define arm_aapcs_vfpcc <2 x double> @ftrunc_float64_t(<2 x double> %src) {
; CHECK-LABEL: ftrunc_float64_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: bl trunc
; CHECK-NEXT: vmov r2, r3, d8
; CHECK-NEXT: vmov d9, r0, r1
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: bl trunc
; CHECK-NEXT: vmov d8, r0, r1
; CHECK-NEXT: vmov q0, q4
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.trunc.v2f64(<2 x double> %src)
  ret <2 x double> %0
}
; Vector rint on <4 x float>.
; For the +mve,+fullfp16 configuration the expected code rounds each lane with
; a scalar vrintx.f32 into q1 (s4-s7) and then copies q1 back to q0. For the
; +mve.fp configuration a single vector vrintx.f32 on q0 is expected.
define arm_aapcs_vfpcc <4 x float> @frint_float32_t(<4 x float> %src) {
; CHECK-MVE-LABEL: frint_float32_t:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vrintx.f32 s7, s3
; CHECK-MVE-NEXT: vrintx.f32 s6, s2
; CHECK-MVE-NEXT: vrintx.f32 s5, s1
; CHECK-MVE-NEXT: vrintx.f32 s4, s0
; CHECK-MVE-NEXT: vmov q0, q1
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: frint_float32_t:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vrintx.f32 q0, q0
; CHECK-MVEFP-NEXT: bx lr
entry:
  %0 = call fast <4 x float> @llvm.rint.v4f32(<4 x float> %src)
  ret <4 x float> %0
}
; Vector rint on <8 x half>.
; For the +mve,+fullfp16 configuration the expected code handles the lanes one
; at a time: extract with vmov.u16, round with a scalar vrintx.f16, and insert
; into q1 with vmov.16, finishing with a copy of q1 to q0. For the +mve.fp
; configuration a single vector vrintx.f16 on q0 is expected.
define arm_aapcs_vfpcc <8 x half> @frint_float16_t(<8 x half> %src) {
; CHECK-MVE-LABEL: frint_float16_t:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vmov.u16 r0, q0[0]
; CHECK-MVE-NEXT: vmov.u16 r1, q0[1]
; CHECK-MVE-NEXT: vmov s4, r0
; CHECK-MVE-NEXT: vrintx.f16 s4, s4
; CHECK-MVE-NEXT: vmov r0, s4
; CHECK-MVE-NEXT: vmov s4, r1
; CHECK-MVE-NEXT: vrintx.f16 s4, s4
; CHECK-MVE-NEXT: vmov r1, s4
; CHECK-MVE-NEXT: vmov.16 q1[0], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[2]
; CHECK-MVE-NEXT: vmov.16 q1[1], r1
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vrintx.f16 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[2], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[3]
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vrintx.f16 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[3], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[4]
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vrintx.f16 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[4], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[5]
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vrintx.f16 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[5], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[6]
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vrintx.f16 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[6], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[7]
; CHECK-MVE-NEXT: vmov s0, r0
; CHECK-MVE-NEXT: vrintx.f16 s0, s0
; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmov.16 q1[7], r0
; CHECK-MVE-NEXT: vmov q0, q1
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: frint_float16_t:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vrintx.f16 q0, q0
; CHECK-MVEFP-NEXT: bx lr
entry:
  %0 = call fast <8 x half> @llvm.rint.v8f16(<8 x half> %src)
  ret <8 x half> %0
}
; Vector rint on <2 x double>.
; Both llc configurations share one expected sequence: d8/d9 are saved, the
; input is kept in q4, and each double is moved to r0/r1 and passed to a call
; to rint; the two results are reassembled in q4 and copied to q0.
define arm_aapcs_vfpcc <2 x double> @frint_float64_t(<2 x double> %src) {
; CHECK-LABEL: frint_float64_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: bl rint
; CHECK-NEXT: vmov r2, r3, d8
; CHECK-NEXT: vmov d9, r0, r1
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: bl rint
; CHECK-NEXT: vmov d8, r0, r1
; CHECK-NEXT: vmov q0, q4
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.rint.v2f64(<2 x double> %src)
  ret <2 x double> %0
}
; Vector nearbyint on <4 x float>.
; Both llc configurations share one expected sequence (common prefix): each
; lane is rounded with a scalar vrintr.f32 into q1 (s4-s7), q1 is copied back
; to q0, and the function returns with bx lr.
define arm_aapcs_vfpcc <4 x float> @fnearbyint_float32_t(<4 x float> %src) {
; CHECK-LABEL: fnearbyint_float32_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vrintr.f32 s7, s3
; CHECK-NEXT: vrintr.f32 s6, s2
; CHECK-NEXT: vrintr.f32 s5, s1
; CHECK-NEXT: vrintr.f32 s4, s0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %0 = call fast <4 x float> @llvm.nearbyint.v4f32(<4 x float> %src)
  ret <4 x float> %0
}
; Vector nearbyint on <8 x half>.
; Both llc configurations share one expected sequence (common prefix): the
; lanes are handled one at a time - extract with vmov.u16, round with a scalar
; vrintr.f16, insert into q1 with vmov.16 - then q1 is copied to q0 and the
; function returns with bx lr.
define arm_aapcs_vfpcc <8 x half> @fnearbyint_float16_t(<8 x half> %src) {
; CHECK-LABEL: fnearbyint_float16_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.u16 r0, q0[0]
; CHECK-NEXT: vmov.u16 r1, q0[1]
; CHECK-NEXT: vmov s4, r0
; CHECK-NEXT: vrintr.f16 s4, s4
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vmov s4, r1
; CHECK-NEXT: vrintr.f16 s4, s4
; CHECK-NEXT: vmov r1, s4
; CHECK-NEXT: vmov.16 q1[0], r0
; CHECK-NEXT: vmov.u16 r0, q0[2]
; CHECK-NEXT: vmov.16 q1[1], r1
; CHECK-NEXT: vmov s8, r0
; CHECK-NEXT: vrintr.f16 s8, s8
; CHECK-NEXT: vmov r0, s8
; CHECK-NEXT: vmov.16 q1[2], r0
; CHECK-NEXT: vmov.u16 r0, q0[3]
; CHECK-NEXT: vmov s8, r0
; CHECK-NEXT: vrintr.f16 s8, s8
; CHECK-NEXT: vmov r0, s8
; CHECK-NEXT: vmov.16 q1[3], r0
; CHECK-NEXT: vmov.u16 r0, q0[4]
; CHECK-NEXT: vmov s8, r0
; CHECK-NEXT: vrintr.f16 s8, s8
; CHECK-NEXT: vmov r0, s8
; CHECK-NEXT: vmov.16 q1[4], r0
; CHECK-NEXT: vmov.u16 r0, q0[5]
; CHECK-NEXT: vmov s8, r0
; CHECK-NEXT: vrintr.f16 s8, s8
; CHECK-NEXT: vmov r0, s8
; CHECK-NEXT: vmov.16 q1[5], r0
; CHECK-NEXT: vmov.u16 r0, q0[6]
; CHECK-NEXT: vmov s8, r0
; CHECK-NEXT: vrintr.f16 s8, s8
; CHECK-NEXT: vmov r0, s8
; CHECK-NEXT: vmov.16 q1[6], r0
; CHECK-NEXT: vmov.u16 r0, q0[7]
; CHECK-NEXT: vmov s0, r0
; CHECK-NEXT: vrintr.f16 s0, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vmov.16 q1[7], r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %0 = call fast <8 x half> @llvm.nearbyint.v8f16(<8 x half> %src)
  ret <8 x half> %0
}
; Vector nearbyint on <2 x double>.
; Both llc configurations share one expected sequence: d8/d9 are saved, the
; input is kept in q4, and each double is moved to r0/r1 and passed to a call
; to nearbyint; the two results are reassembled in q4 and copied to q0.
define arm_aapcs_vfpcc <2 x double> @fnearbyint_float64_t(<2 x double> %src) {
; CHECK-LABEL: fnearbyint_float64_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: bl nearbyint
; CHECK-NEXT: vmov r2, r3, d8
; CHECK-NEXT: vmov d9, r0, r1
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: bl nearbyint
; CHECK-NEXT: vmov d8, r0, r1
; CHECK-NEXT: vmov q0, q4
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.nearbyint.v2f64(<2 x double> %src)
  ret <2 x double> %0
}
; Vector floor on <4 x float>.
; For the +mve,+fullfp16 configuration the expected code rounds each lane with
; a scalar vrintm.f32 into q1 (s4-s7) and then copies q1 back to q0. For the
; +mve.fp configuration a single vector vrintm.f32 on q0 is expected.
define arm_aapcs_vfpcc <4 x float> @ffloor_float32_t(<4 x float> %src) {
; CHECK-MVE-LABEL: ffloor_float32_t:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vrintm.f32 s7, s3
; CHECK-MVE-NEXT: vrintm.f32 s6, s2
; CHECK-MVE-NEXT: vrintm.f32 s5, s1
; CHECK-MVE-NEXT: vrintm.f32 s4, s0
; CHECK-MVE-NEXT: vmov q0, q1
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: ffloor_float32_t:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vrintm.f32 q0, q0
; CHECK-MVEFP-NEXT: bx lr
entry:
  %0 = call fast <4 x float> @llvm.floor.v4f32(<4 x float> %src)
  ret <4 x float> %0
}
; Vector floor on <8 x half>.
; For the +mve,+fullfp16 configuration the expected code handles the lanes one
; at a time: extract with vmov.u16, round with a scalar vrintm.f16, and insert
; into q1 with vmov.16, finishing with a copy of q1 to q0. For the +mve.fp
; configuration a single vector vrintm.f16 on q0 is expected.
define arm_aapcs_vfpcc <8 x half> @ffloor_float16_t(<8 x half> %src) {
; CHECK-MVE-LABEL: ffloor_float16_t:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vmov.u16 r0, q0[0]
; CHECK-MVE-NEXT: vmov.u16 r1, q0[1]
; CHECK-MVE-NEXT: vmov s4, r0
; CHECK-MVE-NEXT: vrintm.f16 s4, s4
; CHECK-MVE-NEXT: vmov r0, s4
; CHECK-MVE-NEXT: vmov s4, r1
; CHECK-MVE-NEXT: vrintm.f16 s4, s4
; CHECK-MVE-NEXT: vmov r1, s4
; CHECK-MVE-NEXT: vmov.16 q1[0], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[2]
; CHECK-MVE-NEXT: vmov.16 q1[1], r1
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vrintm.f16 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[2], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[3]
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vrintm.f16 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[3], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[4]
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vrintm.f16 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[4], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[5]
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vrintm.f16 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[5], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[6]
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vrintm.f16 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[6], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[7]
; CHECK-MVE-NEXT: vmov s0, r0
; CHECK-MVE-NEXT: vrintm.f16 s0, s0
; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmov.16 q1[7], r0
; CHECK-MVE-NEXT: vmov q0, q1
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: ffloor_float16_t:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vrintm.f16 q0, q0
; CHECK-MVEFP-NEXT: bx lr
entry:
  %0 = call fast <8 x half> @llvm.floor.v8f16(<8 x half> %src)
  ret <8 x half> %0
}
; Vector floor on <2 x double>.
; Both llc configurations share one expected sequence: d8/d9 are saved, the
; input is kept in q4, and each double is moved to r0/r1 and passed to a call
; to floor; the two results are reassembled in q4 and copied to q0.
define arm_aapcs_vfpcc <2 x double> @ffloor_float64_t(<2 x double> %src) {
; CHECK-LABEL: ffloor_float64_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: bl floor
; CHECK-NEXT: vmov r2, r3, d8
; CHECK-NEXT: vmov d9, r0, r1
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: bl floor
; CHECK-NEXT: vmov d8, r0, r1
; CHECK-NEXT: vmov q0, q4
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %src)
  ret <2 x double> %0
}
; Vector round on <4 x float>.
; For the +mve,+fullfp16 configuration the expected code rounds each lane with
; a scalar vrinta.f32 into q1 (s4-s7) and then copies q1 back to q0. For the
; +mve.fp configuration a single vector vrinta.f32 on q0 is expected.
define arm_aapcs_vfpcc <4 x float> @fround_float32_t(<4 x float> %src) {
; CHECK-MVE-LABEL: fround_float32_t:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vrinta.f32 s7, s3
; CHECK-MVE-NEXT: vrinta.f32 s6, s2
; CHECK-MVE-NEXT: vrinta.f32 s5, s1
; CHECK-MVE-NEXT: vrinta.f32 s4, s0
; CHECK-MVE-NEXT: vmov q0, q1
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: fround_float32_t:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vrinta.f32 q0, q0
; CHECK-MVEFP-NEXT: bx lr
entry:
  %0 = call fast <4 x float> @llvm.round.v4f32(<4 x float> %src)
  ret <4 x float> %0
}
; Vector round on <8 x half>.
; For the +mve,+fullfp16 configuration the expected code handles the lanes one
; at a time: extract with vmov.u16, round with a scalar vrinta.f16, and insert
; into q1 with vmov.16, finishing with a copy of q1 to q0. For the +mve.fp
; configuration a single vector vrinta.f16 on q0 is expected.
define arm_aapcs_vfpcc <8 x half> @fround_float16_t(<8 x half> %src) {
; CHECK-MVE-LABEL: fround_float16_t:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vmov.u16 r0, q0[0]
; CHECK-MVE-NEXT: vmov.u16 r1, q0[1]
; CHECK-MVE-NEXT: vmov s4, r0
; CHECK-MVE-NEXT: vrinta.f16 s4, s4
; CHECK-MVE-NEXT: vmov r0, s4
; CHECK-MVE-NEXT: vmov s4, r1
; CHECK-MVE-NEXT: vrinta.f16 s4, s4
; CHECK-MVE-NEXT: vmov r1, s4
; CHECK-MVE-NEXT: vmov.16 q1[0], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[2]
; CHECK-MVE-NEXT: vmov.16 q1[1], r1
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vrinta.f16 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[2], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[3]
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vrinta.f16 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[3], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[4]
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vrinta.f16 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[4], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[5]
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vrinta.f16 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[5], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[6]
; CHECK-MVE-NEXT: vmov s8, r0
; CHECK-MVE-NEXT: vrinta.f16 s8, s8
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.16 q1[6], r0
; CHECK-MVE-NEXT: vmov.u16 r0, q0[7]
; CHECK-MVE-NEXT: vmov s0, r0
; CHECK-MVE-NEXT: vrinta.f16 s0, s0
; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmov.16 q1[7], r0
; CHECK-MVE-NEXT: vmov q0, q1
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: fround_float16_t:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vrinta.f16 q0, q0
; CHECK-MVEFP-NEXT: bx lr
entry:
  %0 = call fast <8 x half> @llvm.round.v8f16(<8 x half> %src)
  ret <8 x half> %0
}
; Vector round on <2 x double>.
; Both llc configurations share one expected sequence: d8/d9 are saved, the
; input is kept in q4, and each double is moved to r0/r1 and passed to a call
; to round; the two results are reassembled in q4 and copied to q0.
define arm_aapcs_vfpcc <2 x double> @fround_float64_t(<2 x double> %src) {
; CHECK-LABEL: fround_float64_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: bl round
; CHECK-NEXT: vmov r2, r3, d8
; CHECK-NEXT: vmov d9, r0, r1
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: bl round
; CHECK-NEXT: vmov d8, r0, r1
; CHECK-NEXT: vmov q0, q4
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.round.v2f64(<2 x double> %src)
  ret <2 x double> %0
}
; Declarations of the rounding intrinsics called by the test functions in this
; file, grouped by vector type.

; <4 x float>
declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
declare <4 x float> @llvm.rint.v4f32(<4 x float>)
declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
declare <4 x float> @llvm.floor.v4f32(<4 x float>)
declare <4 x float> @llvm.round.v4f32(<4 x float>)

; <8 x half>
declare <8 x half> @llvm.ceil.v8f16(<8 x half>)
declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
declare <8 x half> @llvm.rint.v8f16(<8 x half>)
declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)
declare <8 x half> @llvm.floor.v8f16(<8 x half>)
declare <8 x half> @llvm.round.v8f16(<8 x half>)

; <2 x double>
declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
declare <2 x double> @llvm.rint.v2f64(<2 x double>)
declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
declare <2 x double> @llvm.floor.v2f64(<2 x double>)
declare <2 x double> @llvm.round.v2f64(<2 x double>)