1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s
3 ; RUN: llc -mattr=+sme -force-streaming-compatible-sve < %s | FileCheck %s
5 target triple = "aarch64-unknown-linux-gnu"
11 define <2 x half> @frintp_v2f16(<2 x half> %op) {
12 ; CHECK-LABEL: frintp_v2f16:
14 ; CHECK-NEXT: ptrue p0.h, vl4
15 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
16 ; CHECK-NEXT: frintp z0.h, p0/m, z0.h
17 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
19 %res = call <2 x half> @llvm.ceil.v2f16(<2 x half> %op)
23 define <4 x half> @frintp_v4f16(<4 x half> %op) {
24 ; CHECK-LABEL: frintp_v4f16:
26 ; CHECK-NEXT: ptrue p0.h, vl4
27 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
28 ; CHECK-NEXT: frintp z0.h, p0/m, z0.h
29 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
31 %res = call <4 x half> @llvm.ceil.v4f16(<4 x half> %op)
35 define <8 x half> @frintp_v8f16(<8 x half> %op) {
36 ; CHECK-LABEL: frintp_v8f16:
38 ; CHECK-NEXT: ptrue p0.h, vl8
39 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
40 ; CHECK-NEXT: frintp z0.h, p0/m, z0.h
41 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
43 %res = call <8 x half> @llvm.ceil.v8f16(<8 x half> %op)
47 define void @frintp_v16f16(ptr %a) {
48 ; CHECK-LABEL: frintp_v16f16:
50 ; CHECK-NEXT: ptrue p0.h, vl8
51 ; CHECK-NEXT: ldp q0, q1, [x0]
52 ; CHECK-NEXT: frintp z0.h, p0/m, z0.h
53 ; CHECK-NEXT: frintp z1.h, p0/m, z1.h
54 ; CHECK-NEXT: stp q0, q1, [x0]
56 %op = load <16 x half>, ptr %a
57 %res = call <16 x half> @llvm.ceil.v16f16(<16 x half> %op)
58 store <16 x half> %res, ptr %a
62 define <2 x float> @frintp_v2f32(<2 x float> %op) {
63 ; CHECK-LABEL: frintp_v2f32:
65 ; CHECK-NEXT: ptrue p0.s, vl2
66 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
67 ; CHECK-NEXT: frintp z0.s, p0/m, z0.s
68 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
70 %res = call <2 x float> @llvm.ceil.v2f32(<2 x float> %op)
74 define <4 x float> @frintp_v4f32(<4 x float> %op) {
75 ; CHECK-LABEL: frintp_v4f32:
77 ; CHECK-NEXT: ptrue p0.s, vl4
78 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
79 ; CHECK-NEXT: frintp z0.s, p0/m, z0.s
80 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
82 %res = call <4 x float> @llvm.ceil.v4f32(<4 x float> %op)
86 define void @frintp_v8f32(ptr %a) {
87 ; CHECK-LABEL: frintp_v8f32:
89 ; CHECK-NEXT: ptrue p0.s, vl4
90 ; CHECK-NEXT: ldp q0, q1, [x0]
91 ; CHECK-NEXT: frintp z0.s, p0/m, z0.s
92 ; CHECK-NEXT: frintp z1.s, p0/m, z1.s
93 ; CHECK-NEXT: stp q0, q1, [x0]
95 %op = load <8 x float>, ptr %a
96 %res = call <8 x float> @llvm.ceil.v8f32(<8 x float> %op)
97 store <8 x float> %res, ptr %a
101 define <1 x double> @frintp_v1f64(<1 x double> %op) {
102 ; CHECK-LABEL: frintp_v1f64:
104 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
105 ; CHECK-NEXT: frintp d0, d0
107 %res = call <1 x double> @llvm.ceil.v1f64(<1 x double> %op)
108 ret <1 x double> %res
111 define <2 x double> @frintp_v2f64(<2 x double> %op) {
112 ; CHECK-LABEL: frintp_v2f64:
114 ; CHECK-NEXT: ptrue p0.d, vl2
115 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
116 ; CHECK-NEXT: frintp z0.d, p0/m, z0.d
117 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
119 %res = call <2 x double> @llvm.ceil.v2f64(<2 x double> %op)
120 ret <2 x double> %res
123 define void @frintp_v4f64(ptr %a) {
124 ; CHECK-LABEL: frintp_v4f64:
126 ; CHECK-NEXT: ptrue p0.d, vl2
127 ; CHECK-NEXT: ldp q0, q1, [x0]
128 ; CHECK-NEXT: frintp z0.d, p0/m, z0.d
129 ; CHECK-NEXT: frintp z1.d, p0/m, z1.d
130 ; CHECK-NEXT: stp q0, q1, [x0]
132 %op = load <4 x double>, ptr %a
133 %res = call <4 x double> @llvm.ceil.v4f64(<4 x double> %op)
134 store <4 x double> %res, ptr %a
142 define <2 x half> @frintm_v2f16(<2 x half> %op) {
143 ; CHECK-LABEL: frintm_v2f16:
145 ; CHECK-NEXT: ptrue p0.h, vl4
146 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
147 ; CHECK-NEXT: frintm z0.h, p0/m, z0.h
148 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
150 %res = call <2 x half> @llvm.floor.v2f16(<2 x half> %op)
154 define <4 x half> @frintm_v4f16(<4 x half> %op) {
155 ; CHECK-LABEL: frintm_v4f16:
157 ; CHECK-NEXT: ptrue p0.h, vl4
158 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
159 ; CHECK-NEXT: frintm z0.h, p0/m, z0.h
160 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
162 %res = call <4 x half> @llvm.floor.v4f16(<4 x half> %op)
166 define <8 x half> @frintm_v8f16(<8 x half> %op) {
167 ; CHECK-LABEL: frintm_v8f16:
169 ; CHECK-NEXT: ptrue p0.h, vl8
170 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
171 ; CHECK-NEXT: frintm z0.h, p0/m, z0.h
172 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
174 %res = call <8 x half> @llvm.floor.v8f16(<8 x half> %op)
178 define void @frintm_v16f16(ptr %a) {
179 ; CHECK-LABEL: frintm_v16f16:
181 ; CHECK-NEXT: ptrue p0.h, vl8
182 ; CHECK-NEXT: ldp q0, q1, [x0]
183 ; CHECK-NEXT: frintm z0.h, p0/m, z0.h
184 ; CHECK-NEXT: frintm z1.h, p0/m, z1.h
185 ; CHECK-NEXT: stp q0, q1, [x0]
187 %op = load <16 x half>, ptr %a
188 %res = call <16 x half> @llvm.floor.v16f16(<16 x half> %op)
189 store <16 x half> %res, ptr %a
193 define <2 x float> @frintm_v2f32(<2 x float> %op) {
194 ; CHECK-LABEL: frintm_v2f32:
196 ; CHECK-NEXT: ptrue p0.s, vl2
197 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
198 ; CHECK-NEXT: frintm z0.s, p0/m, z0.s
199 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
201 %res = call <2 x float> @llvm.floor.v2f32(<2 x float> %op)
205 define <4 x float> @frintm_v4f32(<4 x float> %op) {
206 ; CHECK-LABEL: frintm_v4f32:
208 ; CHECK-NEXT: ptrue p0.s, vl4
209 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
210 ; CHECK-NEXT: frintm z0.s, p0/m, z0.s
211 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
213 %res = call <4 x float> @llvm.floor.v4f32(<4 x float> %op)
217 define void @frintm_v8f32(ptr %a) {
218 ; CHECK-LABEL: frintm_v8f32:
220 ; CHECK-NEXT: ptrue p0.s, vl4
221 ; CHECK-NEXT: ldp q0, q1, [x0]
222 ; CHECK-NEXT: frintm z0.s, p0/m, z0.s
223 ; CHECK-NEXT: frintm z1.s, p0/m, z1.s
224 ; CHECK-NEXT: stp q0, q1, [x0]
226 %op = load <8 x float>, ptr %a
227 %res = call <8 x float> @llvm.floor.v8f32(<8 x float> %op)
228 store <8 x float> %res, ptr %a
232 define <1 x double> @frintm_v1f64(<1 x double> %op) {
233 ; CHECK-LABEL: frintm_v1f64:
235 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
236 ; CHECK-NEXT: frintm d0, d0
238 %res = call <1 x double> @llvm.floor.v1f64(<1 x double> %op)
239 ret <1 x double> %res
242 define <2 x double> @frintm_v2f64(<2 x double> %op) {
243 ; CHECK-LABEL: frintm_v2f64:
245 ; CHECK-NEXT: ptrue p0.d, vl2
246 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
247 ; CHECK-NEXT: frintm z0.d, p0/m, z0.d
248 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
250 %res = call <2 x double> @llvm.floor.v2f64(<2 x double> %op)
251 ret <2 x double> %res
254 define void @frintm_v4f64(ptr %a) {
255 ; CHECK-LABEL: frintm_v4f64:
257 ; CHECK-NEXT: ptrue p0.d, vl2
258 ; CHECK-NEXT: ldp q0, q1, [x0]
259 ; CHECK-NEXT: frintm z0.d, p0/m, z0.d
260 ; CHECK-NEXT: frintm z1.d, p0/m, z1.d
261 ; CHECK-NEXT: stp q0, q1, [x0]
263 %op = load <4 x double>, ptr %a
264 %res = call <4 x double> @llvm.floor.v4f64(<4 x double> %op)
265 store <4 x double> %res, ptr %a
270 ; FNEARBYINT -> FRINTI
273 define <2 x half> @frinti_v2f16(<2 x half> %op) {
274 ; CHECK-LABEL: frinti_v2f16:
276 ; CHECK-NEXT: ptrue p0.h, vl4
277 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
278 ; CHECK-NEXT: frinti z0.h, p0/m, z0.h
279 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
281 %res = call <2 x half> @llvm.nearbyint.v2f16(<2 x half> %op)
285 define <4 x half> @frinti_v4f16(<4 x half> %op) {
286 ; CHECK-LABEL: frinti_v4f16:
288 ; CHECK-NEXT: ptrue p0.h, vl4
289 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
290 ; CHECK-NEXT: frinti z0.h, p0/m, z0.h
291 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
293 %res = call <4 x half> @llvm.nearbyint.v4f16(<4 x half> %op)
297 define <8 x half> @frinti_v8f16(<8 x half> %op) {
298 ; CHECK-LABEL: frinti_v8f16:
300 ; CHECK-NEXT: ptrue p0.h, vl8
301 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
302 ; CHECK-NEXT: frinti z0.h, p0/m, z0.h
303 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
305 %res = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %op)
309 define void @frinti_v16f16(ptr %a) {
310 ; CHECK-LABEL: frinti_v16f16:
312 ; CHECK-NEXT: ptrue p0.h, vl8
313 ; CHECK-NEXT: ldp q0, q1, [x0]
314 ; CHECK-NEXT: frinti z0.h, p0/m, z0.h
315 ; CHECK-NEXT: frinti z1.h, p0/m, z1.h
316 ; CHECK-NEXT: stp q0, q1, [x0]
318 %op = load <16 x half>, ptr %a
319 %res = call <16 x half> @llvm.nearbyint.v16f16(<16 x half> %op)
320 store <16 x half> %res, ptr %a
324 define <2 x float> @frinti_v2f32(<2 x float> %op) {
325 ; CHECK-LABEL: frinti_v2f32:
327 ; CHECK-NEXT: ptrue p0.s, vl2
328 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
329 ; CHECK-NEXT: frinti z0.s, p0/m, z0.s
330 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
332 %res = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %op)
336 define <4 x float> @frinti_v4f32(<4 x float> %op) {
337 ; CHECK-LABEL: frinti_v4f32:
339 ; CHECK-NEXT: ptrue p0.s, vl4
340 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
341 ; CHECK-NEXT: frinti z0.s, p0/m, z0.s
342 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
344 %res = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %op)
348 define void @frinti_v8f32(ptr %a) {
349 ; CHECK-LABEL: frinti_v8f32:
351 ; CHECK-NEXT: ptrue p0.s, vl4
352 ; CHECK-NEXT: ldp q0, q1, [x0]
353 ; CHECK-NEXT: frinti z0.s, p0/m, z0.s
354 ; CHECK-NEXT: frinti z1.s, p0/m, z1.s
355 ; CHECK-NEXT: stp q0, q1, [x0]
357 %op = load <8 x float>, ptr %a
358 %res = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %op)
359 store <8 x float> %res, ptr %a
363 define <1 x double> @frinti_v1f64(<1 x double> %op) {
364 ; CHECK-LABEL: frinti_v1f64:
366 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
367 ; CHECK-NEXT: frinti d0, d0
369 %res = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %op)
370 ret <1 x double> %res
373 define <2 x double> @frinti_v2f64(<2 x double> %op) {
374 ; CHECK-LABEL: frinti_v2f64:
376 ; CHECK-NEXT: ptrue p0.d, vl2
377 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
378 ; CHECK-NEXT: frinti z0.d, p0/m, z0.d
379 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
381 %res = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %op)
382 ret <2 x double> %res
385 define void @frinti_v4f64(ptr %a) {
386 ; CHECK-LABEL: frinti_v4f64:
388 ; CHECK-NEXT: ptrue p0.d, vl2
389 ; CHECK-NEXT: ldp q0, q1, [x0]
390 ; CHECK-NEXT: frinti z0.d, p0/m, z0.d
391 ; CHECK-NEXT: frinti z1.d, p0/m, z1.d
392 ; CHECK-NEXT: stp q0, q1, [x0]
394 %op = load <4 x double>, ptr %a
395 %res = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %op)
396 store <4 x double> %res, ptr %a
404 define <2 x half> @frintx_v2f16(<2 x half> %op) {
405 ; CHECK-LABEL: frintx_v2f16:
407 ; CHECK-NEXT: ptrue p0.h, vl4
408 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
409 ; CHECK-NEXT: frintx z0.h, p0/m, z0.h
410 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
412 %res = call <2 x half> @llvm.rint.v2f16(<2 x half> %op)
416 define <4 x half> @frintx_v4f16(<4 x half> %op) {
417 ; CHECK-LABEL: frintx_v4f16:
419 ; CHECK-NEXT: ptrue p0.h, vl4
420 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
421 ; CHECK-NEXT: frintx z0.h, p0/m, z0.h
422 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
424 %res = call <4 x half> @llvm.rint.v4f16(<4 x half> %op)
428 define <8 x half> @frintx_v8f16(<8 x half> %op) {
429 ; CHECK-LABEL: frintx_v8f16:
431 ; CHECK-NEXT: ptrue p0.h, vl8
432 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
433 ; CHECK-NEXT: frintx z0.h, p0/m, z0.h
434 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
436 %res = call <8 x half> @llvm.rint.v8f16(<8 x half> %op)
440 define void @frintx_v16f16(ptr %a) {
441 ; CHECK-LABEL: frintx_v16f16:
443 ; CHECK-NEXT: ptrue p0.h, vl8
444 ; CHECK-NEXT: ldp q0, q1, [x0]
445 ; CHECK-NEXT: frintx z0.h, p0/m, z0.h
446 ; CHECK-NEXT: frintx z1.h, p0/m, z1.h
447 ; CHECK-NEXT: stp q0, q1, [x0]
449 %op = load <16 x half>, ptr %a
450 %res = call <16 x half> @llvm.rint.v16f16(<16 x half> %op)
451 store <16 x half> %res, ptr %a
455 define <2 x float> @frintx_v2f32(<2 x float> %op) {
456 ; CHECK-LABEL: frintx_v2f32:
458 ; CHECK-NEXT: ptrue p0.s, vl2
459 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
460 ; CHECK-NEXT: frintx z0.s, p0/m, z0.s
461 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
463 %res = call <2 x float> @llvm.rint.v2f32(<2 x float> %op)
467 define <4 x float> @frintx_v4f32(<4 x float> %op) {
468 ; CHECK-LABEL: frintx_v4f32:
470 ; CHECK-NEXT: ptrue p0.s, vl4
471 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
472 ; CHECK-NEXT: frintx z0.s, p0/m, z0.s
473 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
475 %res = call <4 x float> @llvm.rint.v4f32(<4 x float> %op)
479 define void @frintx_v8f32(ptr %a) {
480 ; CHECK-LABEL: frintx_v8f32:
482 ; CHECK-NEXT: ptrue p0.s, vl4
483 ; CHECK-NEXT: ldp q0, q1, [x0]
484 ; CHECK-NEXT: frintx z0.s, p0/m, z0.s
485 ; CHECK-NEXT: frintx z1.s, p0/m, z1.s
486 ; CHECK-NEXT: stp q0, q1, [x0]
488 %op = load <8 x float>, ptr %a
489 %res = call <8 x float> @llvm.rint.v8f32(<8 x float> %op)
490 store <8 x float> %res, ptr %a
494 define <1 x double> @frintx_v1f64(<1 x double> %op) {
495 ; CHECK-LABEL: frintx_v1f64:
497 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
498 ; CHECK-NEXT: frintx d0, d0
500 %res = call <1 x double> @llvm.rint.v1f64(<1 x double> %op)
501 ret <1 x double> %res
504 define <2 x double> @frintx_v2f64(<2 x double> %op) {
505 ; CHECK-LABEL: frintx_v2f64:
507 ; CHECK-NEXT: ptrue p0.d, vl2
508 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
509 ; CHECK-NEXT: frintx z0.d, p0/m, z0.d
510 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
512 %res = call <2 x double> @llvm.rint.v2f64(<2 x double> %op)
513 ret <2 x double> %res
516 define void @frintx_v4f64(ptr %a) {
517 ; CHECK-LABEL: frintx_v4f64:
519 ; CHECK-NEXT: ptrue p0.d, vl2
520 ; CHECK-NEXT: ldp q0, q1, [x0]
521 ; CHECK-NEXT: frintx z0.d, p0/m, z0.d
522 ; CHECK-NEXT: frintx z1.d, p0/m, z1.d
523 ; CHECK-NEXT: stp q0, q1, [x0]
525 %op = load <4 x double>, ptr %a
526 %res = call <4 x double> @llvm.rint.v4f64(<4 x double> %op)
527 store <4 x double> %res, ptr %a
535 define <2 x half> @frinta_v2f16(<2 x half> %op) {
536 ; CHECK-LABEL: frinta_v2f16:
538 ; CHECK-NEXT: ptrue p0.h, vl4
539 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
540 ; CHECK-NEXT: frinta z0.h, p0/m, z0.h
541 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
543 %res = call <2 x half> @llvm.round.v2f16(<2 x half> %op)
547 define <4 x half> @frinta_v4f16(<4 x half> %op) {
548 ; CHECK-LABEL: frinta_v4f16:
550 ; CHECK-NEXT: ptrue p0.h, vl4
551 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
552 ; CHECK-NEXT: frinta z0.h, p0/m, z0.h
553 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
555 %res = call <4 x half> @llvm.round.v4f16(<4 x half> %op)
559 define <8 x half> @frinta_v8f16(<8 x half> %op) {
560 ; CHECK-LABEL: frinta_v8f16:
562 ; CHECK-NEXT: ptrue p0.h, vl8
563 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
564 ; CHECK-NEXT: frinta z0.h, p0/m, z0.h
565 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
567 %res = call <8 x half> @llvm.round.v8f16(<8 x half> %op)
571 define void @frinta_v16f16(ptr %a) {
572 ; CHECK-LABEL: frinta_v16f16:
574 ; CHECK-NEXT: ptrue p0.h, vl8
575 ; CHECK-NEXT: ldp q0, q1, [x0]
576 ; CHECK-NEXT: frinta z0.h, p0/m, z0.h
577 ; CHECK-NEXT: frinta z1.h, p0/m, z1.h
578 ; CHECK-NEXT: stp q0, q1, [x0]
580 %op = load <16 x half>, ptr %a
581 %res = call <16 x half> @llvm.round.v16f16(<16 x half> %op)
582 store <16 x half> %res, ptr %a
586 define <2 x float> @frinta_v2f32(<2 x float> %op) {
587 ; CHECK-LABEL: frinta_v2f32:
589 ; CHECK-NEXT: ptrue p0.s, vl2
590 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
591 ; CHECK-NEXT: frinta z0.s, p0/m, z0.s
592 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
594 %res = call <2 x float> @llvm.round.v2f32(<2 x float> %op)
598 define <4 x float> @frinta_v4f32(<4 x float> %op) {
599 ; CHECK-LABEL: frinta_v4f32:
601 ; CHECK-NEXT: ptrue p0.s, vl4
602 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
603 ; CHECK-NEXT: frinta z0.s, p0/m, z0.s
604 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
606 %res = call <4 x float> @llvm.round.v4f32(<4 x float> %op)
610 define void @frinta_v8f32(ptr %a) {
611 ; CHECK-LABEL: frinta_v8f32:
613 ; CHECK-NEXT: ptrue p0.s, vl4
614 ; CHECK-NEXT: ldp q0, q1, [x0]
615 ; CHECK-NEXT: frinta z0.s, p0/m, z0.s
616 ; CHECK-NEXT: frinta z1.s, p0/m, z1.s
617 ; CHECK-NEXT: stp q0, q1, [x0]
619 %op = load <8 x float>, ptr %a
620 %res = call <8 x float> @llvm.round.v8f32(<8 x float> %op)
621 store <8 x float> %res, ptr %a
625 define <1 x double> @frinta_v1f64(<1 x double> %op) {
626 ; CHECK-LABEL: frinta_v1f64:
628 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
629 ; CHECK-NEXT: frinta d0, d0
631 %res = call <1 x double> @llvm.round.v1f64(<1 x double> %op)
632 ret <1 x double> %res
635 define <2 x double> @frinta_v2f64(<2 x double> %op) {
636 ; CHECK-LABEL: frinta_v2f64:
638 ; CHECK-NEXT: ptrue p0.d, vl2
639 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
640 ; CHECK-NEXT: frinta z0.d, p0/m, z0.d
641 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
643 %res = call <2 x double> @llvm.round.v2f64(<2 x double> %op)
644 ret <2 x double> %res
647 define void @frinta_v4f64(ptr %a) {
648 ; CHECK-LABEL: frinta_v4f64:
650 ; CHECK-NEXT: ptrue p0.d, vl2
651 ; CHECK-NEXT: ldp q0, q1, [x0]
652 ; CHECK-NEXT: frinta z0.d, p0/m, z0.d
653 ; CHECK-NEXT: frinta z1.d, p0/m, z1.d
654 ; CHECK-NEXT: stp q0, q1, [x0]
656 %op = load <4 x double>, ptr %a
657 %res = call <4 x double> @llvm.round.v4f64(<4 x double> %op)
658 store <4 x double> %res, ptr %a
663 ; ROUNDEVEN -> FRINTN
666 define <2 x half> @frintn_v2f16(<2 x half> %op) {
667 ; CHECK-LABEL: frintn_v2f16:
669 ; CHECK-NEXT: ptrue p0.h, vl4
670 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
671 ; CHECK-NEXT: frintn z0.h, p0/m, z0.h
672 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
674 %res = call <2 x half> @llvm.roundeven.v2f16(<2 x half> %op)
678 define <4 x half> @frintn_v4f16(<4 x half> %op) {
679 ; CHECK-LABEL: frintn_v4f16:
681 ; CHECK-NEXT: ptrue p0.h, vl4
682 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
683 ; CHECK-NEXT: frintn z0.h, p0/m, z0.h
684 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
686 %res = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %op)
690 define <8 x half> @frintn_v8f16(<8 x half> %op) {
691 ; CHECK-LABEL: frintn_v8f16:
693 ; CHECK-NEXT: ptrue p0.h, vl8
694 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
695 ; CHECK-NEXT: frintn z0.h, p0/m, z0.h
696 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
698 %res = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %op)
702 define void @frintn_v16f16(ptr %a) {
703 ; CHECK-LABEL: frintn_v16f16:
705 ; CHECK-NEXT: ptrue p0.h, vl8
706 ; CHECK-NEXT: ldp q0, q1, [x0]
707 ; CHECK-NEXT: frintn z0.h, p0/m, z0.h
708 ; CHECK-NEXT: frintn z1.h, p0/m, z1.h
709 ; CHECK-NEXT: stp q0, q1, [x0]
711 %op = load <16 x half>, ptr %a
712 %res = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %op)
713 store <16 x half> %res, ptr %a
717 define <2 x float> @frintn_v2f32(<2 x float> %op) {
718 ; CHECK-LABEL: frintn_v2f32:
720 ; CHECK-NEXT: ptrue p0.s, vl2
721 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
722 ; CHECK-NEXT: frintn z0.s, p0/m, z0.s
723 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
725 %res = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %op)
729 define <4 x float> @frintn_v4f32(<4 x float> %op) {
730 ; CHECK-LABEL: frintn_v4f32:
732 ; CHECK-NEXT: ptrue p0.s, vl4
733 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
734 ; CHECK-NEXT: frintn z0.s, p0/m, z0.s
735 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
737 %res = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %op)
741 define void @frintn_v8f32(ptr %a) {
742 ; CHECK-LABEL: frintn_v8f32:
744 ; CHECK-NEXT: ptrue p0.s, vl4
745 ; CHECK-NEXT: ldp q0, q1, [x0]
746 ; CHECK-NEXT: frintn z0.s, p0/m, z0.s
747 ; CHECK-NEXT: frintn z1.s, p0/m, z1.s
748 ; CHECK-NEXT: stp q0, q1, [x0]
750 %op = load <8 x float>, ptr %a
751 %res = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %op)
752 store <8 x float> %res, ptr %a
756 define <1 x double> @frintn_v1f64(<1 x double> %op) {
757 ; CHECK-LABEL: frintn_v1f64:
759 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
760 ; CHECK-NEXT: frintn d0, d0
762 %res = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %op)
763 ret <1 x double> %res
766 define <2 x double> @frintn_v2f64(<2 x double> %op) {
767 ; CHECK-LABEL: frintn_v2f64:
769 ; CHECK-NEXT: ptrue p0.d, vl2
770 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
771 ; CHECK-NEXT: frintn z0.d, p0/m, z0.d
772 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
774 %res = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %op)
775 ret <2 x double> %res
778 define void @frintn_v4f64(ptr %a) {
779 ; CHECK-LABEL: frintn_v4f64:
781 ; CHECK-NEXT: ptrue p0.d, vl2
782 ; CHECK-NEXT: ldp q0, q1, [x0]
783 ; CHECK-NEXT: frintn z0.d, p0/m, z0.d
784 ; CHECK-NEXT: frintn z1.d, p0/m, z1.d
785 ; CHECK-NEXT: stp q0, q1, [x0]
787 %op = load <4 x double>, ptr %a
788 %res = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %op)
789 store <4 x double> %res, ptr %a
797 define <2 x half> @frintz_v2f16(<2 x half> %op) {
798 ; CHECK-LABEL: frintz_v2f16:
800 ; CHECK-NEXT: ptrue p0.h, vl4
801 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
802 ; CHECK-NEXT: frintz z0.h, p0/m, z0.h
803 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
805 %res = call <2 x half> @llvm.trunc.v2f16(<2 x half> %op)
809 define <4 x half> @frintz_v4f16(<4 x half> %op) {
810 ; CHECK-LABEL: frintz_v4f16:
812 ; CHECK-NEXT: ptrue p0.h, vl4
813 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
814 ; CHECK-NEXT: frintz z0.h, p0/m, z0.h
815 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
817 %res = call <4 x half> @llvm.trunc.v4f16(<4 x half> %op)
821 define <8 x half> @frintz_v8f16(<8 x half> %op) {
822 ; CHECK-LABEL: frintz_v8f16:
824 ; CHECK-NEXT: ptrue p0.h, vl8
825 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
826 ; CHECK-NEXT: frintz z0.h, p0/m, z0.h
827 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
829 %res = call <8 x half> @llvm.trunc.v8f16(<8 x half> %op)
833 define void @frintz_v16f16(ptr %a) {
834 ; CHECK-LABEL: frintz_v16f16:
836 ; CHECK-NEXT: ptrue p0.h, vl8
837 ; CHECK-NEXT: ldp q0, q1, [x0]
838 ; CHECK-NEXT: frintz z0.h, p0/m, z0.h
839 ; CHECK-NEXT: frintz z1.h, p0/m, z1.h
840 ; CHECK-NEXT: stp q0, q1, [x0]
842 %op = load <16 x half>, ptr %a
843 %res = call <16 x half> @llvm.trunc.v16f16(<16 x half> %op)
844 store <16 x half> %res, ptr %a
848 define <2 x float> @frintz_v2f32(<2 x float> %op) {
849 ; CHECK-LABEL: frintz_v2f32:
851 ; CHECK-NEXT: ptrue p0.s, vl2
852 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
853 ; CHECK-NEXT: frintz z0.s, p0/m, z0.s
854 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
856 %res = call <2 x float> @llvm.trunc.v2f32(<2 x float> %op)
860 define <4 x float> @frintz_v4f32(<4 x float> %op) {
861 ; CHECK-LABEL: frintz_v4f32:
863 ; CHECK-NEXT: ptrue p0.s, vl4
864 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
865 ; CHECK-NEXT: frintz z0.s, p0/m, z0.s
866 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
868 %res = call <4 x float> @llvm.trunc.v4f32(<4 x float> %op)
872 define void @frintz_v8f32(ptr %a) {
873 ; CHECK-LABEL: frintz_v8f32:
875 ; CHECK-NEXT: ptrue p0.s, vl4
876 ; CHECK-NEXT: ldp q0, q1, [x0]
877 ; CHECK-NEXT: frintz z0.s, p0/m, z0.s
878 ; CHECK-NEXT: frintz z1.s, p0/m, z1.s
879 ; CHECK-NEXT: stp q0, q1, [x0]
881 %op = load <8 x float>, ptr %a
882 %res = call <8 x float> @llvm.trunc.v8f32(<8 x float> %op)
883 store <8 x float> %res, ptr %a
887 define <1 x double> @frintz_v1f64(<1 x double> %op) {
888 ; CHECK-LABEL: frintz_v1f64:
890 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
891 ; CHECK-NEXT: frintz d0, d0
893 %res = call <1 x double> @llvm.trunc.v1f64(<1 x double> %op)
894 ret <1 x double> %res
897 define <2 x double> @frintz_v2f64(<2 x double> %op) {
898 ; CHECK-LABEL: frintz_v2f64:
900 ; CHECK-NEXT: ptrue p0.d, vl2
901 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
902 ; CHECK-NEXT: frintz z0.d, p0/m, z0.d
903 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
905 %res = call <2 x double> @llvm.trunc.v2f64(<2 x double> %op)
906 ret <2 x double> %res
909 define void @frintz_v4f64(ptr %a) {
910 ; CHECK-LABEL: frintz_v4f64:
912 ; CHECK-NEXT: ptrue p0.d, vl2
913 ; CHECK-NEXT: ldp q0, q1, [x0]
914 ; CHECK-NEXT: frintz z0.d, p0/m, z0.d
915 ; CHECK-NEXT: frintz z1.d, p0/m, z1.d
916 ; CHECK-NEXT: stp q0, q1, [x0]
918 %op = load <4 x double>, ptr %a
919 %res = call <4 x double> @llvm.trunc.v4f64(<4 x double> %op)
920 store <4 x double> %res, ptr %a
; Declarations of the llvm.ceil intrinsic for fixed-length half, float and
; double vectors. The frintp_* functions visible in this file call only the
; v2f16, v4f16, v8f16, v16f16, v2f32, v4f32, v8f32, v1f64, v2f64 and v4f64
; variants; the wider declarations have no caller in the visible part of the
; file.
924 declare <2 x half> @llvm.ceil.v2f16(<2 x half>)
925 declare <4 x half> @llvm.ceil.v4f16(<4 x half>)
926 declare <8 x half> @llvm.ceil.v8f16(<8 x half>)
927 declare <16 x half> @llvm.ceil.v16f16(<16 x half>)
928 declare <32 x half> @llvm.ceil.v32f16(<32 x half>)
929 declare <64 x half> @llvm.ceil.v64f16(<64 x half>)
930 declare <128 x half> @llvm.ceil.v128f16(<128 x half>)
931 declare <2 x float> @llvm.ceil.v2f32(<2 x float>)
932 declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
933 declare <8 x float> @llvm.ceil.v8f32(<8 x float>)
934 declare <16 x float> @llvm.ceil.v16f32(<16 x float>)
935 declare <32 x float> @llvm.ceil.v32f32(<32 x float>)
936 declare <64 x float> @llvm.ceil.v64f32(<64 x float>)
937 declare <1 x double> @llvm.ceil.v1f64(<1 x double>)
938 declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
939 declare <4 x double> @llvm.ceil.v4f64(<4 x double>)
940 declare <8 x double> @llvm.ceil.v8f64(<8 x double>)
941 declare <16 x double> @llvm.ceil.v16f64(<16 x double>)
942 declare <32 x double> @llvm.ceil.v32f64(<32 x double>)
; Declarations of the llvm.floor intrinsic for fixed-length half, float and
; double vectors. The frintm_* functions visible in this file call only the
; v2f16, v4f16, v8f16, v16f16, v2f32, v4f32, v8f32, v1f64, v2f64 and v4f64
; variants; the wider declarations have no caller in the visible part of the
; file.
944 declare <2 x half> @llvm.floor.v2f16(<2 x half>)
945 declare <4 x half> @llvm.floor.v4f16(<4 x half>)
946 declare <8 x half> @llvm.floor.v8f16(<8 x half>)
947 declare <16 x half> @llvm.floor.v16f16(<16 x half>)
948 declare <32 x half> @llvm.floor.v32f16(<32 x half>)
949 declare <64 x half> @llvm.floor.v64f16(<64 x half>)
950 declare <128 x half> @llvm.floor.v128f16(<128 x half>)
951 declare <2 x float> @llvm.floor.v2f32(<2 x float>)
952 declare <4 x float> @llvm.floor.v4f32(<4 x float>)
953 declare <8 x float> @llvm.floor.v8f32(<8 x float>)
954 declare <16 x float> @llvm.floor.v16f32(<16 x float>)
955 declare <32 x float> @llvm.floor.v32f32(<32 x float>)
956 declare <64 x float> @llvm.floor.v64f32(<64 x float>)
957 declare <1 x double> @llvm.floor.v1f64(<1 x double>)
958 declare <2 x double> @llvm.floor.v2f64(<2 x double>)
959 declare <4 x double> @llvm.floor.v4f64(<4 x double>)
960 declare <8 x double> @llvm.floor.v8f64(<8 x double>)
961 declare <16 x double> @llvm.floor.v16f64(<16 x double>)
962 declare <32 x double> @llvm.floor.v32f64(<32 x double>)
; Declarations of the llvm.nearbyint intrinsic for fixed-length half, float
; and double vectors. The frinti_* functions visible in this file call only
; the v2f16, v4f16, v8f16, v16f16, v2f32, v4f32, v8f32, v1f64, v2f64 and v4f64
; variants; the wider declarations have no caller in the visible part of the
; file.
964 declare <2 x half> @llvm.nearbyint.v2f16(<2 x half>)
965 declare <4 x half> @llvm.nearbyint.v4f16(<4 x half>)
966 declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)
967 declare <16 x half> @llvm.nearbyint.v16f16(<16 x half>)
968 declare <32 x half> @llvm.nearbyint.v32f16(<32 x half>)
969 declare <64 x half> @llvm.nearbyint.v64f16(<64 x half>)
970 declare <128 x half> @llvm.nearbyint.v128f16(<128 x half>)
971 declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>)
972 declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
973 declare <8 x float> @llvm.nearbyint.v8f32(<8 x float>)
974 declare <16 x float> @llvm.nearbyint.v16f32(<16 x float>)
975 declare <32 x float> @llvm.nearbyint.v32f32(<32 x float>)
976 declare <64 x float> @llvm.nearbyint.v64f32(<64 x float>)
977 declare <1 x double> @llvm.nearbyint.v1f64(<1 x double>)
978 declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
979 declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>)
980 declare <8 x double> @llvm.nearbyint.v8f64(<8 x double>)
981 declare <16 x double> @llvm.nearbyint.v16f64(<16 x double>)
982 declare <32 x double> @llvm.nearbyint.v32f64(<32 x double>)
; Declarations of the llvm.rint intrinsic for fixed-length half, float and
; double vectors. The frintx_* functions visible in this file call only the
; v2f16, v4f16, v8f16, v16f16, v2f32, v4f32, v8f32, v1f64, v2f64 and v4f64
; variants; the wider declarations have no caller in the visible part of the
; file.
984 declare <2 x half> @llvm.rint.v2f16(<2 x half>)
985 declare <4 x half> @llvm.rint.v4f16(<4 x half>)
986 declare <8 x half> @llvm.rint.v8f16(<8 x half>)
987 declare <16 x half> @llvm.rint.v16f16(<16 x half>)
988 declare <32 x half> @llvm.rint.v32f16(<32 x half>)
989 declare <64 x half> @llvm.rint.v64f16(<64 x half>)
990 declare <128 x half> @llvm.rint.v128f16(<128 x half>)
991 declare <2 x float> @llvm.rint.v2f32(<2 x float>)
992 declare <4 x float> @llvm.rint.v4f32(<4 x float>)
993 declare <8 x float> @llvm.rint.v8f32(<8 x float>)
994 declare <16 x float> @llvm.rint.v16f32(<16 x float>)
995 declare <32 x float> @llvm.rint.v32f32(<32 x float>)
996 declare <64 x float> @llvm.rint.v64f32(<64 x float>)
997 declare <1 x double> @llvm.rint.v1f64(<1 x double>)
998 declare <2 x double> @llvm.rint.v2f64(<2 x double>)
999 declare <4 x double> @llvm.rint.v4f64(<4 x double>)
1000 declare <8 x double> @llvm.rint.v8f64(<8 x double>)
1001 declare <16 x double> @llvm.rint.v16f64(<16 x double>)
1002 declare <32 x double> @llvm.rint.v32f64(<32 x double>)
; Declarations of the llvm.round intrinsic for fixed-length half, float and
; double vectors. The frinta_* functions visible in this file call only the
; v2f16, v4f16, v8f16, v16f16, v2f32, v4f32, v8f32, v1f64, v2f64 and v4f64
; variants; the wider declarations have no caller in the visible part of the
; file.
1004 declare <2 x half> @llvm.round.v2f16(<2 x half>)
1005 declare <4 x half> @llvm.round.v4f16(<4 x half>)
1006 declare <8 x half> @llvm.round.v8f16(<8 x half>)
1007 declare <16 x half> @llvm.round.v16f16(<16 x half>)
1008 declare <32 x half> @llvm.round.v32f16(<32 x half>)
1009 declare <64 x half> @llvm.round.v64f16(<64 x half>)
1010 declare <128 x half> @llvm.round.v128f16(<128 x half>)
1011 declare <2 x float> @llvm.round.v2f32(<2 x float>)
1012 declare <4 x float> @llvm.round.v4f32(<4 x float>)
1013 declare <8 x float> @llvm.round.v8f32(<8 x float>)
1014 declare <16 x float> @llvm.round.v16f32(<16 x float>)
1015 declare <32 x float> @llvm.round.v32f32(<32 x float>)
1016 declare <64 x float> @llvm.round.v64f32(<64 x float>)
1017 declare <1 x double> @llvm.round.v1f64(<1 x double>)
1018 declare <2 x double> @llvm.round.v2f64(<2 x double>)
1019 declare <4 x double> @llvm.round.v4f64(<4 x double>)
1020 declare <8 x double> @llvm.round.v8f64(<8 x double>)
1021 declare <16 x double> @llvm.round.v16f64(<16 x double>)
1022 declare <32 x double> @llvm.round.v32f64(<32 x double>)
; Declarations of the llvm.roundeven intrinsic for fixed-length half, float
; and double vectors. The frintn_* functions visible in this file call only
; the v2f16, v4f16, v8f16, v16f16, v2f32, v4f32, v8f32, v1f64, v2f64 and v4f64
; variants; the wider declarations have no caller in the visible part of the
; file.
1024 declare <2 x half> @llvm.roundeven.v2f16(<2 x half>)
1025 declare <4 x half> @llvm.roundeven.v4f16(<4 x half>)
1026 declare <8 x half> @llvm.roundeven.v8f16(<8 x half>)
1027 declare <16 x half> @llvm.roundeven.v16f16(<16 x half>)
1028 declare <32 x half> @llvm.roundeven.v32f16(<32 x half>)
1029 declare <64 x half> @llvm.roundeven.v64f16(<64 x half>)
1030 declare <128 x half> @llvm.roundeven.v128f16(<128 x half>)
1031 declare <2 x float> @llvm.roundeven.v2f32(<2 x float>)
1032 declare <4 x float> @llvm.roundeven.v4f32(<4 x float>)
1033 declare <8 x float> @llvm.roundeven.v8f32(<8 x float>)
1034 declare <16 x float> @llvm.roundeven.v16f32(<16 x float>)
1035 declare <32 x float> @llvm.roundeven.v32f32(<32 x float>)
1036 declare <64 x float> @llvm.roundeven.v64f32(<64 x float>)
1037 declare <1 x double> @llvm.roundeven.v1f64(<1 x double>)
1038 declare <2 x double> @llvm.roundeven.v2f64(<2 x double>)
1039 declare <4 x double> @llvm.roundeven.v4f64(<4 x double>)
1040 declare <8 x double> @llvm.roundeven.v8f64(<8 x double>)
1041 declare <16 x double> @llvm.roundeven.v16f64(<16 x double>)
1042 declare <32 x double> @llvm.roundeven.v32f64(<32 x double>)
; Declarations of the llvm.trunc intrinsic for fixed-length half, float and
; double vectors. The frintz_* functions visible in this file call only the
; v2f16, v4f16, v8f16, v16f16, v2f32, v4f32, v8f32, v1f64, v2f64 and v4f64
; variants; the wider declarations have no caller in the visible part of the
; file.
1044 declare <2 x half> @llvm.trunc.v2f16(<2 x half>)
1045 declare <4 x half> @llvm.trunc.v4f16(<4 x half>)
1046 declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
1047 declare <16 x half> @llvm.trunc.v16f16(<16 x half>)
1048 declare <32 x half> @llvm.trunc.v32f16(<32 x half>)
1049 declare <64 x half> @llvm.trunc.v64f16(<64 x half>)
1050 declare <128 x half> @llvm.trunc.v128f16(<128 x half>)
1051 declare <2 x float> @llvm.trunc.v2f32(<2 x float>)
1052 declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
1053 declare <8 x float> @llvm.trunc.v8f32(<8 x float>)
1054 declare <16 x float> @llvm.trunc.v16f32(<16 x float>)
1055 declare <32 x float> @llvm.trunc.v32f32(<32 x float>)
1056 declare <64 x float> @llvm.trunc.v64f32(<64 x float>)
1057 declare <1 x double> @llvm.trunc.v1f64(<1 x double>)
1058 declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
1059 declare <4 x double> @llvm.trunc.v4f64(<4 x double>)
1060 declare <8 x double> @llvm.trunc.v8f64(<8 x double>)
1061 declare <16 x double> @llvm.trunc.v16f64(<16 x double>)
1062 declare <32 x double> @llvm.trunc.v32f64(<32 x double>)