; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
target triple = "aarch64-unknown-linux-gnu"

;
; FCEIL -> FRINTP
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @frintp_v4f16(<4 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintp v0.4h, v0.4h
; CHECK-NEXT:    ret
  %res = call <4 x half> @llvm.ceil.v4f16(<4 x half> %op)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @frintp_v8f16(<8 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintp v0.8h, v0.8h
; CHECK-NEXT:    ret
  %res = call <8 x half> @llvm.ceil.v8f16(<8 x half> %op)
  ret <8 x half> %res
}

define void @frintp_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frintp z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <16 x half>, ptr %a
  %res = call <16 x half> @llvm.ceil.v16f16(<16 x half> %op)
  store <16 x half> %res, ptr %a
  ret void
}

define void @frintp_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintp_v32f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frintp z0.h, p0/m, z0.h
; VBITS_GE_256-NEXT:    frintp z1.h, p0/m, z1.h
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frintp_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frintp z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <32 x half>, ptr %a
  %res = call <32 x half> @llvm.ceil.v32f16(<32 x half> %op)
  store <32 x half> %res, ptr %a
  ret void
}

define void @frintp_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintp_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frintp z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <64 x half>, ptr %a
  %res = call <64 x half> @llvm.ceil.v64f16(<64 x half> %op)
  store <64 x half> %res, ptr %a
  ret void
}

define void @frintp_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintp_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frintp z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <128 x half>, ptr %a
  %res = call <128 x half> @llvm.ceil.v128f16(<128 x half> %op)
  store <128 x half> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x float> @frintp_v2f32(<2 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintp v0.2s, v0.2s
; CHECK-NEXT:    ret
  %res = call <2 x float> @llvm.ceil.v2f32(<2 x float> %op)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @frintp_v4f32(<4 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintp v0.4s, v0.4s
; CHECK-NEXT:    ret
  %res = call <4 x float> @llvm.ceil.v4f32(<4 x float> %op)
  ret <4 x float> %res
}

define void @frintp_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frintp z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <8 x float>, ptr %a
  %res = call <8 x float> @llvm.ceil.v8f32(<8 x float> %op)
  store <8 x float> %res, ptr %a
  ret void
}

define void @frintp_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintp_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frintp z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT:    frintp z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frintp_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frintp z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <16 x float>, ptr %a
  %res = call <16 x float> @llvm.ceil.v16f32(<16 x float> %op)
  store <16 x float> %res, ptr %a
  ret void
}

define void @frintp_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintp_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frintp z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <32 x float>, ptr %a
  %res = call <32 x float> @llvm.ceil.v32f32(<32 x float> %op)
  store <32 x float> %res, ptr %a
  ret void
}

define void @frintp_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintp_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frintp z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <64 x float>, ptr %a
  %res = call <64 x float> @llvm.ceil.v64f32(<64 x float> %op)
  store <64 x float> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x double> @frintp_v1f64(<1 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintp d0, d0
; CHECK-NEXT:    ret
  %res = call <1 x double> @llvm.ceil.v1f64(<1 x double> %op)
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @frintp_v2f64(<2 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintp v0.2d, v0.2d
; CHECK-NEXT:    ret
  %res = call <2 x double> @llvm.ceil.v2f64(<2 x double> %op)
  ret <2 x double> %res
}

define void @frintp_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frintp z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <4 x double>, ptr %a
  %res = call <4 x double> @llvm.ceil.v4f64(<4 x double> %op)
  store <4 x double> %res, ptr %a
  ret void
}

define void @frintp_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintp_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frintp z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    frintp z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frintp_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frintp z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <8 x double>, ptr %a
  %res = call <8 x double> @llvm.ceil.v8f64(<8 x double> %op)
  store <8 x double> %res, ptr %a
  ret void
}

define void @frintp_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintp_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frintp z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <16 x double>, ptr %a
  %res = call <16 x double> @llvm.ceil.v16f64(<16 x double> %op)
  store <16 x double> %res, ptr %a
  ret void
}

define void @frintp_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintp_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frintp z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <32 x double>, ptr %a
  %res = call <32 x double> @llvm.ceil.v32f64(<32 x double> %op)
  store <32 x double> %res, ptr %a
  ret void
}
;
; FFLOOR -> FRINTM
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @frintm_v4f16(<4 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintm v0.4h, v0.4h
; CHECK-NEXT:    ret
  %res = call <4 x half> @llvm.floor.v4f16(<4 x half> %op)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @frintm_v8f16(<8 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintm v0.8h, v0.8h
; CHECK-NEXT:    ret
  %res = call <8 x half> @llvm.floor.v8f16(<8 x half> %op)
  ret <8 x half> %res
}

define void @frintm_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frintm z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <16 x half>, ptr %a
  %res = call <16 x half> @llvm.floor.v16f16(<16 x half> %op)
  store <16 x half> %res, ptr %a
  ret void
}

define void @frintm_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintm_v32f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frintm z0.h, p0/m, z0.h
; VBITS_GE_256-NEXT:    frintm z1.h, p0/m, z1.h
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frintm_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frintm z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <32 x half>, ptr %a
  %res = call <32 x half> @llvm.floor.v32f16(<32 x half> %op)
  store <32 x half> %res, ptr %a
  ret void
}

define void @frintm_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintm_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frintm z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <64 x half>, ptr %a
  %res = call <64 x half> @llvm.floor.v64f16(<64 x half> %op)
  store <64 x half> %res, ptr %a
  ret void
}

define void @frintm_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintm_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frintm z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <128 x half>, ptr %a
  %res = call <128 x half> @llvm.floor.v128f16(<128 x half> %op)
  store <128 x half> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x float> @frintm_v2f32(<2 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintm v0.2s, v0.2s
; CHECK-NEXT:    ret
  %res = call <2 x float> @llvm.floor.v2f32(<2 x float> %op)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @frintm_v4f32(<4 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintm v0.4s, v0.4s
; CHECK-NEXT:    ret
  %res = call <4 x float> @llvm.floor.v4f32(<4 x float> %op)
  ret <4 x float> %res
}

define void @frintm_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frintm z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <8 x float>, ptr %a
  %res = call <8 x float> @llvm.floor.v8f32(<8 x float> %op)
  store <8 x float> %res, ptr %a
  ret void
}

define void @frintm_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintm_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frintm z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT:    frintm z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frintm_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frintm z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <16 x float>, ptr %a
  %res = call <16 x float> @llvm.floor.v16f32(<16 x float> %op)
  store <16 x float> %res, ptr %a
  ret void
}

define void @frintm_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintm_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frintm z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <32 x float>, ptr %a
  %res = call <32 x float> @llvm.floor.v32f32(<32 x float> %op)
  store <32 x float> %res, ptr %a
  ret void
}

define void @frintm_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintm_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frintm z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <64 x float>, ptr %a
  %res = call <64 x float> @llvm.floor.v64f32(<64 x float> %op)
  store <64 x float> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x double> @frintm_v1f64(<1 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintm d0, d0
; CHECK-NEXT:    ret
  %res = call <1 x double> @llvm.floor.v1f64(<1 x double> %op)
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @frintm_v2f64(<2 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintm v0.2d, v0.2d
; CHECK-NEXT:    ret
  %res = call <2 x double> @llvm.floor.v2f64(<2 x double> %op)
  ret <2 x double> %res
}

define void @frintm_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frintm z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <4 x double>, ptr %a
  %res = call <4 x double> @llvm.floor.v4f64(<4 x double> %op)
  store <4 x double> %res, ptr %a
  ret void
}

define void @frintm_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintm_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frintm z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    frintm z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frintm_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frintm z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <8 x double>, ptr %a
  %res = call <8 x double> @llvm.floor.v8f64(<8 x double> %op)
  store <8 x double> %res, ptr %a
  ret void
}

define void @frintm_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintm_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frintm z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <16 x double>, ptr %a
  %res = call <16 x double> @llvm.floor.v16f64(<16 x double> %op)
  store <16 x double> %res, ptr %a
  ret void
}

define void @frintm_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintm_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frintm z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <32 x double>, ptr %a
  %res = call <32 x double> @llvm.floor.v32f64(<32 x double> %op)
  store <32 x double> %res, ptr %a
  ret void
}
;
; FNEARBYINT -> FRINTI
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @frinti_v4f16(<4 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frinti v0.4h, v0.4h
; CHECK-NEXT:    ret
  %res = call <4 x half> @llvm.nearbyint.v4f16(<4 x half> %op)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @frinti_v8f16(<8 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frinti v0.8h, v0.8h
; CHECK-NEXT:    ret
  %res = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %op)
  ret <8 x half> %res
}

define void @frinti_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frinti z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <16 x half>, ptr %a
  %res = call <16 x half> @llvm.nearbyint.v16f16(<16 x half> %op)
  store <16 x half> %res, ptr %a
  ret void
}

define void @frinti_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: frinti_v32f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frinti z0.h, p0/m, z0.h
; VBITS_GE_256-NEXT:    frinti z1.h, p0/m, z1.h
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frinti_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frinti z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <32 x half>, ptr %a
  %res = call <32 x half> @llvm.nearbyint.v32f16(<32 x half> %op)
  store <32 x half> %res, ptr %a
  ret void
}

define void @frinti_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frinti_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frinti z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <64 x half>, ptr %a
  %res = call <64 x half> @llvm.nearbyint.v64f16(<64 x half> %op)
  store <64 x half> %res, ptr %a
  ret void
}

define void @frinti_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frinti_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frinti z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <128 x half>, ptr %a
  %res = call <128 x half> @llvm.nearbyint.v128f16(<128 x half> %op)
  store <128 x half> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x float> @frinti_v2f32(<2 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frinti v0.2s, v0.2s
; CHECK-NEXT:    ret
  %res = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %op)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @frinti_v4f32(<4 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frinti v0.4s, v0.4s
; CHECK-NEXT:    ret
  %res = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %op)
  ret <4 x float> %res
}

define void @frinti_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frinti z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <8 x float>, ptr %a
  %res = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %op)
  store <8 x float> %res, ptr %a
  ret void
}

define void @frinti_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: frinti_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frinti z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT:    frinti z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frinti_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frinti z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <16 x float>, ptr %a
  %res = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %op)
  store <16 x float> %res, ptr %a
  ret void
}

define void @frinti_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frinti_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frinti z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <32 x float>, ptr %a
  %res = call <32 x float> @llvm.nearbyint.v32f32(<32 x float> %op)
  store <32 x float> %res, ptr %a
  ret void
}

define void @frinti_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frinti_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frinti z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <64 x float>, ptr %a
  %res = call <64 x float> @llvm.nearbyint.v64f32(<64 x float> %op)
  store <64 x float> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x double> @frinti_v1f64(<1 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frinti d0, d0
; CHECK-NEXT:    ret
  %res = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %op)
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @frinti_v2f64(<2 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frinti v0.2d, v0.2d
; CHECK-NEXT:    ret
  %res = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %op)
  ret <2 x double> %res
}

define void @frinti_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frinti z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <4 x double>, ptr %a
  %res = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %op)
  store <4 x double> %res, ptr %a
  ret void
}

define void @frinti_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: frinti_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frinti z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    frinti z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frinti_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frinti z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <8 x double>, ptr %a
  %res = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %op)
  store <8 x double> %res, ptr %a
  ret void
}

define void @frinti_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frinti_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frinti z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <16 x double>, ptr %a
  %res = call <16 x double> @llvm.nearbyint.v16f64(<16 x double> %op)
  store <16 x double> %res, ptr %a
  ret void
}

define void @frinti_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frinti_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frinti z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <32 x double>, ptr %a
  %res = call <32 x double> @llvm.nearbyint.v32f64(<32 x double> %op)
  store <32 x double> %res, ptr %a
  ret void
}
;
; FRINT -> FRINTX
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @frintx_v4f16(<4 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintx_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintx v0.4h, v0.4h
; CHECK-NEXT:    ret
  %res = call <4 x half> @llvm.rint.v4f16(<4 x half> %op)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @frintx_v8f16(<8 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintx_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintx v0.8h, v0.8h
; CHECK-NEXT:    ret
  %res = call <8 x half> @llvm.rint.v8f16(<8 x half> %op)
  ret <8 x half> %res
}

define void @frintx_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintx_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frintx z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <16 x half>, ptr %a
  %res = call <16 x half> @llvm.rint.v16f16(<16 x half> %op)
  store <16 x half> %res, ptr %a
  ret void
}

define void @frintx_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintx_v32f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frintx z0.h, p0/m, z0.h
; VBITS_GE_256-NEXT:    frintx z1.h, p0/m, z1.h
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frintx_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frintx z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <32 x half>, ptr %a
  %res = call <32 x half> @llvm.rint.v32f16(<32 x half> %op)
  store <32 x half> %res, ptr %a
  ret void
}

define void @frintx_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintx_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frintx z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <64 x half>, ptr %a
  %res = call <64 x half> @llvm.rint.v64f16(<64 x half> %op)
  store <64 x half> %res, ptr %a
  ret void
}

define void @frintx_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintx_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frintx z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <128 x half>, ptr %a
  %res = call <128 x half> @llvm.rint.v128f16(<128 x half> %op)
  store <128 x half> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x float> @frintx_v2f32(<2 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintx_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintx v0.2s, v0.2s
; CHECK-NEXT:    ret
  %res = call <2 x float> @llvm.rint.v2f32(<2 x float> %op)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @frintx_v4f32(<4 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintx_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintx v0.4s, v0.4s
; CHECK-NEXT:    ret
  %res = call <4 x float> @llvm.rint.v4f32(<4 x float> %op)
  ret <4 x float> %res
}

define void @frintx_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintx_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frintx z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <8 x float>, ptr %a
  %res = call <8 x float> @llvm.rint.v8f32(<8 x float> %op)
  store <8 x float> %res, ptr %a
  ret void
}

define void @frintx_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintx_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frintx z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT:    frintx z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frintx_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frintx z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <16 x float>, ptr %a
  %res = call <16 x float> @llvm.rint.v16f32(<16 x float> %op)
  store <16 x float> %res, ptr %a
  ret void
}

define void @frintx_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintx_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frintx z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <32 x float>, ptr %a
  %res = call <32 x float> @llvm.rint.v32f32(<32 x float> %op)
  store <32 x float> %res, ptr %a
  ret void
}

define void @frintx_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintx_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frintx z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <64 x float>, ptr %a
  %res = call <64 x float> @llvm.rint.v64f32(<64 x float> %op)
  store <64 x float> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x double> @frintx_v1f64(<1 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintx_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintx d0, d0
; CHECK-NEXT:    ret
  %res = call <1 x double> @llvm.rint.v1f64(<1 x double> %op)
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @frintx_v2f64(<2 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintx_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintx v0.2d, v0.2d
; CHECK-NEXT:    ret
  %res = call <2 x double> @llvm.rint.v2f64(<2 x double> %op)
  ret <2 x double> %res
}

define void @frintx_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintx_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frintx z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <4 x double>, ptr %a
  %res = call <4 x double> @llvm.rint.v4f64(<4 x double> %op)
  store <4 x double> %res, ptr %a
  ret void
}

define void @frintx_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintx_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frintx z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    frintx z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frintx_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frintx z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <8 x double>, ptr %a
  %res = call <8 x double> @llvm.rint.v8f64(<8 x double> %op)
  store <8 x double> %res, ptr %a
  ret void
}

define void @frintx_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintx_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frintx z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <16 x double>, ptr %a
  %res = call <16 x double> @llvm.rint.v16f64(<16 x double> %op)
  store <16 x double> %res, ptr %a
  ret void
}

define void @frintx_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintx_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frintx z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <32 x double>, ptr %a
  %res = call <32 x double> @llvm.rint.v32f64(<32 x double> %op)
  store <32 x double> %res, ptr %a
  ret void
}
;; ROUND -> FRINTA section (llvm.round lowered to the FRINTA instruction).
;; Sub-128-bit vectors use NEON; larger fixed vectors use predicated SVE.
;; Assertions are autogenerated -- regenerate, do not hand-edit.
1084 ; Don't use SVE for 64-bit vectors.
1085 define <4 x half> @frinta_v4f16(<4 x half> %op) vscale_range(2,0) #0 {
1086 ; CHECK-LABEL: frinta_v4f16:
1088 ; CHECK-NEXT: frinta v0.4h, v0.4h
1090 %res = call <4 x half> @llvm.round.v4f16(<4 x half> %op)
1094 ; Don't use SVE for 128-bit vectors.
1095 define <8 x half> @frinta_v8f16(<8 x half> %op) vscale_range(2,0) #0 {
1096 ; CHECK-LABEL: frinta_v8f16:
1098 ; CHECK-NEXT: frinta v0.8h, v0.8h
1100 %res = call <8 x half> @llvm.round.v8f16(<8 x half> %op)
1104 define void @frinta_v16f16(ptr %a) vscale_range(2,0) #0 {
1105 ; CHECK-LABEL: frinta_v16f16:
1107 ; CHECK-NEXT: ptrue p0.h, vl16
1108 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
1109 ; CHECK-NEXT: frinta z0.h, p0/m, z0.h
1110 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
1112 %op = load <16 x half>, ptr %a
1113 %res = call <16 x half> @llvm.round.v16f16(<16 x half> %op)
1114 store <16 x half> %res, ptr %a
;; No vscale_range -- the 256-bit configuration splits into two vl16 halves
;; (x8 = 16 elements, lsl #1); the 512-bit configuration uses one vl32 op.
1118 define void @frinta_v32f16(ptr %a) #0 {
1119 ; VBITS_GE_256-LABEL: frinta_v32f16:
1120 ; VBITS_GE_256: // %bb.0:
1121 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
1122 ; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
1123 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
1124 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
1125 ; VBITS_GE_256-NEXT: frinta z0.h, p0/m, z0.h
1126 ; VBITS_GE_256-NEXT: frinta z1.h, p0/m, z1.h
1127 ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
1128 ; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
1129 ; VBITS_GE_256-NEXT: ret
1131 ; VBITS_GE_512-LABEL: frinta_v32f16:
1132 ; VBITS_GE_512: // %bb.0:
1133 ; VBITS_GE_512-NEXT: ptrue p0.h, vl32
1134 ; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
1135 ; VBITS_GE_512-NEXT: frinta z0.h, p0/m, z0.h
1136 ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
1137 ; VBITS_GE_512-NEXT: ret
1138 %op = load <32 x half>, ptr %a
1139 %res = call <32 x half> @llvm.round.v32f16(<32 x half> %op)
1140 store <32 x half> %res, ptr %a
1144 define void @frinta_v64f16(ptr %a) vscale_range(8,0) #0 {
1145 ; CHECK-LABEL: frinta_v64f16:
1147 ; CHECK-NEXT: ptrue p0.h, vl64
1148 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
1149 ; CHECK-NEXT: frinta z0.h, p0/m, z0.h
1150 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
1152 %op = load <64 x half>, ptr %a
1153 %res = call <64 x half> @llvm.round.v64f16(<64 x half> %op)
1154 store <64 x half> %res, ptr %a
1158 define void @frinta_v128f16(ptr %a) vscale_range(16,0) #0 {
1159 ; CHECK-LABEL: frinta_v128f16:
1161 ; CHECK-NEXT: ptrue p0.h, vl128
1162 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
1163 ; CHECK-NEXT: frinta z0.h, p0/m, z0.h
1164 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
1166 %op = load <128 x half>, ptr %a
1167 %res = call <128 x half> @llvm.round.v128f16(<128 x half> %op)
1168 store <128 x half> %res, ptr %a
1172 ; Don't use SVE for 64-bit vectors.
1173 define <2 x float> @frinta_v2f32(<2 x float> %op) vscale_range(2,0) #0 {
1174 ; CHECK-LABEL: frinta_v2f32:
1176 ; CHECK-NEXT: frinta v0.2s, v0.2s
1178 %res = call <2 x float> @llvm.round.v2f32(<2 x float> %op)
1179 ret <2 x float> %res
1182 ; Don't use SVE for 128-bit vectors.
1183 define <4 x float> @frinta_v4f32(<4 x float> %op) vscale_range(2,0) #0 {
1184 ; CHECK-LABEL: frinta_v4f32:
1186 ; CHECK-NEXT: frinta v0.4s, v0.4s
1188 %res = call <4 x float> @llvm.round.v4f32(<4 x float> %op)
1189 ret <4 x float> %res
1192 define void @frinta_v8f32(ptr %a) vscale_range(2,0) #0 {
1193 ; CHECK-LABEL: frinta_v8f32:
1195 ; CHECK-NEXT: ptrue p0.s, vl8
1196 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
1197 ; CHECK-NEXT: frinta z0.s, p0/m, z0.s
1198 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
1200 %op = load <8 x float>, ptr %a
1201 %res = call <8 x float> @llvm.round.v8f32(<8 x float> %op)
1202 store <8 x float> %res, ptr %a
;; Split-vs-single pattern again for f32 (x8 = 8 elements, lsl #2).
1206 define void @frinta_v16f32(ptr %a) #0 {
1207 ; VBITS_GE_256-LABEL: frinta_v16f32:
1208 ; VBITS_GE_256: // %bb.0:
1209 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
1210 ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
1211 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
1212 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
1213 ; VBITS_GE_256-NEXT: frinta z0.s, p0/m, z0.s
1214 ; VBITS_GE_256-NEXT: frinta z1.s, p0/m, z1.s
1215 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
1216 ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
1217 ; VBITS_GE_256-NEXT: ret
1219 ; VBITS_GE_512-LABEL: frinta_v16f32:
1220 ; VBITS_GE_512: // %bb.0:
1221 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16
1222 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
1223 ; VBITS_GE_512-NEXT: frinta z0.s, p0/m, z0.s
1224 ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
1225 ; VBITS_GE_512-NEXT: ret
1226 %op = load <16 x float>, ptr %a
1227 %res = call <16 x float> @llvm.round.v16f32(<16 x float> %op)
1228 store <16 x float> %res, ptr %a
1232 define void @frinta_v32f32(ptr %a) vscale_range(8,0) #0 {
1233 ; CHECK-LABEL: frinta_v32f32:
1235 ; CHECK-NEXT: ptrue p0.s, vl32
1236 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
1237 ; CHECK-NEXT: frinta z0.s, p0/m, z0.s
1238 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
1240 %op = load <32 x float>, ptr %a
1241 %res = call <32 x float> @llvm.round.v32f32(<32 x float> %op)
1242 store <32 x float> %res, ptr %a
1246 define void @frinta_v64f32(ptr %a) vscale_range(16,0) #0 {
1247 ; CHECK-LABEL: frinta_v64f32:
1249 ; CHECK-NEXT: ptrue p0.s, vl64
1250 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
1251 ; CHECK-NEXT: frinta z0.s, p0/m, z0.s
1252 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
1254 %op = load <64 x float>, ptr %a
1255 %res = call <64 x float> @llvm.round.v64f32(<64 x float> %op)
1256 store <64 x float> %res, ptr %a
1260 ; Don't use SVE for 64-bit vectors.
1261 define <1 x double> @frinta_v1f64(<1 x double> %op) vscale_range(2,0) #0 {
1262 ; CHECK-LABEL: frinta_v1f64:
1264 ; CHECK-NEXT: frinta d0, d0
1266 %res = call <1 x double> @llvm.round.v1f64(<1 x double> %op)
1267 ret <1 x double> %res
1270 ; Don't use SVE for 128-bit vectors.
1271 define <2 x double> @frinta_v2f64(<2 x double> %op) vscale_range(2,0) #0 {
1272 ; CHECK-LABEL: frinta_v2f64:
1274 ; CHECK-NEXT: frinta v0.2d, v0.2d
1276 %res = call <2 x double> @llvm.round.v2f64(<2 x double> %op)
1277 ret <2 x double> %res
1280 define void @frinta_v4f64(ptr %a) vscale_range(2,0) #0 {
1281 ; CHECK-LABEL: frinta_v4f64:
1283 ; CHECK-NEXT: ptrue p0.d, vl4
1284 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
1285 ; CHECK-NEXT: frinta z0.d, p0/m, z0.d
1286 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
1288 %op = load <4 x double>, ptr %a
1289 %res = call <4 x double> @llvm.round.v4f64(<4 x double> %op)
1290 store <4 x double> %res, ptr %a
;; Split-vs-single pattern for f64 (x8 = 4 elements, lsl #3).
1294 define void @frinta_v8f64(ptr %a) #0 {
1295 ; VBITS_GE_256-LABEL: frinta_v8f64:
1296 ; VBITS_GE_256: // %bb.0:
1297 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
1298 ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
1299 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
1300 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
1301 ; VBITS_GE_256-NEXT: frinta z0.d, p0/m, z0.d
1302 ; VBITS_GE_256-NEXT: frinta z1.d, p0/m, z1.d
1303 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
1304 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
1305 ; VBITS_GE_256-NEXT: ret
1307 ; VBITS_GE_512-LABEL: frinta_v8f64:
1308 ; VBITS_GE_512: // %bb.0:
1309 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
1310 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
1311 ; VBITS_GE_512-NEXT: frinta z0.d, p0/m, z0.d
1312 ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
1313 ; VBITS_GE_512-NEXT: ret
1314 %op = load <8 x double>, ptr %a
1315 %res = call <8 x double> @llvm.round.v8f64(<8 x double> %op)
1316 store <8 x double> %res, ptr %a
1320 define void @frinta_v16f64(ptr %a) vscale_range(8,0) #0 {
1321 ; CHECK-LABEL: frinta_v16f64:
1323 ; CHECK-NEXT: ptrue p0.d, vl16
1324 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
1325 ; CHECK-NEXT: frinta z0.d, p0/m, z0.d
1326 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
1328 %op = load <16 x double>, ptr %a
1329 %res = call <16 x double> @llvm.round.v16f64(<16 x double> %op)
1330 store <16 x double> %res, ptr %a
1334 define void @frinta_v32f64(ptr %a) vscale_range(16,0) #0 {
1335 ; CHECK-LABEL: frinta_v32f64:
1337 ; CHECK-NEXT: ptrue p0.d, vl32
1338 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
1339 ; CHECK-NEXT: frinta z0.d, p0/m, z0.d
1340 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
1342 %op = load <32 x double>, ptr %a
1343 %res = call <32 x double> @llvm.round.v32f64(<32 x double> %op)
1344 store <32 x double> %res, ptr %a
;; llvm.roundeven lowered to FRINTN. Same structure as the other sections:
;; NEON below 256 bits, predicated SVE otherwise; autogenerated assertions.
1349 ; ROUNDEVEN -> FRINTN
1352 ; Don't use SVE for 64-bit vectors.
1353 define <4 x half> @frintn_v4f16(<4 x half> %op) vscale_range(2,0) #0 {
1354 ; CHECK-LABEL: frintn_v4f16:
1356 ; CHECK-NEXT: frintn v0.4h, v0.4h
1358 %res = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %op)
1362 ; Don't use SVE for 128-bit vectors.
1363 define <8 x half> @frintn_v8f16(<8 x half> %op) vscale_range(2,0) #0 {
1364 ; CHECK-LABEL: frintn_v8f16:
1366 ; CHECK-NEXT: frintn v0.8h, v0.8h
1368 %res = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %op)
1372 define void @frintn_v16f16(ptr %a) vscale_range(2,0) #0 {
1373 ; CHECK-LABEL: frintn_v16f16:
1375 ; CHECK-NEXT: ptrue p0.h, vl16
1376 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
1377 ; CHECK-NEXT: frintn z0.h, p0/m, z0.h
1378 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
1380 %op = load <16 x half>, ptr %a
1381 %res = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %op)
1382 store <16 x half> %res, ptr %a
;; No vscale_range: 256-bit configuration splits into two vl16 halves,
;; 512-bit configuration uses a single vl32 operation.
1386 define void @frintn_v32f16(ptr %a) #0 {
1387 ; VBITS_GE_256-LABEL: frintn_v32f16:
1388 ; VBITS_GE_256: // %bb.0:
1389 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
1390 ; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
1391 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
1392 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
1393 ; VBITS_GE_256-NEXT: frintn z0.h, p0/m, z0.h
1394 ; VBITS_GE_256-NEXT: frintn z1.h, p0/m, z1.h
1395 ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
1396 ; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
1397 ; VBITS_GE_256-NEXT: ret
1399 ; VBITS_GE_512-LABEL: frintn_v32f16:
1400 ; VBITS_GE_512: // %bb.0:
1401 ; VBITS_GE_512-NEXT: ptrue p0.h, vl32
1402 ; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
1403 ; VBITS_GE_512-NEXT: frintn z0.h, p0/m, z0.h
1404 ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
1405 ; VBITS_GE_512-NEXT: ret
1406 %op = load <32 x half>, ptr %a
1407 %res = call <32 x half> @llvm.roundeven.v32f16(<32 x half> %op)
1408 store <32 x half> %res, ptr %a
1412 define void @frintn_v64f16(ptr %a) vscale_range(8,0) #0 {
1413 ; CHECK-LABEL: frintn_v64f16:
1415 ; CHECK-NEXT: ptrue p0.h, vl64
1416 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
1417 ; CHECK-NEXT: frintn z0.h, p0/m, z0.h
1418 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
1420 %op = load <64 x half>, ptr %a
1421 %res = call <64 x half> @llvm.roundeven.v64f16(<64 x half> %op)
1422 store <64 x half> %res, ptr %a
1426 define void @frintn_v128f16(ptr %a) vscale_range(16,0) #0 {
1427 ; CHECK-LABEL: frintn_v128f16:
1429 ; CHECK-NEXT: ptrue p0.h, vl128
1430 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
1431 ; CHECK-NEXT: frintn z0.h, p0/m, z0.h
1432 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
1434 %op = load <128 x half>, ptr %a
1435 %res = call <128 x half> @llvm.roundeven.v128f16(<128 x half> %op)
1436 store <128 x half> %res, ptr %a
1440 ; Don't use SVE for 64-bit vectors.
1441 define <2 x float> @frintn_v2f32(<2 x float> %op) vscale_range(2,0) #0 {
1442 ; CHECK-LABEL: frintn_v2f32:
1444 ; CHECK-NEXT: frintn v0.2s, v0.2s
1446 %res = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %op)
1447 ret <2 x float> %res
1450 ; Don't use SVE for 128-bit vectors.
1451 define <4 x float> @frintn_v4f32(<4 x float> %op) vscale_range(2,0) #0 {
1452 ; CHECK-LABEL: frintn_v4f32:
1454 ; CHECK-NEXT: frintn v0.4s, v0.4s
1456 %res = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %op)
1457 ret <4 x float> %res
1460 define void @frintn_v8f32(ptr %a) vscale_range(2,0) #0 {
1461 ; CHECK-LABEL: frintn_v8f32:
1463 ; CHECK-NEXT: ptrue p0.s, vl8
1464 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
1465 ; CHECK-NEXT: frintn z0.s, p0/m, z0.s
1466 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
1468 %op = load <8 x float>, ptr %a
1469 %res = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %op)
1470 store <8 x float> %res, ptr %a
1474 define void @frintn_v16f32(ptr %a) #0 {
1475 ; VBITS_GE_256-LABEL: frintn_v16f32:
1476 ; VBITS_GE_256: // %bb.0:
1477 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
1478 ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
1479 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
1480 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
1481 ; VBITS_GE_256-NEXT: frintn z0.s, p0/m, z0.s
1482 ; VBITS_GE_256-NEXT: frintn z1.s, p0/m, z1.s
1483 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
1484 ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
1485 ; VBITS_GE_256-NEXT: ret
1487 ; VBITS_GE_512-LABEL: frintn_v16f32:
1488 ; VBITS_GE_512: // %bb.0:
1489 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16
1490 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
1491 ; VBITS_GE_512-NEXT: frintn z0.s, p0/m, z0.s
1492 ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
1493 ; VBITS_GE_512-NEXT: ret
1494 %op = load <16 x float>, ptr %a
1495 %res = call <16 x float> @llvm.roundeven.v16f32(<16 x float> %op)
1496 store <16 x float> %res, ptr %a
1500 define void @frintn_v32f32(ptr %a) vscale_range(8,0) #0 {
1501 ; CHECK-LABEL: frintn_v32f32:
1503 ; CHECK-NEXT: ptrue p0.s, vl32
1504 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
1505 ; CHECK-NEXT: frintn z0.s, p0/m, z0.s
1506 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
1508 %op = load <32 x float>, ptr %a
1509 %res = call <32 x float> @llvm.roundeven.v32f32(<32 x float> %op)
1510 store <32 x float> %res, ptr %a
1514 define void @frintn_v64f32(ptr %a) vscale_range(16,0) #0 {
1515 ; CHECK-LABEL: frintn_v64f32:
1517 ; CHECK-NEXT: ptrue p0.s, vl64
1518 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
1519 ; CHECK-NEXT: frintn z0.s, p0/m, z0.s
1520 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
1522 %op = load <64 x float>, ptr %a
1523 %res = call <64 x float> @llvm.roundeven.v64f32(<64 x float> %op)
1524 store <64 x float> %res, ptr %a
1528 ; Don't use SVE for 64-bit vectors.
1529 define <1 x double> @frintn_v1f64(<1 x double> %op) vscale_range(2,0) #0 {
1530 ; CHECK-LABEL: frintn_v1f64:
1532 ; CHECK-NEXT: frintn d0, d0
1534 %res = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %op)
1535 ret <1 x double> %res
1538 ; Don't use SVE for 128-bit vectors.
1539 define <2 x double> @frintn_v2f64(<2 x double> %op) vscale_range(2,0) #0 {
1540 ; CHECK-LABEL: frintn_v2f64:
1542 ; CHECK-NEXT: frintn v0.2d, v0.2d
1544 %res = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %op)
1545 ret <2 x double> %res
1548 define void @frintn_v4f64(ptr %a) vscale_range(2,0) #0 {
1549 ; CHECK-LABEL: frintn_v4f64:
1551 ; CHECK-NEXT: ptrue p0.d, vl4
1552 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
1553 ; CHECK-NEXT: frintn z0.d, p0/m, z0.d
1554 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
1556 %op = load <4 x double>, ptr %a
1557 %res = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %op)
1558 store <4 x double> %res, ptr %a
1562 define void @frintn_v8f64(ptr %a) #0 {
1563 ; VBITS_GE_256-LABEL: frintn_v8f64:
1564 ; VBITS_GE_256: // %bb.0:
1565 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
1566 ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
1567 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
1568 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
1569 ; VBITS_GE_256-NEXT: frintn z0.d, p0/m, z0.d
1570 ; VBITS_GE_256-NEXT: frintn z1.d, p0/m, z1.d
1571 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
1572 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
1573 ; VBITS_GE_256-NEXT: ret
1575 ; VBITS_GE_512-LABEL: frintn_v8f64:
1576 ; VBITS_GE_512: // %bb.0:
1577 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
1578 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
1579 ; VBITS_GE_512-NEXT: frintn z0.d, p0/m, z0.d
1580 ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
1581 ; VBITS_GE_512-NEXT: ret
1582 %op = load <8 x double>, ptr %a
1583 %res = call <8 x double> @llvm.roundeven.v8f64(<8 x double> %op)
1584 store <8 x double> %res, ptr %a
1588 define void @frintn_v16f64(ptr %a) vscale_range(8,0) #0 {
1589 ; CHECK-LABEL: frintn_v16f64:
1591 ; CHECK-NEXT: ptrue p0.d, vl16
1592 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
1593 ; CHECK-NEXT: frintn z0.d, p0/m, z0.d
1594 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
1596 %op = load <16 x double>, ptr %a
1597 %res = call <16 x double> @llvm.roundeven.v16f64(<16 x double> %op)
1598 store <16 x double> %res, ptr %a
1602 define void @frintn_v32f64(ptr %a) vscale_range(16,0) #0 {
1603 ; CHECK-LABEL: frintn_v32f64:
1605 ; CHECK-NEXT: ptrue p0.d, vl32
1606 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
1607 ; CHECK-NEXT: frintn z0.d, p0/m, z0.d
1608 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
1610 %op = load <32 x double>, ptr %a
1611 %res = call <32 x double> @llvm.roundeven.v32f64(<32 x double> %op)
1612 store <32 x double> %res, ptr %a
;; TRUNC -> FRINTZ section (llvm.trunc lowered to the FRINTZ instruction).
;; NEON below 256 bits, predicated SVE otherwise; autogenerated assertions.
1620 ; Don't use SVE for 64-bit vectors.
1621 define <4 x half> @frintz_v4f16(<4 x half> %op) vscale_range(2,0) #0 {
1622 ; CHECK-LABEL: frintz_v4f16:
1624 ; CHECK-NEXT: frintz v0.4h, v0.4h
1626 %res = call <4 x half> @llvm.trunc.v4f16(<4 x half> %op)
1630 ; Don't use SVE for 128-bit vectors.
1631 define <8 x half> @frintz_v8f16(<8 x half> %op) vscale_range(2,0) #0 {
1632 ; CHECK-LABEL: frintz_v8f16:
1634 ; CHECK-NEXT: frintz v0.8h, v0.8h
1636 %res = call <8 x half> @llvm.trunc.v8f16(<8 x half> %op)
1640 define void @frintz_v16f16(ptr %a) vscale_range(2,0) #0 {
1641 ; CHECK-LABEL: frintz_v16f16:
1643 ; CHECK-NEXT: ptrue p0.h, vl16
1644 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
1645 ; CHECK-NEXT: frintz z0.h, p0/m, z0.h
1646 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
1648 %op = load <16 x half>, ptr %a
1649 %res = call <16 x half> @llvm.trunc.v16f16(<16 x half> %op)
1650 store <16 x half> %res, ptr %a
;; No vscale_range: 256-bit configuration splits into two vl16 halves,
;; 512-bit configuration uses a single vl32 operation.
1654 define void @frintz_v32f16(ptr %a) #0 {
1655 ; VBITS_GE_256-LABEL: frintz_v32f16:
1656 ; VBITS_GE_256: // %bb.0:
1657 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
1658 ; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
1659 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
1660 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
1661 ; VBITS_GE_256-NEXT: frintz z0.h, p0/m, z0.h
1662 ; VBITS_GE_256-NEXT: frintz z1.h, p0/m, z1.h
1663 ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
1664 ; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
1665 ; VBITS_GE_256-NEXT: ret
1667 ; VBITS_GE_512-LABEL: frintz_v32f16:
1668 ; VBITS_GE_512: // %bb.0:
1669 ; VBITS_GE_512-NEXT: ptrue p0.h, vl32
1670 ; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
1671 ; VBITS_GE_512-NEXT: frintz z0.h, p0/m, z0.h
1672 ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
1673 ; VBITS_GE_512-NEXT: ret
1674 %op = load <32 x half>, ptr %a
1675 %res = call <32 x half> @llvm.trunc.v32f16(<32 x half> %op)
1676 store <32 x half> %res, ptr %a
1680 define void @frintz_v64f16(ptr %a) vscale_range(8,0) #0 {
1681 ; CHECK-LABEL: frintz_v64f16:
1683 ; CHECK-NEXT: ptrue p0.h, vl64
1684 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
1685 ; CHECK-NEXT: frintz z0.h, p0/m, z0.h
1686 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
1688 %op = load <64 x half>, ptr %a
1689 %res = call <64 x half> @llvm.trunc.v64f16(<64 x half> %op)
1690 store <64 x half> %res, ptr %a
1694 define void @frintz_v128f16(ptr %a) vscale_range(16,0) #0 {
1695 ; CHECK-LABEL: frintz_v128f16:
1697 ; CHECK-NEXT: ptrue p0.h, vl128
1698 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
1699 ; CHECK-NEXT: frintz z0.h, p0/m, z0.h
1700 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
1702 %op = load <128 x half>, ptr %a
1703 %res = call <128 x half> @llvm.trunc.v128f16(<128 x half> %op)
1704 store <128 x half> %res, ptr %a
1708 ; Don't use SVE for 64-bit vectors.
1709 define <2 x float> @frintz_v2f32(<2 x float> %op) vscale_range(2,0) #0 {
1710 ; CHECK-LABEL: frintz_v2f32:
1712 ; CHECK-NEXT: frintz v0.2s, v0.2s
1714 %res = call <2 x float> @llvm.trunc.v2f32(<2 x float> %op)
1715 ret <2 x float> %res
1718 ; Don't use SVE for 128-bit vectors.
1719 define <4 x float> @frintz_v4f32(<4 x float> %op) vscale_range(2,0) #0 {
1720 ; CHECK-LABEL: frintz_v4f32:
1722 ; CHECK-NEXT: frintz v0.4s, v0.4s
1724 %res = call <4 x float> @llvm.trunc.v4f32(<4 x float> %op)
1725 ret <4 x float> %res
1728 define void @frintz_v8f32(ptr %a) vscale_range(2,0) #0 {
1729 ; CHECK-LABEL: frintz_v8f32:
1731 ; CHECK-NEXT: ptrue p0.s, vl8
1732 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
1733 ; CHECK-NEXT: frintz z0.s, p0/m, z0.s
1734 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
1736 %op = load <8 x float>, ptr %a
1737 %res = call <8 x float> @llvm.trunc.v8f32(<8 x float> %op)
1738 store <8 x float> %res, ptr %a
1742 define void @frintz_v16f32(ptr %a) #0 {
1743 ; VBITS_GE_256-LABEL: frintz_v16f32:
1744 ; VBITS_GE_256: // %bb.0:
1745 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
1746 ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
1747 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
1748 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
1749 ; VBITS_GE_256-NEXT: frintz z0.s, p0/m, z0.s
1750 ; VBITS_GE_256-NEXT: frintz z1.s, p0/m, z1.s
1751 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
1752 ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
1753 ; VBITS_GE_256-NEXT: ret
1755 ; VBITS_GE_512-LABEL: frintz_v16f32:
1756 ; VBITS_GE_512: // %bb.0:
1757 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16
1758 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
1759 ; VBITS_GE_512-NEXT: frintz z0.s, p0/m, z0.s
1760 ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
1761 ; VBITS_GE_512-NEXT: ret
1762 %op = load <16 x float>, ptr %a
1763 %res = call <16 x float> @llvm.trunc.v16f32(<16 x float> %op)
1764 store <16 x float> %res, ptr %a
1768 define void @frintz_v32f32(ptr %a) vscale_range(8,0) #0 {
1769 ; CHECK-LABEL: frintz_v32f32:
1771 ; CHECK-NEXT: ptrue p0.s, vl32
1772 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
1773 ; CHECK-NEXT: frintz z0.s, p0/m, z0.s
1774 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
1776 %op = load <32 x float>, ptr %a
1777 %res = call <32 x float> @llvm.trunc.v32f32(<32 x float> %op)
1778 store <32 x float> %res, ptr %a
1782 define void @frintz_v64f32(ptr %a) vscale_range(16,0) #0 {
1783 ; CHECK-LABEL: frintz_v64f32:
1785 ; CHECK-NEXT: ptrue p0.s, vl64
1786 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
1787 ; CHECK-NEXT: frintz z0.s, p0/m, z0.s
1788 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
1790 %op = load <64 x float>, ptr %a
1791 %res = call <64 x float> @llvm.trunc.v64f32(<64 x float> %op)
1792 store <64 x float> %res, ptr %a
1796 ; Don't use SVE for 64-bit vectors.
1797 define <1 x double> @frintz_v1f64(<1 x double> %op) vscale_range(2,0) #0 {
1798 ; CHECK-LABEL: frintz_v1f64:
1800 ; CHECK-NEXT: frintz d0, d0
1802 %res = call <1 x double> @llvm.trunc.v1f64(<1 x double> %op)
1803 ret <1 x double> %res
1806 ; Don't use SVE for 128-bit vectors.
1807 define <2 x double> @frintz_v2f64(<2 x double> %op) vscale_range(2,0) #0 {
1808 ; CHECK-LABEL: frintz_v2f64:
1810 ; CHECK-NEXT: frintz v0.2d, v0.2d
1812 %res = call <2 x double> @llvm.trunc.v2f64(<2 x double> %op)
1813 ret <2 x double> %res
1816 define void @frintz_v4f64(ptr %a) vscale_range(2,0) #0 {
1817 ; CHECK-LABEL: frintz_v4f64:
1819 ; CHECK-NEXT: ptrue p0.d, vl4
1820 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
1821 ; CHECK-NEXT: frintz z0.d, p0/m, z0.d
1822 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
1824 %op = load <4 x double>, ptr %a
1825 %res = call <4 x double> @llvm.trunc.v4f64(<4 x double> %op)
1826 store <4 x double> %res, ptr %a
1830 define void @frintz_v8f64(ptr %a) #0 {
1831 ; VBITS_GE_256-LABEL: frintz_v8f64:
1832 ; VBITS_GE_256: // %bb.0:
1833 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
1834 ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
1835 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
1836 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
1837 ; VBITS_GE_256-NEXT: frintz z0.d, p0/m, z0.d
1838 ; VBITS_GE_256-NEXT: frintz z1.d, p0/m, z1.d
1839 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
1840 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
1841 ; VBITS_GE_256-NEXT: ret
1843 ; VBITS_GE_512-LABEL: frintz_v8f64:
1844 ; VBITS_GE_512: // %bb.0:
1845 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
1846 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
1847 ; VBITS_GE_512-NEXT: frintz z0.d, p0/m, z0.d
1848 ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
1849 ; VBITS_GE_512-NEXT: ret
1850 %op = load <8 x double>, ptr %a
1851 %res = call <8 x double> @llvm.trunc.v8f64(<8 x double> %op)
1852 store <8 x double> %res, ptr %a
1856 define void @frintz_v16f64(ptr %a) vscale_range(8,0) #0 {
1857 ; CHECK-LABEL: frintz_v16f64:
1859 ; CHECK-NEXT: ptrue p0.d, vl16
1860 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
1861 ; CHECK-NEXT: frintz z0.d, p0/m, z0.d
1862 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
1864 %op = load <16 x double>, ptr %a
1865 %res = call <16 x double> @llvm.trunc.v16f64(<16 x double> %op)
1866 store <16 x double> %res, ptr %a
1870 define void @frintz_v32f64(ptr %a) vscale_range(16,0) #0 {
1871 ; CHECK-LABEL: frintz_v32f64:
1873 ; CHECK-NEXT: ptrue p0.d, vl32
1874 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
1875 ; CHECK-NEXT: frintz z0.d, p0/m, z0.d
1876 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
1878 %op = load <32 x double>, ptr %a
1879 %res = call <32 x double> @llvm.trunc.v32f64(<32 x double> %op)
1880 store <32 x double> %res, ptr %a
1884 attributes #0 = { "target-features"="+sve" }
1886 declare <4 x half> @llvm.ceil.v4f16(<4 x half>)
1887 declare <8 x half> @llvm.ceil.v8f16(<8 x half>)
1888 declare <16 x half> @llvm.ceil.v16f16(<16 x half>)
1889 declare <32 x half> @llvm.ceil.v32f16(<32 x half>)
1890 declare <64 x half> @llvm.ceil.v64f16(<64 x half>)
1891 declare <128 x half> @llvm.ceil.v128f16(<128 x half>)
1892 declare <2 x float> @llvm.ceil.v2f32(<2 x float>)
1893 declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
1894 declare <8 x float> @llvm.ceil.v8f32(<8 x float>)
1895 declare <16 x float> @llvm.ceil.v16f32(<16 x float>)
1896 declare <32 x float> @llvm.ceil.v32f32(<32 x float>)
1897 declare <64 x float> @llvm.ceil.v64f32(<64 x float>)
1898 declare <1 x double> @llvm.ceil.v1f64(<1 x double>)
1899 declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
1900 declare <4 x double> @llvm.ceil.v4f64(<4 x double>)
1901 declare <8 x double> @llvm.ceil.v8f64(<8 x double>)
1902 declare <16 x double> @llvm.ceil.v16f64(<16 x double>)
1903 declare <32 x double> @llvm.ceil.v32f64(<32 x double>)
1905 declare <4 x half> @llvm.floor.v4f16(<4 x half>)
1906 declare <8 x half> @llvm.floor.v8f16(<8 x half>)
1907 declare <16 x half> @llvm.floor.v16f16(<16 x half>)
1908 declare <32 x half> @llvm.floor.v32f16(<32 x half>)
1909 declare <64 x half> @llvm.floor.v64f16(<64 x half>)
1910 declare <128 x half> @llvm.floor.v128f16(<128 x half>)
1911 declare <2 x float> @llvm.floor.v2f32(<2 x float>)
1912 declare <4 x float> @llvm.floor.v4f32(<4 x float>)
1913 declare <8 x float> @llvm.floor.v8f32(<8 x float>)
1914 declare <16 x float> @llvm.floor.v16f32(<16 x float>)
1915 declare <32 x float> @llvm.floor.v32f32(<32 x float>)
1916 declare <64 x float> @llvm.floor.v64f32(<64 x float>)
1917 declare <1 x double> @llvm.floor.v1f64(<1 x double>)
1918 declare <2 x double> @llvm.floor.v2f64(<2 x double>)
1919 declare <4 x double> @llvm.floor.v4f64(<4 x double>)
1920 declare <8 x double> @llvm.floor.v8f64(<8 x double>)
1921 declare <16 x double> @llvm.floor.v16f64(<16 x double>)
1922 declare <32 x double> @llvm.floor.v32f64(<32 x double>)
1924 declare <4 x half> @llvm.nearbyint.v4f16(<4 x half>)
1925 declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)
1926 declare <16 x half> @llvm.nearbyint.v16f16(<16 x half>)
1927 declare <32 x half> @llvm.nearbyint.v32f16(<32 x half>)
1928 declare <64 x half> @llvm.nearbyint.v64f16(<64 x half>)
1929 declare <128 x half> @llvm.nearbyint.v128f16(<128 x half>)
1930 declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>)
1931 declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
1932 declare <8 x float> @llvm.nearbyint.v8f32(<8 x float>)
1933 declare <16 x float> @llvm.nearbyint.v16f32(<16 x float>)
1934 declare <32 x float> @llvm.nearbyint.v32f32(<32 x float>)
1935 declare <64 x float> @llvm.nearbyint.v64f32(<64 x float>)
1936 declare <1 x double> @llvm.nearbyint.v1f64(<1 x double>)
1937 declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
1938 declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>)
1939 declare <8 x double> @llvm.nearbyint.v8f64(<8 x double>)
1940 declare <16 x double> @llvm.nearbyint.v16f64(<16 x double>)
1941 declare <32 x double> @llvm.nearbyint.v32f64(<32 x double>)
1943 declare <4 x half> @llvm.rint.v4f16(<4 x half>)
1944 declare <8 x half> @llvm.rint.v8f16(<8 x half>)
1945 declare <16 x half> @llvm.rint.v16f16(<16 x half>)
1946 declare <32 x half> @llvm.rint.v32f16(<32 x half>)
1947 declare <64 x half> @llvm.rint.v64f16(<64 x half>)
1948 declare <128 x half> @llvm.rint.v128f16(<128 x half>)
1949 declare <2 x float> @llvm.rint.v2f32(<2 x float>)
1950 declare <4 x float> @llvm.rint.v4f32(<4 x float>)
1951 declare <8 x float> @llvm.rint.v8f32(<8 x float>)
1952 declare <16 x float> @llvm.rint.v16f32(<16 x float>)
1953 declare <32 x float> @llvm.rint.v32f32(<32 x float>)
1954 declare <64 x float> @llvm.rint.v64f32(<64 x float>)
1955 declare <1 x double> @llvm.rint.v1f64(<1 x double>)
1956 declare <2 x double> @llvm.rint.v2f64(<2 x double>)
1957 declare <4 x double> @llvm.rint.v4f64(<4 x double>)
1958 declare <8 x double> @llvm.rint.v8f64(<8 x double>)
1959 declare <16 x double> @llvm.rint.v16f64(<16 x double>)
1960 declare <32 x double> @llvm.rint.v32f64(<32 x double>)
1962 declare <4 x half> @llvm.round.v4f16(<4 x half>)
1963 declare <8 x half> @llvm.round.v8f16(<8 x half>)
1964 declare <16 x half> @llvm.round.v16f16(<16 x half>)
1965 declare <32 x half> @llvm.round.v32f16(<32 x half>)
1966 declare <64 x half> @llvm.round.v64f16(<64 x half>)
1967 declare <128 x half> @llvm.round.v128f16(<128 x half>)
1968 declare <2 x float> @llvm.round.v2f32(<2 x float>)
1969 declare <4 x float> @llvm.round.v4f32(<4 x float>)
1970 declare <8 x float> @llvm.round.v8f32(<8 x float>)
1971 declare <16 x float> @llvm.round.v16f32(<16 x float>)
1972 declare <32 x float> @llvm.round.v32f32(<32 x float>)
1973 declare <64 x float> @llvm.round.v64f32(<64 x float>)
1974 declare <1 x double> @llvm.round.v1f64(<1 x double>)
1975 declare <2 x double> @llvm.round.v2f64(<2 x double>)
1976 declare <4 x double> @llvm.round.v4f64(<4 x double>)
1977 declare <8 x double> @llvm.round.v8f64(<8 x double>)
1978 declare <16 x double> @llvm.round.v16f64(<16 x double>)
1979 declare <32 x double> @llvm.round.v32f64(<32 x double>)
1981 declare <4 x half> @llvm.roundeven.v4f16(<4 x half>)
1982 declare <8 x half> @llvm.roundeven.v8f16(<8 x half>)
1983 declare <16 x half> @llvm.roundeven.v16f16(<16 x half>)
1984 declare <32 x half> @llvm.roundeven.v32f16(<32 x half>)
1985 declare <64 x half> @llvm.roundeven.v64f16(<64 x half>)
1986 declare <128 x half> @llvm.roundeven.v128f16(<128 x half>)
1987 declare <2 x float> @llvm.roundeven.v2f32(<2 x float>)
1988 declare <4 x float> @llvm.roundeven.v4f32(<4 x float>)
1989 declare <8 x float> @llvm.roundeven.v8f32(<8 x float>)
1990 declare <16 x float> @llvm.roundeven.v16f32(<16 x float>)
1991 declare <32 x float> @llvm.roundeven.v32f32(<32 x float>)
1992 declare <64 x float> @llvm.roundeven.v64f32(<64 x float>)
1993 declare <1 x double> @llvm.roundeven.v1f64(<1 x double>)
1994 declare <2 x double> @llvm.roundeven.v2f64(<2 x double>)
1995 declare <4 x double> @llvm.roundeven.v4f64(<4 x double>)
1996 declare <8 x double> @llvm.roundeven.v8f64(<8 x double>)
1997 declare <16 x double> @llvm.roundeven.v16f64(<16 x double>)
1998 declare <32 x double> @llvm.roundeven.v32f64(<32 x double>)
2000 declare <4 x half> @llvm.trunc.v4f16(<4 x half>)
2001 declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
2002 declare <16 x half> @llvm.trunc.v16f16(<16 x half>)
2003 declare <32 x half> @llvm.trunc.v32f16(<32 x half>)
2004 declare <64 x half> @llvm.trunc.v64f16(<64 x half>)
2005 declare <128 x half> @llvm.trunc.v128f16(<128 x half>)
2006 declare <2 x float> @llvm.trunc.v2f32(<2 x float>)
2007 declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
2008 declare <8 x float> @llvm.trunc.v8f32(<8 x float>)
2009 declare <16 x float> @llvm.trunc.v16f32(<16 x float>)
2010 declare <32 x float> @llvm.trunc.v32f32(<32 x float>)
2011 declare <64 x float> @llvm.trunc.v64f32(<64 x float>)
2012 declare <1 x double> @llvm.trunc.v1f64(<1 x double>)
2013 declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
2014 declare <4 x double> @llvm.trunc.v4f64(<4 x double>)
2015 declare <8 x double> @llvm.trunc.v8f64(<8 x double>)
2016 declare <16 x double> @llvm.trunc.v16f64(<16 x double>)
2017 declare <32 x double> @llvm.trunc.v32f64(<32 x double>)