1 ; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
2 ; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
3 ; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK
4 ; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
5 ; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
6 ; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
7 ; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
8 ; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
9 ; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
10 ; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
11 ; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
12 ; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
13 ; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
14 ; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
15 ; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
16 ; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
18 target triple = "aarch64-unknown-linux-gnu"
20 ; Don't use SVE when its registers are no bigger than NEON.
; FCEIL -> FRINTP
; llvm.ceil.* rounds towards plus infinity. Vectors of 64 or 128 bits
; use the plain NEON frintp; wider fixed-length vectors use the
; predicated SVE frintp via ptrue/ld1/st1, and at exactly 256 bits the
; too-wide cases are legalised into two halves (LO/HI below).
27 ; Don't use SVE for 64-bit vectors.
28 define <4 x half> @frintp_v4f16(<4 x half> %op) #0 {
29 ; CHECK-LABEL: frintp_v4f16:
30 ; CHECK: frintp v0.4h, v0.4h
32 %res = call <4 x half> @llvm.ceil.v4f16(<4 x half> %op)
36 ; Don't use SVE for 128-bit vectors.
37 define <8 x half> @frintp_v8f16(<8 x half> %op) #0 {
38 ; CHECK-LABEL: frintp_v8f16:
39 ; CHECK: frintp v0.8h, v0.8h
41 %res = call <8 x half> @llvm.ceil.v8f16(<8 x half> %op)
45 define void @frintp_v16f16(<16 x half>* %a) #0 {
46 ; CHECK-LABEL: frintp_v16f16:
47 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
48 ; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
49 ; CHECK-NEXT: frintp [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
50 ; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
52 %op = load <16 x half>, <16 x half>* %a
53 %res = call <16 x half> @llvm.ceil.v16f16(<16 x half> %op)
54 store <16 x half> %res, <16 x half>* %a
58 define void @frintp_v32f16(<32 x half>* %a) #0 {
59 ; CHECK-LABEL: frintp_v32f16:
60 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
61 ; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
62 ; VBITS_GE_512-NEXT: frintp [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
63 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
64 ; VBITS_GE_512-NEXT: ret
66 ; Ensure sensible type legalisation.
67 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
68 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
69 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
70 ; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
71 ; VBITS_EQ_256-DAG: frintp [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
72 ; VBITS_EQ_256-DAG: frintp [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
73 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
74 ; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
75 ; VBITS_EQ_256-NEXT: ret
76 %op = load <32 x half>, <32 x half>* %a
77 %res = call <32 x half> @llvm.ceil.v32f16(<32 x half> %op)
78 store <32 x half> %res, <32 x half>* %a
82 define void @frintp_v64f16(<64 x half>* %a) #0 {
83 ; CHECK-LABEL: frintp_v64f16:
84 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
85 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
86 ; VBITS_GE_1024-NEXT: frintp [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
87 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
88 ; VBITS_GE_1024-NEXT: ret
89 %op = load <64 x half>, <64 x half>* %a
90 %res = call <64 x half> @llvm.ceil.v64f16(<64 x half> %op)
91 store <64 x half> %res, <64 x half>* %a
95 define void @frintp_v128f16(<128 x half>* %a) #0 {
96 ; CHECK-LABEL: frintp_v128f16:
97 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
98 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
99 ; VBITS_GE_2048-NEXT: frintp [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
100 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
101 ; VBITS_GE_2048-NEXT: ret
102 %op = load <128 x half>, <128 x half>* %a
103 %res = call <128 x half> @llvm.ceil.v128f16(<128 x half> %op)
104 store <128 x half> %res, <128 x half>* %a
108 ; Don't use SVE for 64-bit vectors.
109 define <2 x float> @frintp_v2f32(<2 x float> %op) #0 {
110 ; CHECK-LABEL: frintp_v2f32:
111 ; CHECK: frintp v0.2s, v0.2s
113 %res = call <2 x float> @llvm.ceil.v2f32(<2 x float> %op)
117 ; Don't use SVE for 128-bit vectors.
118 define <4 x float> @frintp_v4f32(<4 x float> %op) #0 {
119 ; CHECK-LABEL: frintp_v4f32:
120 ; CHECK: frintp v0.4s, v0.4s
122 %res = call <4 x float> @llvm.ceil.v4f32(<4 x float> %op)
126 define void @frintp_v8f32(<8 x float>* %a) #0 {
127 ; CHECK-LABEL: frintp_v8f32:
128 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
129 ; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
130 ; CHECK-NEXT: frintp [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
131 ; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
133 %op = load <8 x float>, <8 x float>* %a
134 %res = call <8 x float> @llvm.ceil.v8f32(<8 x float> %op)
135 store <8 x float> %res, <8 x float>* %a
139 define void @frintp_v16f32(<16 x float>* %a) #0 {
140 ; CHECK-LABEL: frintp_v16f32:
141 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
142 ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
143 ; VBITS_GE_512-NEXT: frintp [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
144 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
145 ; VBITS_GE_512-NEXT: ret
147 ; Ensure sensible type legalisation.
148 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
149 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
150 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
151 ; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
152 ; VBITS_EQ_256-DAG: frintp [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
153 ; VBITS_EQ_256-DAG: frintp [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
154 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
155 ; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
156 ; VBITS_EQ_256-NEXT: ret
157 %op = load <16 x float>, <16 x float>* %a
158 %res = call <16 x float> @llvm.ceil.v16f32(<16 x float> %op)
159 store <16 x float> %res, <16 x float>* %a
163 define void @frintp_v32f32(<32 x float>* %a) #0 {
164 ; CHECK-LABEL: frintp_v32f32:
165 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
166 ; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
167 ; VBITS_GE_1024-NEXT: frintp [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
168 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
169 ; VBITS_GE_1024-NEXT: ret
170 %op = load <32 x float>, <32 x float>* %a
171 %res = call <32 x float> @llvm.ceil.v32f32(<32 x float> %op)
172 store <32 x float> %res, <32 x float>* %a
176 define void @frintp_v64f32(<64 x float>* %a) #0 {
177 ; CHECK-LABEL: frintp_v64f32:
178 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
179 ; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
180 ; VBITS_GE_2048-NEXT: frintp [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
181 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
182 ; VBITS_GE_2048-NEXT: ret
183 %op = load <64 x float>, <64 x float>* %a
184 %res = call <64 x float> @llvm.ceil.v64f32(<64 x float> %op)
185 store <64 x float> %res, <64 x float>* %a
189 ; Don't use SVE for 64-bit vectors.
190 define <1 x double> @frintp_v1f64(<1 x double> %op) #0 {
191 ; CHECK-LABEL: frintp_v1f64:
192 ; CHECK: frintp d0, d0
194 %res = call <1 x double> @llvm.ceil.v1f64(<1 x double> %op)
195 ret <1 x double> %res
198 ; Don't use SVE for 128-bit vectors.
199 define <2 x double> @frintp_v2f64(<2 x double> %op) #0 {
200 ; CHECK-LABEL: frintp_v2f64:
201 ; CHECK: frintp v0.2d, v0.2d
203 %res = call <2 x double> @llvm.ceil.v2f64(<2 x double> %op)
204 ret <2 x double> %res
207 define void @frintp_v4f64(<4 x double>* %a) #0 {
208 ; CHECK-LABEL: frintp_v4f64:
209 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
210 ; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
211 ; CHECK-NEXT: frintp [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
212 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
214 %op = load <4 x double>, <4 x double>* %a
215 %res = call <4 x double> @llvm.ceil.v4f64(<4 x double> %op)
216 store <4 x double> %res, <4 x double>* %a
220 define void @frintp_v8f64(<8 x double>* %a) #0 {
221 ; CHECK-LABEL: frintp_v8f64:
222 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
223 ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
224 ; VBITS_GE_512-NEXT: frintp [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
225 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
226 ; VBITS_GE_512-NEXT: ret
228 ; Ensure sensible type legalisation.
229 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
230 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
231 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
232 ; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
233 ; VBITS_EQ_256-DAG: frintp [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
234 ; VBITS_EQ_256-DAG: frintp [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
235 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
236 ; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
237 ; VBITS_EQ_256-NEXT: ret
238 %op = load <8 x double>, <8 x double>* %a
239 %res = call <8 x double> @llvm.ceil.v8f64(<8 x double> %op)
240 store <8 x double> %res, <8 x double>* %a
244 define void @frintp_v16f64(<16 x double>* %a) #0 {
245 ; CHECK-LABEL: frintp_v16f64:
246 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
247 ; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
248 ; VBITS_GE_1024-NEXT: frintp [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
249 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
250 ; VBITS_GE_1024-NEXT: ret
251 %op = load <16 x double>, <16 x double>* %a
252 %res = call <16 x double> @llvm.ceil.v16f64(<16 x double> %op)
253 store <16 x double> %res, <16 x double>* %a
257 define void @frintp_v32f64(<32 x double>* %a) #0 {
258 ; CHECK-LABEL: frintp_v32f64:
259 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
260 ; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
261 ; VBITS_GE_2048-NEXT: frintp [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
262 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
263 ; VBITS_GE_2048-NEXT: ret
264 %op = load <32 x double>, <32 x double>* %a
265 %res = call <32 x double> @llvm.ceil.v32f64(<32 x double> %op)
266 store <32 x double> %res, <32 x double>* %a
; FFLOOR -> FRINTM
; llvm.floor.* rounds towards minus infinity; the expected lowering
; mirrors the frintp cases above, substituting frintm throughout.
274 ; Don't use SVE for 64-bit vectors.
275 define <4 x half> @frintm_v4f16(<4 x half> %op) #0 {
276 ; CHECK-LABEL: frintm_v4f16:
277 ; CHECK: frintm v0.4h, v0.4h
279 %res = call <4 x half> @llvm.floor.v4f16(<4 x half> %op)
283 ; Don't use SVE for 128-bit vectors.
284 define <8 x half> @frintm_v8f16(<8 x half> %op) #0 {
285 ; CHECK-LABEL: frintm_v8f16:
286 ; CHECK: frintm v0.8h, v0.8h
288 %res = call <8 x half> @llvm.floor.v8f16(<8 x half> %op)
292 define void @frintm_v16f16(<16 x half>* %a) #0 {
293 ; CHECK-LABEL: frintm_v16f16:
294 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
295 ; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
296 ; CHECK-NEXT: frintm [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
297 ; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
299 %op = load <16 x half>, <16 x half>* %a
300 %res = call <16 x half> @llvm.floor.v16f16(<16 x half> %op)
301 store <16 x half> %res, <16 x half>* %a
305 define void @frintm_v32f16(<32 x half>* %a) #0 {
306 ; CHECK-LABEL: frintm_v32f16:
307 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
308 ; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
309 ; VBITS_GE_512-NEXT: frintm [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
310 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
311 ; VBITS_GE_512-NEXT: ret
313 ; Ensure sensible type legalisation.
314 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
315 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
316 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
317 ; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
318 ; VBITS_EQ_256-DAG: frintm [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
319 ; VBITS_EQ_256-DAG: frintm [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
320 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
321 ; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
322 ; VBITS_EQ_256-NEXT: ret
323 %op = load <32 x half>, <32 x half>* %a
324 %res = call <32 x half> @llvm.floor.v32f16(<32 x half> %op)
325 store <32 x half> %res, <32 x half>* %a
329 define void @frintm_v64f16(<64 x half>* %a) #0 {
330 ; CHECK-LABEL: frintm_v64f16:
331 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
332 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
333 ; VBITS_GE_1024-NEXT: frintm [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
334 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
335 ; VBITS_GE_1024-NEXT: ret
336 %op = load <64 x half>, <64 x half>* %a
337 %res = call <64 x half> @llvm.floor.v64f16(<64 x half> %op)
338 store <64 x half> %res, <64 x half>* %a
342 define void @frintm_v128f16(<128 x half>* %a) #0 {
343 ; CHECK-LABEL: frintm_v128f16:
344 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
345 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
346 ; VBITS_GE_2048-NEXT: frintm [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
347 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
348 ; VBITS_GE_2048-NEXT: ret
349 %op = load <128 x half>, <128 x half>* %a
350 %res = call <128 x half> @llvm.floor.v128f16(<128 x half> %op)
351 store <128 x half> %res, <128 x half>* %a
355 ; Don't use SVE for 64-bit vectors.
356 define <2 x float> @frintm_v2f32(<2 x float> %op) #0 {
357 ; CHECK-LABEL: frintm_v2f32:
358 ; CHECK: frintm v0.2s, v0.2s
360 %res = call <2 x float> @llvm.floor.v2f32(<2 x float> %op)
364 ; Don't use SVE for 128-bit vectors.
365 define <4 x float> @frintm_v4f32(<4 x float> %op) #0 {
366 ; CHECK-LABEL: frintm_v4f32:
367 ; CHECK: frintm v0.4s, v0.4s
369 %res = call <4 x float> @llvm.floor.v4f32(<4 x float> %op)
373 define void @frintm_v8f32(<8 x float>* %a) #0 {
374 ; CHECK-LABEL: frintm_v8f32:
375 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
376 ; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
377 ; CHECK-NEXT: frintm [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
378 ; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
380 %op = load <8 x float>, <8 x float>* %a
381 %res = call <8 x float> @llvm.floor.v8f32(<8 x float> %op)
382 store <8 x float> %res, <8 x float>* %a
386 define void @frintm_v16f32(<16 x float>* %a) #0 {
387 ; CHECK-LABEL: frintm_v16f32:
388 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
389 ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
390 ; VBITS_GE_512-NEXT: frintm [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
391 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
392 ; VBITS_GE_512-NEXT: ret
394 ; Ensure sensible type legalisation.
395 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
396 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
397 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
398 ; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
399 ; VBITS_EQ_256-DAG: frintm [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
400 ; VBITS_EQ_256-DAG: frintm [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
401 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
402 ; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
403 ; VBITS_EQ_256-NEXT: ret
404 %op = load <16 x float>, <16 x float>* %a
405 %res = call <16 x float> @llvm.floor.v16f32(<16 x float> %op)
406 store <16 x float> %res, <16 x float>* %a
410 define void @frintm_v32f32(<32 x float>* %a) #0 {
411 ; CHECK-LABEL: frintm_v32f32:
412 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
413 ; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
414 ; VBITS_GE_1024-NEXT: frintm [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
415 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
416 ; VBITS_GE_1024-NEXT: ret
417 %op = load <32 x float>, <32 x float>* %a
418 %res = call <32 x float> @llvm.floor.v32f32(<32 x float> %op)
419 store <32 x float> %res, <32 x float>* %a
423 define void @frintm_v64f32(<64 x float>* %a) #0 {
424 ; CHECK-LABEL: frintm_v64f32:
425 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
426 ; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
427 ; VBITS_GE_2048-NEXT: frintm [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
428 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
429 ; VBITS_GE_2048-NEXT: ret
430 %op = load <64 x float>, <64 x float>* %a
431 %res = call <64 x float> @llvm.floor.v64f32(<64 x float> %op)
432 store <64 x float> %res, <64 x float>* %a
436 ; Don't use SVE for 64-bit vectors.
437 define <1 x double> @frintm_v1f64(<1 x double> %op) #0 {
438 ; CHECK-LABEL: frintm_v1f64:
439 ; CHECK: frintm d0, d0
441 %res = call <1 x double> @llvm.floor.v1f64(<1 x double> %op)
442 ret <1 x double> %res
445 ; Don't use SVE for 128-bit vectors.
446 define <2 x double> @frintm_v2f64(<2 x double> %op) #0 {
447 ; CHECK-LABEL: frintm_v2f64:
448 ; CHECK: frintm v0.2d, v0.2d
450 %res = call <2 x double> @llvm.floor.v2f64(<2 x double> %op)
451 ret <2 x double> %res
454 define void @frintm_v4f64(<4 x double>* %a) #0 {
455 ; CHECK-LABEL: frintm_v4f64:
456 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
457 ; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
458 ; CHECK-NEXT: frintm [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
459 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
461 %op = load <4 x double>, <4 x double>* %a
462 %res = call <4 x double> @llvm.floor.v4f64(<4 x double> %op)
463 store <4 x double> %res, <4 x double>* %a
467 define void @frintm_v8f64(<8 x double>* %a) #0 {
468 ; CHECK-LABEL: frintm_v8f64:
469 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
470 ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
471 ; VBITS_GE_512-NEXT: frintm [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
472 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
473 ; VBITS_GE_512-NEXT: ret
475 ; Ensure sensible type legalisation.
476 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
477 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
478 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
479 ; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
480 ; VBITS_EQ_256-DAG: frintm [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
481 ; VBITS_EQ_256-DAG: frintm [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
482 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
483 ; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
484 ; VBITS_EQ_256-NEXT: ret
485 %op = load <8 x double>, <8 x double>* %a
486 %res = call <8 x double> @llvm.floor.v8f64(<8 x double> %op)
487 store <8 x double> %res, <8 x double>* %a
491 define void @frintm_v16f64(<16 x double>* %a) #0 {
492 ; CHECK-LABEL: frintm_v16f64:
493 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
494 ; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
495 ; VBITS_GE_1024-NEXT: frintm [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
496 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
497 ; VBITS_GE_1024-NEXT: ret
498 %op = load <16 x double>, <16 x double>* %a
499 %res = call <16 x double> @llvm.floor.v16f64(<16 x double> %op)
500 store <16 x double> %res, <16 x double>* %a
504 define void @frintm_v32f64(<32 x double>* %a) #0 {
505 ; CHECK-LABEL: frintm_v32f64:
506 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
507 ; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
508 ; VBITS_GE_2048-NEXT: frintm [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
509 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
510 ; VBITS_GE_2048-NEXT: ret
511 %op = load <32 x double>, <32 x double>* %a
512 %res = call <32 x double> @llvm.floor.v32f64(<32 x double> %op)
513 store <32 x double> %res, <32 x double>* %a
518 ; FNEARBYINT -> FRINTI
; llvm.nearbyint.* rounds using the current rounding mode; the expected
; lowering mirrors the sections above, substituting frinti throughout.
521 ; Don't use SVE for 64-bit vectors.
522 define <4 x half> @frinti_v4f16(<4 x half> %op) #0 {
523 ; CHECK-LABEL: frinti_v4f16:
524 ; CHECK: frinti v0.4h, v0.4h
526 %res = call <4 x half> @llvm.nearbyint.v4f16(<4 x half> %op)
530 ; Don't use SVE for 128-bit vectors.
531 define <8 x half> @frinti_v8f16(<8 x half> %op) #0 {
532 ; CHECK-LABEL: frinti_v8f16:
533 ; CHECK: frinti v0.8h, v0.8h
535 %res = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %op)
539 define void @frinti_v16f16(<16 x half>* %a) #0 {
540 ; CHECK-LABEL: frinti_v16f16:
541 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
542 ; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
543 ; CHECK-NEXT: frinti [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
544 ; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
546 %op = load <16 x half>, <16 x half>* %a
547 %res = call <16 x half> @llvm.nearbyint.v16f16(<16 x half> %op)
548 store <16 x half> %res, <16 x half>* %a
552 define void @frinti_v32f16(<32 x half>* %a) #0 {
553 ; CHECK-LABEL: frinti_v32f16:
554 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
555 ; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
556 ; VBITS_GE_512-NEXT: frinti [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
557 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
558 ; VBITS_GE_512-NEXT: ret
560 ; Ensure sensible type legalisation.
561 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
562 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
563 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
564 ; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
565 ; VBITS_EQ_256-DAG: frinti [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
566 ; VBITS_EQ_256-DAG: frinti [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
567 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
568 ; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
569 ; VBITS_EQ_256-NEXT: ret
570 %op = load <32 x half>, <32 x half>* %a
571 %res = call <32 x half> @llvm.nearbyint.v32f16(<32 x half> %op)
572 store <32 x half> %res, <32 x half>* %a
576 define void @frinti_v64f16(<64 x half>* %a) #0 {
577 ; CHECK-LABEL: frinti_v64f16:
578 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
579 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
580 ; VBITS_GE_1024-NEXT: frinti [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
581 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
582 ; VBITS_GE_1024-NEXT: ret
583 %op = load <64 x half>, <64 x half>* %a
584 %res = call <64 x half> @llvm.nearbyint.v64f16(<64 x half> %op)
585 store <64 x half> %res, <64 x half>* %a
589 define void @frinti_v128f16(<128 x half>* %a) #0 {
590 ; CHECK-LABEL: frinti_v128f16:
591 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
592 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
593 ; VBITS_GE_2048-NEXT: frinti [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
594 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
595 ; VBITS_GE_2048-NEXT: ret
596 %op = load <128 x half>, <128 x half>* %a
597 %res = call <128 x half> @llvm.nearbyint.v128f16(<128 x half> %op)
598 store <128 x half> %res, <128 x half>* %a
602 ; Don't use SVE for 64-bit vectors.
603 define <2 x float> @frinti_v2f32(<2 x float> %op) #0 {
604 ; CHECK-LABEL: frinti_v2f32:
605 ; CHECK: frinti v0.2s, v0.2s
607 %res = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %op)
611 ; Don't use SVE for 128-bit vectors.
612 define <4 x float> @frinti_v4f32(<4 x float> %op) #0 {
613 ; CHECK-LABEL: frinti_v4f32:
614 ; CHECK: frinti v0.4s, v0.4s
616 %res = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %op)
620 define void @frinti_v8f32(<8 x float>* %a) #0 {
621 ; CHECK-LABEL: frinti_v8f32:
622 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
623 ; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
624 ; CHECK-NEXT: frinti [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
625 ; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
627 %op = load <8 x float>, <8 x float>* %a
628 %res = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %op)
629 store <8 x float> %res, <8 x float>* %a
633 define void @frinti_v16f32(<16 x float>* %a) #0 {
634 ; CHECK-LABEL: frinti_v16f32:
635 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
636 ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
637 ; VBITS_GE_512-NEXT: frinti [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
638 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
639 ; VBITS_GE_512-NEXT: ret
641 ; Ensure sensible type legalisation.
642 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
643 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
644 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
645 ; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
646 ; VBITS_EQ_256-DAG: frinti [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
647 ; VBITS_EQ_256-DAG: frinti [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
648 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
649 ; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
650 ; VBITS_EQ_256-NEXT: ret
651 %op = load <16 x float>, <16 x float>* %a
652 %res = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %op)
653 store <16 x float> %res, <16 x float>* %a
657 define void @frinti_v32f32(<32 x float>* %a) #0 {
658 ; CHECK-LABEL: frinti_v32f32:
659 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
660 ; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
661 ; VBITS_GE_1024-NEXT: frinti [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
662 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
663 ; VBITS_GE_1024-NEXT: ret
664 %op = load <32 x float>, <32 x float>* %a
665 %res = call <32 x float> @llvm.nearbyint.v32f32(<32 x float> %op)
666 store <32 x float> %res, <32 x float>* %a
670 define void @frinti_v64f32(<64 x float>* %a) #0 {
671 ; CHECK-LABEL: frinti_v64f32:
672 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
673 ; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
674 ; VBITS_GE_2048-NEXT: frinti [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
675 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
676 ; VBITS_GE_2048-NEXT: ret
677 %op = load <64 x float>, <64 x float>* %a
678 %res = call <64 x float> @llvm.nearbyint.v64f32(<64 x float> %op)
679 store <64 x float> %res, <64 x float>* %a
683 ; Don't use SVE for 64-bit vectors.
684 define <1 x double> @frinti_v1f64(<1 x double> %op) #0 {
685 ; CHECK-LABEL: frinti_v1f64:
686 ; CHECK: frinti d0, d0
688 %res = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %op)
689 ret <1 x double> %res
692 ; Don't use SVE for 128-bit vectors.
693 define <2 x double> @frinti_v2f64(<2 x double> %op) #0 {
694 ; CHECK-LABEL: frinti_v2f64:
695 ; CHECK: frinti v0.2d, v0.2d
697 %res = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %op)
698 ret <2 x double> %res
701 define void @frinti_v4f64(<4 x double>* %a) #0 {
702 ; CHECK-LABEL: frinti_v4f64:
703 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
704 ; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
705 ; CHECK-NEXT: frinti [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
706 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
708 %op = load <4 x double>, <4 x double>* %a
709 %res = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %op)
710 store <4 x double> %res, <4 x double>* %a
714 define void @frinti_v8f64(<8 x double>* %a) #0 {
715 ; CHECK-LABEL: frinti_v8f64:
716 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
717 ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
718 ; VBITS_GE_512-NEXT: frinti [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
719 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
720 ; VBITS_GE_512-NEXT: ret
722 ; Ensure sensible type legalisation.
723 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
724 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
725 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
726 ; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
727 ; VBITS_EQ_256-DAG: frinti [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
728 ; VBITS_EQ_256-DAG: frinti [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
729 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
730 ; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
731 ; VBITS_EQ_256-NEXT: ret
732 %op = load <8 x double>, <8 x double>* %a
733 %res = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %op)
734 store <8 x double> %res, <8 x double>* %a
738 define void @frinti_v16f64(<16 x double>* %a) #0 {
739 ; CHECK-LABEL: frinti_v16f64:
740 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
741 ; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
742 ; VBITS_GE_1024-NEXT: frinti [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
743 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
744 ; VBITS_GE_1024-NEXT: ret
745 %op = load <16 x double>, <16 x double>* %a
746 %res = call <16 x double> @llvm.nearbyint.v16f64(<16 x double> %op)
747 store <16 x double> %res, <16 x double>* %a
751 define void @frinti_v32f64(<32 x double>* %a) #0 {
752 ; CHECK-LABEL: frinti_v32f64:
753 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
754 ; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
755 ; VBITS_GE_2048-NEXT: frinti [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
756 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
757 ; VBITS_GE_2048-NEXT: ret
758 %op = load <32 x double>, <32 x double>* %a
759 %res = call <32 x double> @llvm.nearbyint.v32f64(<32 x double> %op)
760 store <32 x double> %res, <32 x double>* %a
; NOTE(review): RINT -> FRINTX, f16 cases. 64/128-bit vectors use plain NEON
; frintx; wider fixed-length vectors use predicated SVE (ptrue vlN / ld1h /
; frintx / st1h), and the 256-bit target splits the 512-bit op in two halves.
768 ; Don't use SVE for 64-bit vectors.
769 define <4 x half> @frintx_v4f16(<4 x half> %op) #0 {
770 ; CHECK-LABEL: frintx_v4f16:
771 ; CHECK: frintx v0.4h, v0.4h
773 %res = call <4 x half> @llvm.rint.v4f16(<4 x half> %op)
777 ; Don't use SVE for 128-bit vectors.
778 define <8 x half> @frintx_v8f16(<8 x half> %op) #0 {
779 ; CHECK-LABEL: frintx_v8f16:
780 ; CHECK: frintx v0.8h, v0.8h
782 %res = call <8 x half> @llvm.rint.v8f16(<8 x half> %op)
786 define void @frintx_v16f16(<16 x half>* %a) #0 {
787 ; CHECK-LABEL: frintx_v16f16:
788 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
789 ; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
790 ; CHECK-NEXT: frintx [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
791 ; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
793 %op = load <16 x half>, <16 x half>* %a
794 %res = call <16 x half> @llvm.rint.v16f16(<16 x half> %op)
795 store <16 x half> %res, <16 x half>* %a
799 define void @frintx_v32f16(<32 x half>* %a) #0 {
800 ; CHECK-LABEL: frintx_v32f16:
801 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
802 ; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
803 ; VBITS_GE_512-NEXT: frintx [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
804 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
805 ; VBITS_GE_512-NEXT: ret
807 ; Ensure sensible type legalisation.
808 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
809 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
810 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
811 ; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
812 ; VBITS_EQ_256-DAG: frintx [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
813 ; VBITS_EQ_256-DAG: frintx [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
814 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
815 ; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
816 ; VBITS_EQ_256-NEXT: ret
817 %op = load <32 x half>, <32 x half>* %a
818 %res = call <32 x half> @llvm.rint.v32f16(<32 x half> %op)
819 store <32 x half> %res, <32 x half>* %a
823 define void @frintx_v64f16(<64 x half>* %a) #0 {
824 ; CHECK-LABEL: frintx_v64f16:
825 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
826 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
827 ; VBITS_GE_1024-NEXT: frintx [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
828 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
829 ; VBITS_GE_1024-NEXT: ret
830 %op = load <64 x half>, <64 x half>* %a
831 %res = call <64 x half> @llvm.rint.v64f16(<64 x half> %op)
832 store <64 x half> %res, <64 x half>* %a
836 define void @frintx_v128f16(<128 x half>* %a) #0 {
837 ; CHECK-LABEL: frintx_v128f16:
838 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
839 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
840 ; VBITS_GE_2048-NEXT: frintx [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
841 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
842 ; VBITS_GE_2048-NEXT: ret
843 %op = load <128 x half>, <128 x half>* %a
844 %res = call <128 x half> @llvm.rint.v128f16(<128 x half> %op)
845 store <128 x half> %res, <128 x half>* %a
; NOTE(review): RINT -> FRINTX, f32 cases; same NEON-below-256-bit /
; SVE-above pattern as the f16 block, with ld1w/st1w and lsl #2 addressing.
849 ; Don't use SVE for 64-bit vectors.
850 define <2 x float> @frintx_v2f32(<2 x float> %op) #0 {
851 ; CHECK-LABEL: frintx_v2f32:
852 ; CHECK: frintx v0.2s, v0.2s
854 %res = call <2 x float> @llvm.rint.v2f32(<2 x float> %op)
858 ; Don't use SVE for 128-bit vectors.
859 define <4 x float> @frintx_v4f32(<4 x float> %op) #0 {
860 ; CHECK-LABEL: frintx_v4f32:
861 ; CHECK: frintx v0.4s, v0.4s
863 %res = call <4 x float> @llvm.rint.v4f32(<4 x float> %op)
867 define void @frintx_v8f32(<8 x float>* %a) #0 {
868 ; CHECK-LABEL: frintx_v8f32:
869 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
870 ; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
871 ; CHECK-NEXT: frintx [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
872 ; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
874 %op = load <8 x float>, <8 x float>* %a
875 %res = call <8 x float> @llvm.rint.v8f32(<8 x float> %op)
876 store <8 x float> %res, <8 x float>* %a
880 define void @frintx_v16f32(<16 x float>* %a) #0 {
881 ; CHECK-LABEL: frintx_v16f32:
882 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
883 ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
884 ; VBITS_GE_512-NEXT: frintx [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
885 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
886 ; VBITS_GE_512-NEXT: ret
888 ; Ensure sensible type legalisation.
889 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
890 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
891 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
892 ; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
893 ; VBITS_EQ_256-DAG: frintx [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
894 ; VBITS_EQ_256-DAG: frintx [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
895 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
896 ; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
897 ; VBITS_EQ_256-NEXT: ret
898 %op = load <16 x float>, <16 x float>* %a
899 %res = call <16 x float> @llvm.rint.v16f32(<16 x float> %op)
900 store <16 x float> %res, <16 x float>* %a
904 define void @frintx_v32f32(<32 x float>* %a) #0 {
905 ; CHECK-LABEL: frintx_v32f32:
906 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
907 ; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
908 ; VBITS_GE_1024-NEXT: frintx [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
909 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
910 ; VBITS_GE_1024-NEXT: ret
911 %op = load <32 x float>, <32 x float>* %a
912 %res = call <32 x float> @llvm.rint.v32f32(<32 x float> %op)
913 store <32 x float> %res, <32 x float>* %a
917 define void @frintx_v64f32(<64 x float>* %a) #0 {
918 ; CHECK-LABEL: frintx_v64f32:
919 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
920 ; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
921 ; VBITS_GE_2048-NEXT: frintx [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
922 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
923 ; VBITS_GE_2048-NEXT: ret
924 %op = load <64 x float>, <64 x float>* %a
925 %res = call <64 x float> @llvm.rint.v64f32(<64 x float> %op)
926 store <64 x float> %res, <64 x float>* %a
; NOTE(review): RINT -> FRINTX, f64 cases; <1 x double> degenerates to the
; scalar form (frintx d0, d0), wider vectors follow the ld1d/frintx/st1d
; SVE pattern with lsl #3 addressing on the split 256-bit path.
930 ; Don't use SVE for 64-bit vectors.
931 define <1 x double> @frintx_v1f64(<1 x double> %op) #0 {
932 ; CHECK-LABEL: frintx_v1f64:
933 ; CHECK: frintx d0, d0
935 %res = call <1 x double> @llvm.rint.v1f64(<1 x double> %op)
936 ret <1 x double> %res
939 ; Don't use SVE for 128-bit vectors.
940 define <2 x double> @frintx_v2f64(<2 x double> %op) #0 {
941 ; CHECK-LABEL: frintx_v2f64:
942 ; CHECK: frintx v0.2d, v0.2d
944 %res = call <2 x double> @llvm.rint.v2f64(<2 x double> %op)
945 ret <2 x double> %res
948 define void @frintx_v4f64(<4 x double>* %a) #0 {
949 ; CHECK-LABEL: frintx_v4f64:
950 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
951 ; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
952 ; CHECK-NEXT: frintx [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
953 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
955 %op = load <4 x double>, <4 x double>* %a
956 %res = call <4 x double> @llvm.rint.v4f64(<4 x double> %op)
957 store <4 x double> %res, <4 x double>* %a
961 define void @frintx_v8f64(<8 x double>* %a) #0 {
962 ; CHECK-LABEL: frintx_v8f64:
963 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
964 ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
965 ; VBITS_GE_512-NEXT: frintx [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
966 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
967 ; VBITS_GE_512-NEXT: ret
969 ; Ensure sensible type legalisation.
970 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
971 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
972 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
973 ; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
974 ; VBITS_EQ_256-DAG: frintx [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
975 ; VBITS_EQ_256-DAG: frintx [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
976 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
977 ; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
978 ; VBITS_EQ_256-NEXT: ret
979 %op = load <8 x double>, <8 x double>* %a
980 %res = call <8 x double> @llvm.rint.v8f64(<8 x double> %op)
981 store <8 x double> %res, <8 x double>* %a
985 define void @frintx_v16f64(<16 x double>* %a) #0 {
986 ; CHECK-LABEL: frintx_v16f64:
987 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
988 ; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
989 ; VBITS_GE_1024-NEXT: frintx [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
990 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
991 ; VBITS_GE_1024-NEXT: ret
992 %op = load <16 x double>, <16 x double>* %a
993 %res = call <16 x double> @llvm.rint.v16f64(<16 x double> %op)
994 store <16 x double> %res, <16 x double>* %a
998 define void @frintx_v32f64(<32 x double>* %a) #0 {
999 ; CHECK-LABEL: frintx_v32f64:
1000 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
1001 ; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1002 ; VBITS_GE_2048-NEXT: frintx [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1003 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
1004 ; VBITS_GE_2048-NEXT: ret
1005 %op = load <32 x double>, <32 x double>* %a
1006 %res = call <32 x double> @llvm.rint.v32f64(<32 x double> %op)
1007 store <32 x double> %res, <32 x double>* %a
; NOTE(review): ROUND -> FRINTA, f16 cases; same structure as the frintx
; section above, substituting the round-to-nearest-away instruction.
1015 ; Don't use SVE for 64-bit vectors.
1016 define <4 x half> @frinta_v4f16(<4 x half> %op) #0 {
1017 ; CHECK-LABEL: frinta_v4f16:
1018 ; CHECK: frinta v0.4h, v0.4h
1020 %res = call <4 x half> @llvm.round.v4f16(<4 x half> %op)
1024 ; Don't use SVE for 128-bit vectors.
1025 define <8 x half> @frinta_v8f16(<8 x half> %op) #0 {
1026 ; CHECK-LABEL: frinta_v8f16:
1027 ; CHECK: frinta v0.8h, v0.8h
1029 %res = call <8 x half> @llvm.round.v8f16(<8 x half> %op)
1033 define void @frinta_v16f16(<16 x half>* %a) #0 {
1034 ; CHECK-LABEL: frinta_v16f16:
1035 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
1036 ; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1037 ; CHECK-NEXT: frinta [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1038 ; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
1040 %op = load <16 x half>, <16 x half>* %a
1041 %res = call <16 x half> @llvm.round.v16f16(<16 x half> %op)
1042 store <16 x half> %res, <16 x half>* %a
1046 define void @frinta_v32f16(<32 x half>* %a) #0 {
1047 ; CHECK-LABEL: frinta_v32f16:
1048 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
1049 ; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1050 ; VBITS_GE_512-NEXT: frinta [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1051 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
1052 ; VBITS_GE_512-NEXT: ret
1054 ; Ensure sensible type legalisation.
1055 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
1056 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
1057 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
1058 ; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
1059 ; VBITS_EQ_256-DAG: frinta [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
1060 ; VBITS_EQ_256-DAG: frinta [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
1061 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
1062 ; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
1063 ; VBITS_EQ_256-NEXT: ret
1064 %op = load <32 x half>, <32 x half>* %a
1065 %res = call <32 x half> @llvm.round.v32f16(<32 x half> %op)
1066 store <32 x half> %res, <32 x half>* %a
1070 define void @frinta_v64f16(<64 x half>* %a) #0 {
1071 ; CHECK-LABEL: frinta_v64f16:
1072 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
1073 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1074 ; VBITS_GE_1024-NEXT: frinta [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1075 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
1076 ; VBITS_GE_1024-NEXT: ret
1077 %op = load <64 x half>, <64 x half>* %a
1078 %res = call <64 x half> @llvm.round.v64f16(<64 x half> %op)
1079 store <64 x half> %res, <64 x half>* %a
1083 define void @frinta_v128f16(<128 x half>* %a) #0 {
1084 ; CHECK-LABEL: frinta_v128f16:
1085 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
1086 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1087 ; VBITS_GE_2048-NEXT: frinta [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1088 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
1089 ; VBITS_GE_2048-NEXT: ret
1090 %op = load <128 x half>, <128 x half>* %a
1091 %res = call <128 x half> @llvm.round.v128f16(<128 x half> %op)
1092 store <128 x half> %res, <128 x half>* %a
; NOTE(review): ROUND -> FRINTA, f32 cases (ld1w/st1w, lsl #2 on the split
; 256-bit path).
1096 ; Don't use SVE for 64-bit vectors.
1097 define <2 x float> @frinta_v2f32(<2 x float> %op) #0 {
1098 ; CHECK-LABEL: frinta_v2f32:
1099 ; CHECK: frinta v0.2s, v0.2s
1101 %res = call <2 x float> @llvm.round.v2f32(<2 x float> %op)
1102 ret <2 x float> %res
1105 ; Don't use SVE for 128-bit vectors.
1106 define <4 x float> @frinta_v4f32(<4 x float> %op) #0 {
1107 ; CHECK-LABEL: frinta_v4f32:
1108 ; CHECK: frinta v0.4s, v0.4s
1110 %res = call <4 x float> @llvm.round.v4f32(<4 x float> %op)
1111 ret <4 x float> %res
1114 define void @frinta_v8f32(<8 x float>* %a) #0 {
1115 ; CHECK-LABEL: frinta_v8f32:
1116 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
1117 ; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1118 ; CHECK-NEXT: frinta [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1119 ; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
1121 %op = load <8 x float>, <8 x float>* %a
1122 %res = call <8 x float> @llvm.round.v8f32(<8 x float> %op)
1123 store <8 x float> %res, <8 x float>* %a
1127 define void @frinta_v16f32(<16 x float>* %a) #0 {
1128 ; CHECK-LABEL: frinta_v16f32:
1129 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
1130 ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1131 ; VBITS_GE_512-NEXT: frinta [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1132 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
1133 ; VBITS_GE_512-NEXT: ret
1135 ; Ensure sensible type legalisation.
1136 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
1137 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
1138 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
1139 ; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
1140 ; VBITS_EQ_256-DAG: frinta [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
1141 ; VBITS_EQ_256-DAG: frinta [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
1142 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
1143 ; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
1144 ; VBITS_EQ_256-NEXT: ret
1145 %op = load <16 x float>, <16 x float>* %a
1146 %res = call <16 x float> @llvm.round.v16f32(<16 x float> %op)
1147 store <16 x float> %res, <16 x float>* %a
1151 define void @frinta_v32f32(<32 x float>* %a) #0 {
1152 ; CHECK-LABEL: frinta_v32f32:
1153 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
1154 ; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1155 ; VBITS_GE_1024-NEXT: frinta [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1156 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
1157 ; VBITS_GE_1024-NEXT: ret
1158 %op = load <32 x float>, <32 x float>* %a
1159 %res = call <32 x float> @llvm.round.v32f32(<32 x float> %op)
1160 store <32 x float> %res, <32 x float>* %a
1164 define void @frinta_v64f32(<64 x float>* %a) #0 {
1165 ; CHECK-LABEL: frinta_v64f32:
1166 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
1167 ; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1168 ; VBITS_GE_2048-NEXT: frinta [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1169 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
1170 ; VBITS_GE_2048-NEXT: ret
1171 %op = load <64 x float>, <64 x float>* %a
1172 %res = call <64 x float> @llvm.round.v64f32(<64 x float> %op)
1173 store <64 x float> %res, <64 x float>* %a
; NOTE(review): ROUND -> FRINTA, f64 cases (scalar form for <1 x double>,
; ld1d/st1d with lsl #3 on the split 256-bit path).
1177 ; Don't use SVE for 64-bit vectors.
1178 define <1 x double> @frinta_v1f64(<1 x double> %op) #0 {
1179 ; CHECK-LABEL: frinta_v1f64:
1180 ; CHECK: frinta d0, d0
1182 %res = call <1 x double> @llvm.round.v1f64(<1 x double> %op)
1183 ret <1 x double> %res
1186 ; Don't use SVE for 128-bit vectors.
1187 define <2 x double> @frinta_v2f64(<2 x double> %op) #0 {
1188 ; CHECK-LABEL: frinta_v2f64:
1189 ; CHECK: frinta v0.2d, v0.2d
1191 %res = call <2 x double> @llvm.round.v2f64(<2 x double> %op)
1192 ret <2 x double> %res
1195 define void @frinta_v4f64(<4 x double>* %a) #0 {
1196 ; CHECK-LABEL: frinta_v4f64:
1197 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
1198 ; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1199 ; CHECK-NEXT: frinta [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1200 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
1202 %op = load <4 x double>, <4 x double>* %a
1203 %res = call <4 x double> @llvm.round.v4f64(<4 x double> %op)
1204 store <4 x double> %res, <4 x double>* %a
1208 define void @frinta_v8f64(<8 x double>* %a) #0 {
1209 ; CHECK-LABEL: frinta_v8f64:
1210 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
1211 ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1212 ; VBITS_GE_512-NEXT: frinta [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1213 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
1214 ; VBITS_GE_512-NEXT: ret
1216 ; Ensure sensible type legalisation.
1217 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
1218 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
1219 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
1220 ; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
1221 ; VBITS_EQ_256-DAG: frinta [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
1222 ; VBITS_EQ_256-DAG: frinta [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
1223 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
1224 ; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
1225 ; VBITS_EQ_256-NEXT: ret
1226 %op = load <8 x double>, <8 x double>* %a
1227 %res = call <8 x double> @llvm.round.v8f64(<8 x double> %op)
1228 store <8 x double> %res, <8 x double>* %a
1232 define void @frinta_v16f64(<16 x double>* %a) #0 {
1233 ; CHECK-LABEL: frinta_v16f64:
1234 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
1235 ; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1236 ; VBITS_GE_1024-NEXT: frinta [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1237 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
1238 ; VBITS_GE_1024-NEXT: ret
1239 %op = load <16 x double>, <16 x double>* %a
1240 %res = call <16 x double> @llvm.round.v16f64(<16 x double> %op)
1241 store <16 x double> %res, <16 x double>* %a
1245 define void @frinta_v32f64(<32 x double>* %a) #0 {
1246 ; CHECK-LABEL: frinta_v32f64:
1247 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
1248 ; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1249 ; VBITS_GE_2048-NEXT: frinta [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1250 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
1251 ; VBITS_GE_2048-NEXT: ret
1252 %op = load <32 x double>, <32 x double>* %a
1253 %res = call <32 x double> @llvm.round.v32f64(<32 x double> %op)
1254 store <32 x double> %res, <32 x double>* %a
1259 ; ROUNDEVEN -> FRINTN
; NOTE(review): 64/128-bit roundeven stays on NEON frintn; SVE variants follow.
1262 ; Don't use SVE for 64-bit vectors.
1263 define <4 x half> @frintn_v4f16(<4 x half> %op) #0 {
1264 ; CHECK-LABEL: frintn_v4f16:
1265 ; CHECK: frintn v0.4h, v0.4h
1267 %res = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %op)
1271 ; Don't use SVE for 128-bit vectors.
1272 define <8 x half> @frintn_v8f16(<8 x half> %op) #0 {
1273 ; CHECK-LABEL: frintn_v8f16:
1274 ; CHECK: frintn v0.8h, v0.8h
1276 %res = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %op)
; NOTE(review): the ld1h check below used CHECK-DAG while every other section
; of this file anchors the load to the ptrue with -NEXT; a lone -DAG followed
; by -NEXT is weaker and inconsistent, so it is tightened to CHECK-NEXT.
1280 define void @frintn_v16f16(<16 x half>* %a) #0 {
1281 ; CHECK-LABEL: frintn_v16f16:
1282 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
1283 ; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1284 ; CHECK-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1285 ; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
1287 %op = load <16 x half>, <16 x half>* %a
1288 %res = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %op)
1289 store <16 x half> %res, <16 x half>* %a
; NOTE(review): ld1 check tightened from -DAG to -NEXT for consistency with
; the rest of the file (the VBITS_EQ_256 group is legitimately unordered).
1293 define void @frintn_v32f16(<32 x half>* %a) #0 {
1294 ; CHECK-LABEL: frintn_v32f16:
1295 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
1296 ; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1297 ; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1298 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
1299 ; VBITS_GE_512-NEXT: ret
1301 ; Ensure sensible type legalisation.
1302 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
1303 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
1304 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
1305 ; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
1306 ; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
1307 ; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
1308 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
1309 ; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
1310 ; VBITS_EQ_256-NEXT: ret
1311 %op = load <32 x half>, <32 x half>* %a
1312 %res = call <32 x half> @llvm.roundeven.v32f16(<32 x half> %op)
1313 store <32 x half> %res, <32 x half>* %a
; NOTE(review): ld1 check tightened from -DAG to -NEXT to match the rest of the file.
1317 define void @frintn_v64f16(<64 x half>* %a) #0 {
1318 ; CHECK-LABEL: frintn_v64f16:
1319 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
1320 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1321 ; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1322 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
1323 ; VBITS_GE_1024-NEXT: ret
1324 %op = load <64 x half>, <64 x half>* %a
1325 %res = call <64 x half> @llvm.roundeven.v64f16(<64 x half> %op)
1326 store <64 x half> %res, <64 x half>* %a
; NOTE(review): ld1 check tightened from -DAG to -NEXT to match the rest of the file.
1330 define void @frintn_v128f16(<128 x half>* %a) #0 {
1331 ; CHECK-LABEL: frintn_v128f16:
1332 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
1333 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1334 ; VBITS_GE_2048-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1335 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
1336 ; VBITS_GE_2048-NEXT: ret
1337 %op = load <128 x half>, <128 x half>* %a
1338 %res = call <128 x half> @llvm.roundeven.v128f16(<128 x half> %op)
1339 store <128 x half> %res, <128 x half>* %a
; NOTE(review): roundeven f32, NEON-only sizes.
1343 ; Don't use SVE for 64-bit vectors.
1344 define <2 x float> @frintn_v2f32(<2 x float> %op) #0 {
1345 ; CHECK-LABEL: frintn_v2f32:
1346 ; CHECK: frintn v0.2s, v0.2s
1348 %res = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %op)
1349 ret <2 x float> %res
1352 ; Don't use SVE for 128-bit vectors.
1353 define <4 x float> @frintn_v4f32(<4 x float> %op) #0 {
1354 ; CHECK-LABEL: frintn_v4f32:
1355 ; CHECK: frintn v0.4s, v0.4s
1357 %res = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %op)
1358 ret <4 x float> %res
; NOTE(review): ld1 check tightened from -DAG to -NEXT to match the rest of the file.
1361 define void @frintn_v8f32(<8 x float>* %a) #0 {
1362 ; CHECK-LABEL: frintn_v8f32:
1363 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
1364 ; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1365 ; CHECK-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1366 ; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
1368 %op = load <8 x float>, <8 x float>* %a
1369 %res = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %op)
1370 store <8 x float> %res, <8 x float>* %a
; NOTE(review): ld1 check tightened from -DAG to -NEXT to match the rest of the file.
1374 define void @frintn_v16f32(<16 x float>* %a) #0 {
1375 ; CHECK-LABEL: frintn_v16f32:
1376 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
1377 ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1378 ; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1379 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
1380 ; VBITS_GE_512-NEXT: ret
1382 ; Ensure sensible type legalisation.
1383 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
1384 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
1385 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
1386 ; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
1387 ; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
1388 ; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
1389 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
1390 ; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
1391 ; VBITS_EQ_256-NEXT: ret
1392 %op = load <16 x float>, <16 x float>* %a
1393 %res = call <16 x float> @llvm.roundeven.v16f32(<16 x float> %op)
1394 store <16 x float> %res, <16 x float>* %a
; NOTE(review): ld1 check tightened from -DAG to -NEXT to match the rest of the file.
1398 define void @frintn_v32f32(<32 x float>* %a) #0 {
1399 ; CHECK-LABEL: frintn_v32f32:
1400 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
1401 ; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1402 ; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1403 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
1404 ; VBITS_GE_1024-NEXT: ret
1405 %op = load <32 x float>, <32 x float>* %a
1406 %res = call <32 x float> @llvm.roundeven.v32f32(<32 x float> %op)
1407 store <32 x float> %res, <32 x float>* %a
; NOTE(review): ld1 check tightened from -DAG to -NEXT to match the rest of the file.
1411 define void @frintn_v64f32(<64 x float>* %a) #0 {
1412 ; CHECK-LABEL: frintn_v64f32:
1413 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
1414 ; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1415 ; VBITS_GE_2048-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1416 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
1417 ; VBITS_GE_2048-NEXT: ret
1418 %op = load <64 x float>, <64 x float>* %a
1419 %res = call <64 x float> @llvm.roundeven.v64f32(<64 x float> %op)
1420 store <64 x float> %res, <64 x float>* %a
; NOTE(review): roundeven f64, NEON-only sizes (<1 x double> uses scalar frintn).
1424 ; Don't use SVE for 64-bit vectors.
1425 define <1 x double> @frintn_v1f64(<1 x double> %op) #0 {
1426 ; CHECK-LABEL: frintn_v1f64:
1427 ; CHECK: frintn d0, d0
1429 %res = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %op)
1430 ret <1 x double> %res
1433 ; Don't use SVE for 128-bit vectors.
1434 define <2 x double> @frintn_v2f64(<2 x double> %op) #0 {
1435 ; CHECK-LABEL: frintn_v2f64:
1436 ; CHECK: frintn v0.2d, v0.2d
1438 %res = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %op)
1439 ret <2 x double> %res
; NOTE(review): ld1 check tightened from -DAG to -NEXT to match the rest of the file.
1442 define void @frintn_v4f64(<4 x double>* %a) #0 {
1443 ; CHECK-LABEL: frintn_v4f64:
1444 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
1445 ; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1446 ; CHECK-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1447 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
1449 %op = load <4 x double>, <4 x double>* %a
1450 %res = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %op)
1451 store <4 x double> %res, <4 x double>* %a
; NOTE(review): ld1 check tightened from -DAG to -NEXT to match the rest of the file.
1455 define void @frintn_v8f64(<8 x double>* %a) #0 {
1456 ; CHECK-LABEL: frintn_v8f64:
1457 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
1458 ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1459 ; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1460 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
1461 ; VBITS_GE_512-NEXT: ret
1463 ; Ensure sensible type legalisation.
1464 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
1465 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
1466 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
1467 ; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
1468 ; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
1469 ; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
1470 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
1471 ; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
1472 ; VBITS_EQ_256-NEXT: ret
1473 %op = load <8 x double>, <8 x double>* %a
1474 %res = call <8 x double> @llvm.roundeven.v8f64(<8 x double> %op)
1475 store <8 x double> %res, <8 x double>* %a
; NOTE(review): ld1 check tightened from -DAG to -NEXT to match the rest of the file.
1479 define void @frintn_v16f64(<16 x double>* %a) #0 {
1480 ; CHECK-LABEL: frintn_v16f64:
1481 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
1482 ; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1483 ; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1484 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
1485 ; VBITS_GE_1024-NEXT: ret
1486 %op = load <16 x double>, <16 x double>* %a
1487 %res = call <16 x double> @llvm.roundeven.v16f64(<16 x double> %op)
1488 store <16 x double> %res, <16 x double>* %a
; NOTE(review): ld1 check tightened from -DAG to -NEXT to match the rest of the file.
1492 define void @frintn_v32f64(<32 x double>* %a) #0 {
1493 ; CHECK-LABEL: frintn_v32f64:
1494 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
1495 ; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1496 ; VBITS_GE_2048-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1497 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
1498 ; VBITS_GE_2048-NEXT: ret
1499 %op = load <32 x double>, <32 x double>* %a
1500 %res = call <32 x double> @llvm.roundeven.v32f64(<32 x double> %op)
1501 store <32 x double> %res, <32 x double>* %a
; NOTE(review): TRUNC -> FRINTZ, f16 cases; same NEON-below-256-bit /
; predicated-SVE pattern as the sections above.
1509 ; Don't use SVE for 64-bit vectors.
1510 define <4 x half> @frintz_v4f16(<4 x half> %op) #0 {
1511 ; CHECK-LABEL: frintz_v4f16:
1512 ; CHECK: frintz v0.4h, v0.4h
1514 %res = call <4 x half> @llvm.trunc.v4f16(<4 x half> %op)
1518 ; Don't use SVE for 128-bit vectors.
1519 define <8 x half> @frintz_v8f16(<8 x half> %op) #0 {
1520 ; CHECK-LABEL: frintz_v8f16:
1521 ; CHECK: frintz v0.8h, v0.8h
1523 %res = call <8 x half> @llvm.trunc.v8f16(<8 x half> %op)
1527 define void @frintz_v16f16(<16 x half>* %a) #0 {
1528 ; CHECK-LABEL: frintz_v16f16:
1529 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
1530 ; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1531 ; CHECK-NEXT: frintz [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1532 ; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
1534 %op = load <16 x half>, <16 x half>* %a
1535 %res = call <16 x half> @llvm.trunc.v16f16(<16 x half> %op)
1536 store <16 x half> %res, <16 x half>* %a
1540 define void @frintz_v32f16(<32 x half>* %a) #0 {
1541 ; CHECK-LABEL: frintz_v32f16:
1542 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
1543 ; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1544 ; VBITS_GE_512-NEXT: frintz [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1545 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
1546 ; VBITS_GE_512-NEXT: ret
1548 ; Ensure sensible type legalisation.
1549 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
1550 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
1551 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
1552 ; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
1553 ; VBITS_EQ_256-DAG: frintz [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
1554 ; VBITS_EQ_256-DAG: frintz [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
1555 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
1556 ; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
1557 ; VBITS_EQ_256-NEXT: ret
1558 %op = load <32 x half>, <32 x half>* %a
1559 %res = call <32 x half> @llvm.trunc.v32f16(<32 x half> %op)
1560 store <32 x half> %res, <32 x half>* %a
1564 define void @frintz_v64f16(<64 x half>* %a) #0 {
1565 ; CHECK-LABEL: frintz_v64f16:
1566 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
1567 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1568 ; VBITS_GE_1024-NEXT: frintz [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1569 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
1570 ; VBITS_GE_1024-NEXT: ret
1571 %op = load <64 x half>, <64 x half>* %a
1572 %res = call <64 x half> @llvm.trunc.v64f16(<64 x half> %op)
1573 store <64 x half> %res, <64 x half>* %a
1577 define void @frintz_v128f16(<128 x half>* %a) #0 {
1578 ; CHECK-LABEL: frintz_v128f16:
1579 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
1580 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1581 ; VBITS_GE_2048-NEXT: frintz [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1582 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
1583 ; VBITS_GE_2048-NEXT: ret
1584 %op = load <128 x half>, <128 x half>* %a
1585 %res = call <128 x half> @llvm.trunc.v128f16(<128 x half> %op)
1586 store <128 x half> %res, <128 x half>* %a
1590 ; Don't use SVE for 64-bit vectors.
1591 define <2 x float> @frintz_v2f32(<2 x float> %op) #0 {
1592 ; CHECK-LABEL: frintz_v2f32:
1593 ; CHECK: frintz v0.2s, v0.2s
1595 %res = call <2 x float> @llvm.trunc.v2f32(<2 x float> %op)
1596 ret <2 x float> %res
1599 ; Don't use SVE for 128-bit vectors.
1600 define <4 x float> @frintz_v4f32(<4 x float> %op) #0 {
1601 ; CHECK-LABEL: frintz_v4f32:
1602 ; CHECK: frintz v0.4s, v0.4s
1604 %res = call <4 x float> @llvm.trunc.v4f32(<4 x float> %op)
1605 ret <4 x float> %res
1608 define void @frintz_v8f32(<8 x float>* %a) #0 {
1609 ; CHECK-LABEL: frintz_v8f32:
1610 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
1611 ; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1612 ; CHECK-NEXT: frintz [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1613 ; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
1615 %op = load <8 x float>, <8 x float>* %a
1616 %res = call <8 x float> @llvm.trunc.v8f32(<8 x float> %op)
1617 store <8 x float> %res, <8 x float>* %a
define void @frintz_v16f32(<16 x float>* %a) #0 {
; CHECK-LABEL: frintz_v16f32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintz [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-DAG: frintz [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
; VBITS_EQ_256-DAG: frintz [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-NEXT: ret
  %op = load <16 x float>, <16 x float>* %a
  %res = call <16 x float> @llvm.trunc.v16f32(<16 x float> %op)
  store <16 x float> %res, <16 x float>* %a
  ret void
}
define void @frintz_v32f32(<32 x float>* %a) #0 {
; CHECK-LABEL: frintz_v32f32:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintz [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <32 x float>, <32 x float>* %a
  %res = call <32 x float> @llvm.trunc.v32f32(<32 x float> %op)
  store <32 x float> %res, <32 x float>* %a
  ret void
}
define void @frintz_v64f32(<64 x float>* %a) #0 {
; CHECK-LABEL: frintz_v64f32:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintz [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <64 x float>, <64 x float>* %a
  %res = call <64 x float> @llvm.trunc.v64f32(<64 x float> %op)
  store <64 x float> %res, <64 x float>* %a
  ret void
}
; Don't use SVE for 64-bit vectors.
define <1 x double> @frintz_v1f64(<1 x double> %op) #0 {
; CHECK-LABEL: frintz_v1f64:
; CHECK: frintz d0, d0
; CHECK-NEXT: ret
  %res = call <1 x double> @llvm.trunc.v1f64(<1 x double> %op)
  ret <1 x double> %res
}
; Don't use SVE for 128-bit vectors.
define <2 x double> @frintz_v2f64(<2 x double> %op) #0 {
; CHECK-LABEL: frintz_v2f64:
; CHECK: frintz v0.2d, v0.2d
; CHECK-NEXT: ret
  %res = call <2 x double> @llvm.trunc.v2f64(<2 x double> %op)
  ret <2 x double> %res
}
define void @frintz_v4f64(<4 x double>* %a) #0 {
; CHECK-LABEL: frintz_v4f64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-NEXT: frintz [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <4 x double>, <4 x double>* %a
  %res = call <4 x double> @llvm.trunc.v4f64(<4 x double> %op)
  store <4 x double> %res, <4 x double>* %a
  ret void
}
define void @frintz_v8f64(<8 x double>* %a) #0 {
; CHECK-LABEL: frintz_v8f64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintz [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-DAG: frintz [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
; VBITS_EQ_256-DAG: frintz [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-NEXT: ret
  %op = load <8 x double>, <8 x double>* %a
  %res = call <8 x double> @llvm.trunc.v8f64(<8 x double> %op)
  store <8 x double> %res, <8 x double>* %a
  ret void
}
define void @frintz_v16f64(<16 x double>* %a) #0 {
; CHECK-LABEL: frintz_v16f64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintz [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <16 x double>, <16 x double>* %a
  %res = call <16 x double> @llvm.trunc.v16f64(<16 x double> %op)
  store <16 x double> %res, <16 x double>* %a
  ret void
}
define void @frintz_v32f64(<32 x double>* %a) #0 {
; CHECK-LABEL: frintz_v32f64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintz [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <32 x double>, <32 x double>* %a
  %res = call <32 x double> @llvm.trunc.v32f64(<32 x double> %op)
  store <32 x double> %res, <32 x double>* %a
  ret void
}
attributes #0 = { "target-features"="+sve" }

; Intrinsic declarations for every rounding-mode test in this file,
; grouped by rounding intrinsic family.

declare <4 x half> @llvm.ceil.v4f16(<4 x half>)
declare <8 x half> @llvm.ceil.v8f16(<8 x half>)
declare <16 x half> @llvm.ceil.v16f16(<16 x half>)
declare <32 x half> @llvm.ceil.v32f16(<32 x half>)
declare <64 x half> @llvm.ceil.v64f16(<64 x half>)
declare <128 x half> @llvm.ceil.v128f16(<128 x half>)
declare <2 x float> @llvm.ceil.v2f32(<2 x float>)
declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
declare <8 x float> @llvm.ceil.v8f32(<8 x float>)
declare <16 x float> @llvm.ceil.v16f32(<16 x float>)
declare <32 x float> @llvm.ceil.v32f32(<32 x float>)
declare <64 x float> @llvm.ceil.v64f32(<64 x float>)
declare <1 x double> @llvm.ceil.v1f64(<1 x double>)
declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
declare <4 x double> @llvm.ceil.v4f64(<4 x double>)
declare <8 x double> @llvm.ceil.v8f64(<8 x double>)
declare <16 x double> @llvm.ceil.v16f64(<16 x double>)
declare <32 x double> @llvm.ceil.v32f64(<32 x double>)

declare <4 x half> @llvm.floor.v4f16(<4 x half>)
declare <8 x half> @llvm.floor.v8f16(<8 x half>)
declare <16 x half> @llvm.floor.v16f16(<16 x half>)
declare <32 x half> @llvm.floor.v32f16(<32 x half>)
declare <64 x half> @llvm.floor.v64f16(<64 x half>)
declare <128 x half> @llvm.floor.v128f16(<128 x half>)
declare <2 x float> @llvm.floor.v2f32(<2 x float>)
declare <4 x float> @llvm.floor.v4f32(<4 x float>)
declare <8 x float> @llvm.floor.v8f32(<8 x float>)
declare <16 x float> @llvm.floor.v16f32(<16 x float>)
declare <32 x float> @llvm.floor.v32f32(<32 x float>)
declare <64 x float> @llvm.floor.v64f32(<64 x float>)
declare <1 x double> @llvm.floor.v1f64(<1 x double>)
declare <2 x double> @llvm.floor.v2f64(<2 x double>)
declare <4 x double> @llvm.floor.v4f64(<4 x double>)
declare <8 x double> @llvm.floor.v8f64(<8 x double>)
declare <16 x double> @llvm.floor.v16f64(<16 x double>)
declare <32 x double> @llvm.floor.v32f64(<32 x double>)

declare <4 x half> @llvm.nearbyint.v4f16(<4 x half>)
declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)
declare <16 x half> @llvm.nearbyint.v16f16(<16 x half>)
declare <32 x half> @llvm.nearbyint.v32f16(<32 x half>)
declare <64 x half> @llvm.nearbyint.v64f16(<64 x half>)
declare <128 x half> @llvm.nearbyint.v128f16(<128 x half>)
declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>)
declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
declare <8 x float> @llvm.nearbyint.v8f32(<8 x float>)
declare <16 x float> @llvm.nearbyint.v16f32(<16 x float>)
declare <32 x float> @llvm.nearbyint.v32f32(<32 x float>)
declare <64 x float> @llvm.nearbyint.v64f32(<64 x float>)
declare <1 x double> @llvm.nearbyint.v1f64(<1 x double>)
declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>)
declare <8 x double> @llvm.nearbyint.v8f64(<8 x double>)
declare <16 x double> @llvm.nearbyint.v16f64(<16 x double>)
declare <32 x double> @llvm.nearbyint.v32f64(<32 x double>)

declare <4 x half> @llvm.rint.v4f16(<4 x half>)
declare <8 x half> @llvm.rint.v8f16(<8 x half>)
declare <16 x half> @llvm.rint.v16f16(<16 x half>)
declare <32 x half> @llvm.rint.v32f16(<32 x half>)
declare <64 x half> @llvm.rint.v64f16(<64 x half>)
declare <128 x half> @llvm.rint.v128f16(<128 x half>)
declare <2 x float> @llvm.rint.v2f32(<2 x float>)
declare <4 x float> @llvm.rint.v4f32(<4 x float>)
declare <8 x float> @llvm.rint.v8f32(<8 x float>)
declare <16 x float> @llvm.rint.v16f32(<16 x float>)
declare <32 x float> @llvm.rint.v32f32(<32 x float>)
declare <64 x float> @llvm.rint.v64f32(<64 x float>)
declare <1 x double> @llvm.rint.v1f64(<1 x double>)
declare <2 x double> @llvm.rint.v2f64(<2 x double>)
declare <4 x double> @llvm.rint.v4f64(<4 x double>)
declare <8 x double> @llvm.rint.v8f64(<8 x double>)
declare <16 x double> @llvm.rint.v16f64(<16 x double>)
declare <32 x double> @llvm.rint.v32f64(<32 x double>)

declare <4 x half> @llvm.round.v4f16(<4 x half>)
declare <8 x half> @llvm.round.v8f16(<8 x half>)
declare <16 x half> @llvm.round.v16f16(<16 x half>)
declare <32 x half> @llvm.round.v32f16(<32 x half>)
declare <64 x half> @llvm.round.v64f16(<64 x half>)
declare <128 x half> @llvm.round.v128f16(<128 x half>)
declare <2 x float> @llvm.round.v2f32(<2 x float>)
declare <4 x float> @llvm.round.v4f32(<4 x float>)
declare <8 x float> @llvm.round.v8f32(<8 x float>)
declare <16 x float> @llvm.round.v16f32(<16 x float>)
declare <32 x float> @llvm.round.v32f32(<32 x float>)
declare <64 x float> @llvm.round.v64f32(<64 x float>)
declare <1 x double> @llvm.round.v1f64(<1 x double>)
declare <2 x double> @llvm.round.v2f64(<2 x double>)
declare <4 x double> @llvm.round.v4f64(<4 x double>)
declare <8 x double> @llvm.round.v8f64(<8 x double>)
declare <16 x double> @llvm.round.v16f64(<16 x double>)
declare <32 x double> @llvm.round.v32f64(<32 x double>)

declare <4 x half> @llvm.roundeven.v4f16(<4 x half>)
declare <8 x half> @llvm.roundeven.v8f16(<8 x half>)
declare <16 x half> @llvm.roundeven.v16f16(<16 x half>)
declare <32 x half> @llvm.roundeven.v32f16(<32 x half>)
declare <64 x half> @llvm.roundeven.v64f16(<64 x half>)
declare <128 x half> @llvm.roundeven.v128f16(<128 x half>)
declare <2 x float> @llvm.roundeven.v2f32(<2 x float>)
declare <4 x float> @llvm.roundeven.v4f32(<4 x float>)
declare <8 x float> @llvm.roundeven.v8f32(<8 x float>)
declare <16 x float> @llvm.roundeven.v16f32(<16 x float>)
declare <32 x float> @llvm.roundeven.v32f32(<32 x float>)
declare <64 x float> @llvm.roundeven.v64f32(<64 x float>)
declare <1 x double> @llvm.roundeven.v1f64(<1 x double>)
declare <2 x double> @llvm.roundeven.v2f64(<2 x double>)
declare <4 x double> @llvm.roundeven.v4f64(<4 x double>)
declare <8 x double> @llvm.roundeven.v8f64(<8 x double>)
declare <16 x double> @llvm.roundeven.v16f64(<16 x double>)
declare <32 x double> @llvm.roundeven.v32f64(<32 x double>)

declare <4 x half> @llvm.trunc.v4f16(<4 x half>)
declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
declare <16 x half> @llvm.trunc.v16f16(<16 x half>)
declare <32 x half> @llvm.trunc.v32f16(<32 x half>)
declare <64 x half> @llvm.trunc.v64f16(<64 x half>)
declare <128 x half> @llvm.trunc.v128f16(<128 x half>)
declare <2 x float> @llvm.trunc.v2f32(<2 x float>)
declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
declare <8 x float> @llvm.trunc.v8f32(<8 x float>)
declare <16 x float> @llvm.trunc.v16f32(<16 x float>)
declare <32 x float> @llvm.trunc.v32f32(<32 x float>)
declare <64 x float> @llvm.trunc.v64f32(<64 x float>)
declare <1 x double> @llvm.trunc.v1f64(<1 x double>)
declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
declare <4 x double> @llvm.trunc.v4f64(<4 x double>)
declare <8 x double> @llvm.trunc.v8f64(<8 x double>)
declare <16 x double> @llvm.trunc.v16f64(<16 x double>)
declare <32 x double> @llvm.trunc.v32f64(<32 x double>)