1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
3 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
4 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
5 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
6 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
7 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
8 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
9 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
11 declare <vscale x 1 x half> @llvm.vp.sqrt.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
13 define <vscale x 1 x half> @vfsqrt_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
14 ; ZVFH-LABEL: vfsqrt_vv_nxv1f16:
16 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
17 ; ZVFH-NEXT: vfsqrt.v v8, v8, v0.t
20 ; ZVFHMIN-LABEL: vfsqrt_vv_nxv1f16:
22 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
23 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
24 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
25 ; ZVFHMIN-NEXT: vfsqrt.v v9, v9, v0.t
26 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
27 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
29 %v = call <vscale x 1 x half> @llvm.vp.sqrt.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
30 ret <vscale x 1 x half> %v
33 define <vscale x 1 x half> @vfsqrt_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
34 ; ZVFH-LABEL: vfsqrt_vv_nxv1f16_unmasked:
36 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
37 ; ZVFH-NEXT: vfsqrt.v v8, v8
40 ; ZVFHMIN-LABEL: vfsqrt_vv_nxv1f16_unmasked:
42 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
43 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
44 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
45 ; ZVFHMIN-NEXT: vfsqrt.v v9, v9
46 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
47 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
49 %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
50 %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
51 %v = call <vscale x 1 x half> @llvm.vp.sqrt.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
52 ret <vscale x 1 x half> %v
55 declare <vscale x 2 x half> @llvm.vp.sqrt.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)
57 define <vscale x 2 x half> @vfsqrt_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
58 ; ZVFH-LABEL: vfsqrt_vv_nxv2f16:
60 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
61 ; ZVFH-NEXT: vfsqrt.v v8, v8, v0.t
64 ; ZVFHMIN-LABEL: vfsqrt_vv_nxv2f16:
66 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
67 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
68 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
69 ; ZVFHMIN-NEXT: vfsqrt.v v9, v9, v0.t
70 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
71 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
73 %v = call <vscale x 2 x half> @llvm.vp.sqrt.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
74 ret <vscale x 2 x half> %v
77 define <vscale x 2 x half> @vfsqrt_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
78 ; ZVFH-LABEL: vfsqrt_vv_nxv2f16_unmasked:
80 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
81 ; ZVFH-NEXT: vfsqrt.v v8, v8
84 ; ZVFHMIN-LABEL: vfsqrt_vv_nxv2f16_unmasked:
86 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
87 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
88 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
89 ; ZVFHMIN-NEXT: vfsqrt.v v9, v9
90 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
91 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
93 %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
94 %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
95 %v = call <vscale x 2 x half> @llvm.vp.sqrt.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
96 ret <vscale x 2 x half> %v
99 declare <vscale x 4 x half> @llvm.vp.sqrt.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)
101 define <vscale x 4 x half> @vfsqrt_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
102 ; ZVFH-LABEL: vfsqrt_vv_nxv4f16:
104 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
105 ; ZVFH-NEXT: vfsqrt.v v8, v8, v0.t
108 ; ZVFHMIN-LABEL: vfsqrt_vv_nxv4f16:
110 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
111 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
112 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
113 ; ZVFHMIN-NEXT: vfsqrt.v v10, v10, v0.t
114 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
115 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
117 %v = call <vscale x 4 x half> @llvm.vp.sqrt.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
118 ret <vscale x 4 x half> %v
121 define <vscale x 4 x half> @vfsqrt_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
122 ; ZVFH-LABEL: vfsqrt_vv_nxv4f16_unmasked:
124 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
125 ; ZVFH-NEXT: vfsqrt.v v8, v8
128 ; ZVFHMIN-LABEL: vfsqrt_vv_nxv4f16_unmasked:
130 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
131 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
132 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
133 ; ZVFHMIN-NEXT: vfsqrt.v v10, v10
134 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
135 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
137 %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
138 %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
139 %v = call <vscale x 4 x half> @llvm.vp.sqrt.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
140 ret <vscale x 4 x half> %v
143 declare <vscale x 8 x half> @llvm.vp.sqrt.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)
145 define <vscale x 8 x half> @vfsqrt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
146 ; ZVFH-LABEL: vfsqrt_vv_nxv8f16:
148 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
149 ; ZVFH-NEXT: vfsqrt.v v8, v8, v0.t
152 ; ZVFHMIN-LABEL: vfsqrt_vv_nxv8f16:
154 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
155 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
156 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
157 ; ZVFHMIN-NEXT: vfsqrt.v v12, v12, v0.t
158 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
159 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
161 %v = call <vscale x 8 x half> @llvm.vp.sqrt.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
162 ret <vscale x 8 x half> %v
165 define <vscale x 8 x half> @vfsqrt_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
166 ; ZVFH-LABEL: vfsqrt_vv_nxv8f16_unmasked:
168 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
169 ; ZVFH-NEXT: vfsqrt.v v8, v8
172 ; ZVFHMIN-LABEL: vfsqrt_vv_nxv8f16_unmasked:
174 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
175 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
176 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
177 ; ZVFHMIN-NEXT: vfsqrt.v v12, v12
178 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
179 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
181 %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
182 %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
183 %v = call <vscale x 8 x half> @llvm.vp.sqrt.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
184 ret <vscale x 8 x half> %v
187 declare <vscale x 16 x half> @llvm.vp.sqrt.nxv16f16(<vscale x 16 x half>, <vscale x 16 x i1>, i32)
189 define <vscale x 16 x half> @vfsqrt_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
190 ; ZVFH-LABEL: vfsqrt_vv_nxv16f16:
192 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
193 ; ZVFH-NEXT: vfsqrt.v v8, v8, v0.t
196 ; ZVFHMIN-LABEL: vfsqrt_vv_nxv16f16:
198 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
199 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
200 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
201 ; ZVFHMIN-NEXT: vfsqrt.v v16, v16, v0.t
202 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
203 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
205 %v = call <vscale x 16 x half> @llvm.vp.sqrt.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
206 ret <vscale x 16 x half> %v
209 define <vscale x 16 x half> @vfsqrt_vv_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
210 ; ZVFH-LABEL: vfsqrt_vv_nxv16f16_unmasked:
212 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
213 ; ZVFH-NEXT: vfsqrt.v v8, v8
216 ; ZVFHMIN-LABEL: vfsqrt_vv_nxv16f16_unmasked:
218 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
219 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
220 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
221 ; ZVFHMIN-NEXT: vfsqrt.v v16, v16
222 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
223 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
225 %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
226 %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
227 %v = call <vscale x 16 x half> @llvm.vp.sqrt.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
228 ret <vscale x 16 x half> %v
231 declare <vscale x 32 x half> @llvm.vp.sqrt.nxv32f16(<vscale x 32 x half>, <vscale x 32 x i1>, i32)
233 define <vscale x 32 x half> @vfsqrt_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
234 ; ZVFH-LABEL: vfsqrt_vv_nxv32f16:
236 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
237 ; ZVFH-NEXT: vfsqrt.v v8, v8, v0.t
240 ; ZVFHMIN-LABEL: vfsqrt_vv_nxv32f16:
242 ; ZVFHMIN-NEXT: vmv1r.v v16, v0
243 ; ZVFHMIN-NEXT: csrr a2, vlenb
244 ; ZVFHMIN-NEXT: slli a1, a2, 1
245 ; ZVFHMIN-NEXT: sub a3, a0, a1
246 ; ZVFHMIN-NEXT: sltu a4, a0, a3
247 ; ZVFHMIN-NEXT: addi a4, a4, -1
248 ; ZVFHMIN-NEXT: and a3, a4, a3
249 ; ZVFHMIN-NEXT: srli a2, a2, 2
250 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
251 ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2
252 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
253 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
254 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
255 ; ZVFHMIN-NEXT: vfsqrt.v v24, v24, v0.t
256 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
257 ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
258 ; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2
259 ; ZVFHMIN-NEXT: # %bb.1:
260 ; ZVFHMIN-NEXT: mv a0, a1
261 ; ZVFHMIN-NEXT: .LBB10_2:
262 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
263 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
264 ; ZVFHMIN-NEXT: vmv1r.v v0, v16
265 ; ZVFHMIN-NEXT: vfsqrt.v v16, v24, v0.t
266 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
267 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
269 %v = call <vscale x 32 x half> @llvm.vp.sqrt.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
270 ret <vscale x 32 x half> %v
273 define <vscale x 32 x half> @vfsqrt_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) {
274 ; ZVFH-LABEL: vfsqrt_vv_nxv32f16_unmasked:
276 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
277 ; ZVFH-NEXT: vfsqrt.v v8, v8
280 ; ZVFHMIN-LABEL: vfsqrt_vv_nxv32f16_unmasked:
282 ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma
283 ; ZVFHMIN-NEXT: vmset.m v16
284 ; ZVFHMIN-NEXT: csrr a2, vlenb
285 ; ZVFHMIN-NEXT: slli a1, a2, 1
286 ; ZVFHMIN-NEXT: sub a3, a0, a1
287 ; ZVFHMIN-NEXT: sltu a4, a0, a3
288 ; ZVFHMIN-NEXT: addi a4, a4, -1
289 ; ZVFHMIN-NEXT: and a3, a4, a3
290 ; ZVFHMIN-NEXT: srli a2, a2, 2
291 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
292 ; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a2
293 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
294 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
295 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
296 ; ZVFHMIN-NEXT: vfsqrt.v v16, v16, v0.t
297 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
298 ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
299 ; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2
300 ; ZVFHMIN-NEXT: # %bb.1:
301 ; ZVFHMIN-NEXT: mv a0, a1
302 ; ZVFHMIN-NEXT: .LBB11_2:
303 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
304 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
305 ; ZVFHMIN-NEXT: vfsqrt.v v16, v16
306 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
307 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
309 %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
310 %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
311 %v = call <vscale x 32 x half> @llvm.vp.sqrt.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
312 ret <vscale x 32 x half> %v
315 declare <vscale x 1 x float> @llvm.vp.sqrt.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
317 define <vscale x 1 x float> @vfsqrt_vv_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
318 ; CHECK-LABEL: vfsqrt_vv_nxv1f32:
320 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
321 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
323 %v = call <vscale x 1 x float> @llvm.vp.sqrt.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl)
324 ret <vscale x 1 x float> %v
327 define <vscale x 1 x float> @vfsqrt_vv_nxv1f32_unmasked(<vscale x 1 x float> %va, i32 zeroext %evl) {
328 ; CHECK-LABEL: vfsqrt_vv_nxv1f32_unmasked:
330 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
331 ; CHECK-NEXT: vfsqrt.v v8, v8
333 %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
334 %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
335 %v = call <vscale x 1 x float> @llvm.vp.sqrt.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl)
336 ret <vscale x 1 x float> %v
339 declare <vscale x 2 x float> @llvm.vp.sqrt.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
341 define <vscale x 2 x float> @vfsqrt_vv_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
342 ; CHECK-LABEL: vfsqrt_vv_nxv2f32:
344 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
345 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
347 %v = call <vscale x 2 x float> @llvm.vp.sqrt.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
348 ret <vscale x 2 x float> %v
351 define <vscale x 2 x float> @vfsqrt_vv_nxv2f32_unmasked(<vscale x 2 x float> %va, i32 zeroext %evl) {
352 ; CHECK-LABEL: vfsqrt_vv_nxv2f32_unmasked:
354 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
355 ; CHECK-NEXT: vfsqrt.v v8, v8
357 %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
358 %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
359 %v = call <vscale x 2 x float> @llvm.vp.sqrt.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
360 ret <vscale x 2 x float> %v
363 declare <vscale x 4 x float> @llvm.vp.sqrt.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
365 define <vscale x 4 x float> @vfsqrt_vv_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
366 ; CHECK-LABEL: vfsqrt_vv_nxv4f32:
368 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
369 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
371 %v = call <vscale x 4 x float> @llvm.vp.sqrt.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
372 ret <vscale x 4 x float> %v
375 define <vscale x 4 x float> @vfsqrt_vv_nxv4f32_unmasked(<vscale x 4 x float> %va, i32 zeroext %evl) {
376 ; CHECK-LABEL: vfsqrt_vv_nxv4f32_unmasked:
378 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
379 ; CHECK-NEXT: vfsqrt.v v8, v8
381 %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
382 %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
383 %v = call <vscale x 4 x float> @llvm.vp.sqrt.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
384 ret <vscale x 4 x float> %v
387 declare <vscale x 8 x float> @llvm.vp.sqrt.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
389 define <vscale x 8 x float> @vfsqrt_vv_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
390 ; CHECK-LABEL: vfsqrt_vv_nxv8f32:
392 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
393 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
395 %v = call <vscale x 8 x float> @llvm.vp.sqrt.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
396 ret <vscale x 8 x float> %v
399 define <vscale x 8 x float> @vfsqrt_vv_nxv8f32_unmasked(<vscale x 8 x float> %va, i32 zeroext %evl) {
400 ; CHECK-LABEL: vfsqrt_vv_nxv8f32_unmasked:
402 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
403 ; CHECK-NEXT: vfsqrt.v v8, v8
405 %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
406 %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
407 %v = call <vscale x 8 x float> @llvm.vp.sqrt.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
408 ret <vscale x 8 x float> %v
411 declare <vscale x 16 x float> @llvm.vp.sqrt.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
413 define <vscale x 16 x float> @vfsqrt_vv_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
414 ; CHECK-LABEL: vfsqrt_vv_nxv16f32:
416 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
417 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
419 %v = call <vscale x 16 x float> @llvm.vp.sqrt.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
420 ret <vscale x 16 x float> %v
423 define <vscale x 16 x float> @vfsqrt_vv_nxv16f32_unmasked(<vscale x 16 x float> %va, i32 zeroext %evl) {
424 ; CHECK-LABEL: vfsqrt_vv_nxv16f32_unmasked:
426 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
427 ; CHECK-NEXT: vfsqrt.v v8, v8
429 %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
430 %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
431 %v = call <vscale x 16 x float> @llvm.vp.sqrt.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
432 ret <vscale x 16 x float> %v
435 declare <vscale x 1 x double> @llvm.vp.sqrt.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
437 define <vscale x 1 x double> @vfsqrt_vv_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
438 ; CHECK-LABEL: vfsqrt_vv_nxv1f64:
440 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
441 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
443 %v = call <vscale x 1 x double> @llvm.vp.sqrt.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
444 ret <vscale x 1 x double> %v
447 define <vscale x 1 x double> @vfsqrt_vv_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) {
448 ; CHECK-LABEL: vfsqrt_vv_nxv1f64_unmasked:
450 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
451 ; CHECK-NEXT: vfsqrt.v v8, v8
453 %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
454 %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
455 %v = call <vscale x 1 x double> @llvm.vp.sqrt.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
456 ret <vscale x 1 x double> %v
459 declare <vscale x 2 x double> @llvm.vp.sqrt.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
461 define <vscale x 2 x double> @vfsqrt_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
462 ; CHECK-LABEL: vfsqrt_vv_nxv2f64:
464 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
465 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
467 %v = call <vscale x 2 x double> @llvm.vp.sqrt.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
468 ret <vscale x 2 x double> %v
471 define <vscale x 2 x double> @vfsqrt_vv_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) {
472 ; CHECK-LABEL: vfsqrt_vv_nxv2f64_unmasked:
474 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
475 ; CHECK-NEXT: vfsqrt.v v8, v8
477 %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
478 %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
479 %v = call <vscale x 2 x double> @llvm.vp.sqrt.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
480 ret <vscale x 2 x double> %v
483 declare <vscale x 4 x double> @llvm.vp.sqrt.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
485 define <vscale x 4 x double> @vfsqrt_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
486 ; CHECK-LABEL: vfsqrt_vv_nxv4f64:
488 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
489 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
491 %v = call <vscale x 4 x double> @llvm.vp.sqrt.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
492 ret <vscale x 4 x double> %v
495 define <vscale x 4 x double> @vfsqrt_vv_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) {
496 ; CHECK-LABEL: vfsqrt_vv_nxv4f64_unmasked:
498 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
499 ; CHECK-NEXT: vfsqrt.v v8, v8
501 %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
502 %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
503 %v = call <vscale x 4 x double> @llvm.vp.sqrt.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
504 ret <vscale x 4 x double> %v
507 declare <vscale x 7 x double> @llvm.vp.sqrt.nxv7f64(<vscale x 7 x double>, <vscale x 7 x i1>, i32)
509 define <vscale x 7 x double> @vfsqrt_vv_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
510 ; CHECK-LABEL: vfsqrt_vv_nxv7f64:
512 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
513 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
515 %v = call <vscale x 7 x double> @llvm.vp.sqrt.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
516 ret <vscale x 7 x double> %v
519 define <vscale x 7 x double> @vfsqrt_vv_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) {
520 ; CHECK-LABEL: vfsqrt_vv_nxv7f64_unmasked:
522 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
523 ; CHECK-NEXT: vfsqrt.v v8, v8
525 %head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
526 %m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
527 %v = call <vscale x 7 x double> @llvm.vp.sqrt.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
528 ret <vscale x 7 x double> %v
531 declare <vscale x 8 x double> @llvm.vp.sqrt.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
533 define <vscale x 8 x double> @vfsqrt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
534 ; CHECK-LABEL: vfsqrt_vv_nxv8f64:
536 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
537 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
539 %v = call <vscale x 8 x double> @llvm.vp.sqrt.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
540 ret <vscale x 8 x double> %v
543 define <vscale x 8 x double> @vfsqrt_vv_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) {
544 ; CHECK-LABEL: vfsqrt_vv_nxv8f64_unmasked:
546 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
547 ; CHECK-NEXT: vfsqrt.v v8, v8
549 %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
550 %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
551 %v = call <vscale x 8 x double> @llvm.vp.sqrt.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
552 ret <vscale x 8 x double> %v
556 declare <vscale x 16 x double> @llvm.vp.sqrt.nxv16f64(<vscale x 16 x double>, <vscale x 16 x i1>, i32)
558 define <vscale x 16 x double> @vfsqrt_vv_nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
559 ; CHECK-LABEL: vfsqrt_vv_nxv16f64:
561 ; CHECK-NEXT: vmv1r.v v24, v0
562 ; CHECK-NEXT: csrr a1, vlenb
563 ; CHECK-NEXT: srli a2, a1, 3
564 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
565 ; CHECK-NEXT: vslidedown.vx v0, v0, a2
566 ; CHECK-NEXT: sub a2, a0, a1
567 ; CHECK-NEXT: sltu a3, a0, a2
568 ; CHECK-NEXT: addi a3, a3, -1
569 ; CHECK-NEXT: and a2, a3, a2
570 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
571 ; CHECK-NEXT: vfsqrt.v v16, v16, v0.t
572 ; CHECK-NEXT: bltu a0, a1, .LBB32_2
573 ; CHECK-NEXT: # %bb.1:
574 ; CHECK-NEXT: mv a0, a1
575 ; CHECK-NEXT: .LBB32_2:
576 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
577 ; CHECK-NEXT: vmv1r.v v0, v24
578 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
580 %v = call <vscale x 16 x double> @llvm.vp.sqrt.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
581 ret <vscale x 16 x double> %v
584 define <vscale x 16 x double> @vfsqrt_vv_nxv16f64_unmasked(<vscale x 16 x double> %va, i32 zeroext %evl) {
585 ; CHECK-LABEL: vfsqrt_vv_nxv16f64_unmasked:
587 ; CHECK-NEXT: csrr a1, vlenb
588 ; CHECK-NEXT: sub a2, a0, a1
589 ; CHECK-NEXT: sltu a3, a0, a2
590 ; CHECK-NEXT: addi a3, a3, -1
591 ; CHECK-NEXT: and a2, a3, a2
592 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
593 ; CHECK-NEXT: vfsqrt.v v16, v16
594 ; CHECK-NEXT: bltu a0, a1, .LBB33_2
595 ; CHECK-NEXT: # %bb.1:
596 ; CHECK-NEXT: mv a0, a1
597 ; CHECK-NEXT: .LBB33_2:
598 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
599 ; CHECK-NEXT: vfsqrt.v v8, v8
601 %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
602 %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
603 %v = call <vscale x 16 x double> @llvm.vp.sqrt.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
604 ret <vscale x 16 x double> %v