; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:   -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:   -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:   -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:   -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,ZVFHMIN
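
; These tests cover vector-predicated (VP) lowering of llvm.vp.sqrt on
; scalable bf16, f16, f32, and f64 vectors, in masked and unmasked form.
; The ZVFH configurations have native f16 vector arithmetic; the ZVFHMIN
; configurations (and bf16 under Zvfbfmin in every configuration) only have
; conversions, so sqrt is expected to lower to a widen-to-f32, vfsqrt.v,
; narrow-back sequence.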

declare <vscale x 1 x bfloat> @llvm.vp.sqrt.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)

define <vscale x 1 x bfloat> @vfsqrt_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfsqrt.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x bfloat> @llvm.vp.sqrt.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x bfloat> %v
}

define <vscale x 1 x bfloat> @vfsqrt_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv1bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfsqrt.v v9, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x bfloat> @llvm.vp.sqrt.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x bfloat> %v
}

declare <vscale x 2 x bfloat> @llvm.vp.sqrt.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)

define <vscale x 2 x bfloat> @vfsqrt_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x bfloat> @llvm.vp.sqrt.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x bfloat> %v
}

define <vscale x 2 x bfloat> @vfsqrt_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv2bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v9, v9
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x bfloat> @llvm.vp.sqrt.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x bfloat> %v
}

declare <vscale x 4 x bfloat> @llvm.vp.sqrt.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)

define <vscale x 4 x bfloat> @vfsqrt_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v10, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x bfloat> @llvm.vp.sqrt.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x bfloat> %v
}

define <vscale x 4 x bfloat> @vfsqrt_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv4bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v10, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x bfloat> @llvm.vp.sqrt.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x bfloat> %v
}

declare <vscale x 8 x bfloat> @llvm.vp.sqrt.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)

define <vscale x 8 x bfloat> @vfsqrt_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v12, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x bfloat> @llvm.vp.sqrt.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

define <vscale x 8 x bfloat> @vfsqrt_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv8bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v12, v12
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x bfloat> @llvm.vp.sqrt.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

declare <vscale x 16 x bfloat> @llvm.vp.sqrt.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)

define <vscale x 16 x bfloat> @vfsqrt_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x bfloat> @llvm.vp.sqrt.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x bfloat> %v
}

define <vscale x 16 x bfloat> @vfsqrt_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv16bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x bfloat> @llvm.vp.sqrt.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x bfloat> %v
}
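
; nxv32bf16 widened to f32 would need a register group larger than LMUL=8,
; so the expected lowering splits the operation in two: the high half runs
; first under the remainder EVL (the sub/sltu/addi/and sequence), with its
; mask bits sliced down by vslidedown, then the low half runs under the EVL
; capped at 2*vlenb.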

declare <vscale x 32 x bfloat> @llvm.vp.sqrt.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)

define <vscale x 32 x bfloat> @vfsqrt_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv32bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vmv1r.v v16, v0
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a1, a2, 1
; CHECK-NEXT:    srli a2, a2, 2
; CHECK-NEXT:    sub a3, a0, a1
; CHECK-NEXT:    sltu a4, a0, a3
; CHECK-NEXT:    addi a4, a4, -1
; CHECK-NEXT:    vslidedown.vx v0, v0, a2
; CHECK-NEXT:    and a3, a4, a3
; CHECK-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB10_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB10_2:
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v24, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 32 x bfloat> @llvm.vp.sqrt.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x bfloat> %v
}

define <vscale x 32 x bfloat> @vfsqrt_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv32bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
; CHECK-NEXT:    vmset.m v16
; CHECK-NEXT:    slli a1, a2, 1
; CHECK-NEXT:    srli a2, a2, 2
; CHECK-NEXT:    sub a3, a0, a1
; CHECK-NEXT:    sltu a4, a0, a3
; CHECK-NEXT:    addi a4, a4, -1
; CHECK-NEXT:    vsetvli a5, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vx v0, v16, a2
; CHECK-NEXT:    and a3, a4, a3
; CHECK-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB11_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB11_2:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    ret
  %v = call <vscale x 32 x bfloat> @llvm.vp.sqrt.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 32 x bfloat> %v
}
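
; The f16 tests below follow the same shape as the bf16 ones: ZVFH lowers
; to a single vfsqrt.v at the native element width, while ZVFHMIN takes the
; same widen/sqrt/narrow path, including the split for nxv32f16.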

declare <vscale x 1 x half> @llvm.vp.sqrt.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)

define <vscale x 1 x half> @vfsqrt_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv1f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv1f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v9, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 1 x half> @llvm.vp.sqrt.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x half> %v
}

define <vscale x 1 x half> @vfsqrt_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv1f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv1f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v9, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 1 x half> @llvm.vp.sqrt.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x half> %v
}

declare <vscale x 2 x half> @llvm.vp.sqrt.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)

define <vscale x 2 x half> @vfsqrt_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v9, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 2 x half> @llvm.vp.sqrt.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x half> %v
}

define <vscale x 2 x half> @vfsqrt_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv2f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv2f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v9, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 2 x half> @llvm.vp.sqrt.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x half> %v
}

declare <vscale x 4 x half> @llvm.vp.sqrt.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)

define <vscale x 4 x half> @vfsqrt_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v10, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 4 x half> @llvm.vp.sqrt.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x half> %v
}

define <vscale x 4 x half> @vfsqrt_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv4f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv4f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v10, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 4 x half> @llvm.vp.sqrt.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x half> %v
}

declare <vscale x 8 x half> @llvm.vp.sqrt.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)

define <vscale x 8 x half> @vfsqrt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v12, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 8 x half> @llvm.vp.sqrt.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %v
}

define <vscale x 8 x half> @vfsqrt_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv8f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv8f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v12, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 8 x half> @llvm.vp.sqrt.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x half> %v
}

declare <vscale x 16 x half> @llvm.vp.sqrt.nxv16f16(<vscale x 16 x half>, <vscale x 16 x i1>, i32)

define <vscale x 16 x half> @vfsqrt_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v16, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 16 x half> @llvm.vp.sqrt.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x half> %v
}

define <vscale x 16 x half> @vfsqrt_vv_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv16f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv16f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v16, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 16 x half> @llvm.vp.sqrt.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x half> %v
}

declare <vscale x 32 x half> @llvm.vp.sqrt.nxv32f16(<vscale x 32 x half>, <vscale x 32 x i1>, i32)

define <vscale x 32 x half> @vfsqrt_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv32f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv32f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT:    vmv1r.v v16, v0
; ZVFHMIN-NEXT:    csrr a2, vlenb
; ZVFHMIN-NEXT:    slli a1, a2, 1
; ZVFHMIN-NEXT:    srli a2, a2, 2
; ZVFHMIN-NEXT:    sub a3, a0, a1
; ZVFHMIN-NEXT:    sltu a4, a0, a3
; ZVFHMIN-NEXT:    addi a4, a4, -1
; ZVFHMIN-NEXT:    vslidedown.vx v0, v0, a2
; ZVFHMIN-NEXT:    and a3, a4, a3
; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v24, v24, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v24, v0.t
; ZVFHMIN-NEXT:    bltu a0, a1, .LBB22_2
; ZVFHMIN-NEXT:  # %bb.1:
; ZVFHMIN-NEXT:    mv a0, a1
; ZVFHMIN-NEXT:  .LBB22_2:
; ZVFHMIN-NEXT:    vmv1r.v v0, v16
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v24, v24, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 32 x half> @llvm.vp.sqrt.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x half> %v
}

define <vscale x 32 x half> @vfsqrt_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vfsqrt_vv_nxv32f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfsqrt_vv_nxv32f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    csrr a2, vlenb
; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
; ZVFHMIN-NEXT:    vmset.m v16
; ZVFHMIN-NEXT:    slli a1, a2, 1
; ZVFHMIN-NEXT:    srli a2, a2, 2
; ZVFHMIN-NEXT:    sub a3, a0, a1
; ZVFHMIN-NEXT:    sltu a4, a0, a3
; ZVFHMIN-NEXT:    addi a4, a4, -1
; ZVFHMIN-NEXT:    vsetvli a5, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT:    vslidedown.vx v0, v16, a2
; ZVFHMIN-NEXT:    and a3, a4, a3
; ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v16, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v16, v0.t
; ZVFHMIN-NEXT:    bltu a0, a1, .LBB23_2
; ZVFHMIN-NEXT:  # %bb.1:
; ZVFHMIN-NEXT:    mv a0, a1
; ZVFHMIN-NEXT:  .LBB23_2:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v16, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 32 x half> @llvm.vp.sqrt.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 32 x half> %v
}
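
; f32 and f64 have native vector sqrt under +d/+v, so the remaining tests
; all expect just a vsetvli and a single (possibly masked) vfsqrt.v.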

declare <vscale x 1 x float> @llvm.vp.sqrt.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)

define <vscale x 1 x float> @vfsqrt_vv_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x float> @llvm.vp.sqrt.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}

define <vscale x 1 x float> @vfsqrt_vv_nxv1f32_unmasked(<vscale x 1 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv1f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x float> @llvm.vp.sqrt.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x float> %v
}

declare <vscale x 2 x float> @llvm.vp.sqrt.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)

define <vscale x 2 x float> @vfsqrt_vv_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x float> @llvm.vp.sqrt.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x float> %v
}

define <vscale x 2 x float> @vfsqrt_vv_nxv2f32_unmasked(<vscale x 2 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv2f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x float> @llvm.vp.sqrt.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x float> %v
}

declare <vscale x 4 x float> @llvm.vp.sqrt.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)

define <vscale x 4 x float> @vfsqrt_vv_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x float> @llvm.vp.sqrt.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x float> %v
}

define <vscale x 4 x float> @vfsqrt_vv_nxv4f32_unmasked(<vscale x 4 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv4f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x float> @llvm.vp.sqrt.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x float> %v
}

declare <vscale x 8 x float> @llvm.vp.sqrt.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)

define <vscale x 8 x float> @vfsqrt_vv_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x float> @llvm.vp.sqrt.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vfsqrt_vv_nxv8f32_unmasked(<vscale x 8 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv8f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x float> @llvm.vp.sqrt.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x float> %v
}

declare <vscale x 16 x float> @llvm.vp.sqrt.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)

define <vscale x 16 x float> @vfsqrt_vv_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x float> @llvm.vp.sqrt.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x float> %v
}

define <vscale x 16 x float> @vfsqrt_vv_nxv16f32_unmasked(<vscale x 16 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv16f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x float> @llvm.vp.sqrt.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x float> %v
}

declare <vscale x 1 x double> @llvm.vp.sqrt.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)

define <vscale x 1 x double> @vfsqrt_vv_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x double> @llvm.vp.sqrt.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x double> %v
}

define <vscale x 1 x double> @vfsqrt_vv_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv1f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x double> @llvm.vp.sqrt.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x double> %v
}

declare <vscale x 2 x double> @llvm.vp.sqrt.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)

define <vscale x 2 x double> @vfsqrt_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x double> @llvm.vp.sqrt.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x double> %v
}

define <vscale x 2 x double> @vfsqrt_vv_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x double> @llvm.vp.sqrt.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x double> %v
}

declare <vscale x 4 x double> @llvm.vp.sqrt.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)

define <vscale x 4 x double> @vfsqrt_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x double> @llvm.vp.sqrt.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x double> %v
}

define <vscale x 4 x double> @vfsqrt_vv_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv4f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x double> @llvm.vp.sqrt.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x double> %v
}
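
; nxv7f64 is not a power of two; it appears to be lowered via the
; containing nxv8f64 type, so the checks match the nxv8f64 output.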

declare <vscale x 7 x double> @llvm.vp.sqrt.nxv7f64(<vscale x 7 x double>, <vscale x 7 x i1>, i32)

define <vscale x 7 x double> @vfsqrt_vv_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv7f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 7 x double> @llvm.vp.sqrt.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
  ret <vscale x 7 x double> %v
}

define <vscale x 7 x double> @vfsqrt_vv_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv7f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 7 x double> @llvm.vp.sqrt.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 7 x double> %v
}

declare <vscale x 8 x double> @llvm.vp.sqrt.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)

define <vscale x 8 x double> @vfsqrt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x double> @llvm.vp.sqrt.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vfsqrt_vv_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv8f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x double> @llvm.vp.sqrt.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x double> %v
}
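
; nxv16f64 does not fit in a single LMUL=8 register group, so the expected
; lowering splits it into two m8 halves, computing each half's EVL from
; vlenb (remainder EVL for the high half, capped EVL for the low half).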

declare <vscale x 16 x double> @llvm.vp.sqrt.nxv16f64(<vscale x 16 x double>, <vscale x 16 x i1>, i32)

define <vscale x 16 x double> @vfsqrt_vv_nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vmv1r.v v24, v0
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    srli a2, a1, 3
; CHECK-NEXT:    sub a3, a0, a1
; CHECK-NEXT:    vslidedown.vx v0, v0, a2
; CHECK-NEXT:    sltu a2, a0, a3
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    and a2, a2, a3
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v16, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB44_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB44_2:
; CHECK-NEXT:    vmv1r.v v0, v24
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x double> @llvm.vp.sqrt.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x double> %v
}

define <vscale x 16 x double> @vfsqrt_vv_nxv16f64_unmasked(<vscale x 16 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfsqrt_vv_nxv16f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    sub a2, a0, a1
; CHECK-NEXT:    sltu a3, a0, a2
; CHECK-NEXT:    addi a3, a3, -1
; CHECK-NEXT:    and a2, a3, a2
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v16, v16
; CHECK-NEXT:    bltu a0, a1, .LBB45_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB45_2:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x double> @llvm.vp.sqrt.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x double> %v
}