1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
3 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
4 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
5 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
6 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
7 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
8 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
9 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; Vector-predicated square root on <2 x half>; the call sites in this file
; pass the input vector, a <2 x i1> mask, and an i32 explicit vector length.
11 declare <2 x half> @llvm.vp.sqrt.v2f16(<2 x half>, <2 x i1>, i32)
; Masked vp.sqrt on <2 x half>; %m is the lane mask and %evl the explicit
; vector length (the expected code uses a0 as the AVL).
; Under the ZVFH prefix the expected output is one masked vfsqrt.v at e16/mf4.
; Under the ZVFHMIN prefix it is a widen (vfwcvt.f.f.v), a masked vfsqrt.v at
; e32/mf2 with AVL a0, then a narrow (vfncvt.f.f.w); both conversions are
; unmasked and use a fixed AVL of 2.
13 define <2 x half> @vfsqrt_vv_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) {
14 ; ZVFH-LABEL: vfsqrt_vv_v2f16:
16 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
17 ; ZVFH-NEXT: vfsqrt.v v8, v8, v0.t
20 ; ZVFHMIN-LABEL: vfsqrt_vv_v2f16:
22 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
23 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
24 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
25 ; ZVFHMIN-NEXT: vfsqrt.v v9, v9, v0.t
26 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
27 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
29 %v = call <2 x half> @llvm.vp.sqrt.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
; vp.sqrt on <2 x half> with an all-true mask (splat (i1 true)); %evl is the
; explicit vector length (the expected code uses a0 as the AVL).
; Under the ZVFH prefix the expected output is one vfsqrt.v at e16/mf4 with no
; v0.t operand. Under the ZVFHMIN prefix it is a widen (vfwcvt.f.f.v), an
; unmasked vfsqrt.v at e32/mf2 with AVL a0, then a narrow (vfncvt.f.f.w); the
; conversions use a fixed AVL of 2.
33 define <2 x half> @vfsqrt_vv_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
34 ; ZVFH-LABEL: vfsqrt_vv_v2f16_unmasked:
36 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
37 ; ZVFH-NEXT: vfsqrt.v v8, v8
40 ; ZVFHMIN-LABEL: vfsqrt_vv_v2f16_unmasked:
42 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
43 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
44 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
45 ; ZVFHMIN-NEXT: vfsqrt.v v9, v9
46 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
47 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
49 %v = call <2 x half> @llvm.vp.sqrt.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl)
; Vector-predicated square root on <4 x half>; the call sites in this file
; pass the input vector, a <4 x i1> mask, and an i32 explicit vector length.
53 declare <4 x half> @llvm.vp.sqrt.v4f16(<4 x half>, <4 x i1>, i32)
; Masked vp.sqrt on <4 x half>; %m is the lane mask and %evl the explicit
; vector length (the expected code uses a0 as the AVL).
; Under the ZVFH prefix the expected output is one masked vfsqrt.v at e16/mf2.
; Under the ZVFHMIN prefix it is a widen (vfwcvt.f.f.v), a masked vfsqrt.v at
; e32/m1 with AVL a0, then a narrow (vfncvt.f.f.w); both conversions are
; unmasked and use a fixed AVL of 4.
55 define <4 x half> @vfsqrt_vv_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) {
56 ; ZVFH-LABEL: vfsqrt_vv_v4f16:
58 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
59 ; ZVFH-NEXT: vfsqrt.v v8, v8, v0.t
62 ; ZVFHMIN-LABEL: vfsqrt_vv_v4f16:
64 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
65 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
66 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
67 ; ZVFHMIN-NEXT: vfsqrt.v v9, v9, v0.t
68 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
69 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
71 %v = call <4 x half> @llvm.vp.sqrt.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
; vp.sqrt on <4 x half> with an all-true mask (splat (i1 true)); %evl is the
; explicit vector length (the expected code uses a0 as the AVL).
; Under the ZVFH prefix the expected output is one vfsqrt.v at e16/mf2 with no
; v0.t operand. Under the ZVFHMIN prefix it is a widen (vfwcvt.f.f.v), an
; unmasked vfsqrt.v at e32/m1 with AVL a0, then a narrow (vfncvt.f.f.w); the
; conversions use a fixed AVL of 4.
75 define <4 x half> @vfsqrt_vv_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
76 ; ZVFH-LABEL: vfsqrt_vv_v4f16_unmasked:
78 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
79 ; ZVFH-NEXT: vfsqrt.v v8, v8
82 ; ZVFHMIN-LABEL: vfsqrt_vv_v4f16_unmasked:
84 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
85 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
86 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
87 ; ZVFHMIN-NEXT: vfsqrt.v v9, v9
88 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
89 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
91 %v = call <4 x half> @llvm.vp.sqrt.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl)
; Vector-predicated square root on <8 x half>; the call sites in this file
; pass the input vector, an <8 x i1> mask, and an i32 explicit vector length.
95 declare <8 x half> @llvm.vp.sqrt.v8f16(<8 x half>, <8 x i1>, i32)
; Masked vp.sqrt on <8 x half>; %m is the lane mask and %evl the explicit
; vector length (the expected code uses a0 as the AVL).
; Under the ZVFH prefix the expected output is one masked vfsqrt.v at e16/m1.
; Under the ZVFHMIN prefix it is a widen (vfwcvt.f.f.v) into v10, a masked
; vfsqrt.v at e32/m2 with AVL a0, then a narrow (vfncvt.f.f.w); both
; conversions are unmasked and use a fixed AVL of 8.
97 define <8 x half> @vfsqrt_vv_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) {
98 ; ZVFH-LABEL: vfsqrt_vv_v8f16:
100 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
101 ; ZVFH-NEXT: vfsqrt.v v8, v8, v0.t
104 ; ZVFHMIN-LABEL: vfsqrt_vv_v8f16:
106 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
107 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
108 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
109 ; ZVFHMIN-NEXT: vfsqrt.v v10, v10, v0.t
110 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
111 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
113 %v = call <8 x half> @llvm.vp.sqrt.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
; vp.sqrt on <8 x half> with an all-true mask (splat (i1 true)); %evl is the
; explicit vector length (the expected code uses a0 as the AVL).
; Under the ZVFH prefix the expected output is one vfsqrt.v at e16/m1 with no
; v0.t operand. Under the ZVFHMIN prefix it is a widen (vfwcvt.f.f.v) into
; v10, an unmasked vfsqrt.v at e32/m2 with AVL a0, then a narrow
; (vfncvt.f.f.w); the conversions use a fixed AVL of 8.
117 define <8 x half> @vfsqrt_vv_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
118 ; ZVFH-LABEL: vfsqrt_vv_v8f16_unmasked:
120 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
121 ; ZVFH-NEXT: vfsqrt.v v8, v8
124 ; ZVFHMIN-LABEL: vfsqrt_vv_v8f16_unmasked:
126 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
127 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
128 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
129 ; ZVFHMIN-NEXT: vfsqrt.v v10, v10
130 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
131 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
133 %v = call <8 x half> @llvm.vp.sqrt.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl)
; Vector-predicated square root on <16 x half>; the call sites in this file
; pass the input vector, a <16 x i1> mask, and an i32 explicit vector length.
137 declare <16 x half> @llvm.vp.sqrt.v16f16(<16 x half>, <16 x i1>, i32)
; Masked vp.sqrt on <16 x half>; %m is the lane mask and %evl the explicit
; vector length (the expected code uses a0 as the AVL).
; Under the ZVFH prefix the expected output is one masked vfsqrt.v at e16/m2.
; Under the ZVFHMIN prefix it is a widen (vfwcvt.f.f.v) into v12, a masked
; vfsqrt.v at e32/m4 with AVL a0, then a narrow (vfncvt.f.f.w); both
; conversions are unmasked and use a fixed AVL of 16.
139 define <16 x half> @vfsqrt_vv_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
140 ; ZVFH-LABEL: vfsqrt_vv_v16f16:
142 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
143 ; ZVFH-NEXT: vfsqrt.v v8, v8, v0.t
146 ; ZVFHMIN-LABEL: vfsqrt_vv_v16f16:
148 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
149 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
150 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
151 ; ZVFHMIN-NEXT: vfsqrt.v v12, v12, v0.t
152 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
153 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
155 %v = call <16 x half> @llvm.vp.sqrt.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
; vp.sqrt on <16 x half> with an all-true mask (splat (i1 true)); %evl is the
; explicit vector length (the expected code uses a0 as the AVL).
; Under the ZVFH prefix the expected output is one vfsqrt.v at e16/m2 with no
; v0.t operand. Under the ZVFHMIN prefix it is a widen (vfwcvt.f.f.v) into
; v12, an unmasked vfsqrt.v at e32/m4 with AVL a0, then a narrow
; (vfncvt.f.f.w); the conversions use a fixed AVL of 16.
159 define <16 x half> @vfsqrt_vv_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) {
160 ; ZVFH-LABEL: vfsqrt_vv_v16f16_unmasked:
162 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
163 ; ZVFH-NEXT: vfsqrt.v v8, v8
166 ; ZVFHMIN-LABEL: vfsqrt_vv_v16f16_unmasked:
168 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
169 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
170 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
171 ; ZVFHMIN-NEXT: vfsqrt.v v12, v12
172 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
173 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
175 %v = call <16 x half> @llvm.vp.sqrt.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl)
; Vector-predicated square root on <2 x float>; the call sites in this file
; pass the input vector, a <2 x i1> mask, and an i32 explicit vector length.
179 declare <2 x float> @llvm.vp.sqrt.v2f32(<2 x float>, <2 x i1>, i32)
; Masked vp.sqrt on <2 x float>; %m is the lane mask and %evl the explicit
; vector length. All RUN configurations share the expected output: one masked
; vfsqrt.v at e32/mf2 with a0 as the AVL.
181 define <2 x float> @vfsqrt_vv_v2f32(<2 x float> %va, <2 x i1> %m, i32 zeroext %evl) {
182 ; CHECK-LABEL: vfsqrt_vv_v2f32:
184 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
185 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
187 %v = call <2 x float> @llvm.vp.sqrt.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl)
; vp.sqrt on <2 x float> with an all-true mask (splat (i1 true)). All RUN
; configurations share the expected output: one vfsqrt.v at e32/mf2 with a0
; as the AVL and no v0.t operand.
191 define <2 x float> @vfsqrt_vv_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) {
192 ; CHECK-LABEL: vfsqrt_vv_v2f32_unmasked:
194 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
195 ; CHECK-NEXT: vfsqrt.v v8, v8
197 %v = call <2 x float> @llvm.vp.sqrt.v2f32(<2 x float> %va, <2 x i1> splat (i1 true), i32 %evl)
; Vector-predicated square root on <4 x float>; the call sites in this file
; pass the input vector, a <4 x i1> mask, and an i32 explicit vector length.
201 declare <4 x float> @llvm.vp.sqrt.v4f32(<4 x float>, <4 x i1>, i32)
; Masked vp.sqrt on <4 x float>; %m is the lane mask and %evl the explicit
; vector length. All RUN configurations share the expected output: one masked
; vfsqrt.v at e32/m1 with a0 as the AVL.
203 define <4 x float> @vfsqrt_vv_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext %evl) {
204 ; CHECK-LABEL: vfsqrt_vv_v4f32:
206 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
207 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
209 %v = call <4 x float> @llvm.vp.sqrt.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
; vp.sqrt on <4 x float> with an all-true mask (splat (i1 true)). All RUN
; configurations share the expected output: one vfsqrt.v at e32/m1 with a0
; as the AVL and no v0.t operand.
213 define <4 x float> @vfsqrt_vv_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) {
214 ; CHECK-LABEL: vfsqrt_vv_v4f32_unmasked:
216 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
217 ; CHECK-NEXT: vfsqrt.v v8, v8
219 %v = call <4 x float> @llvm.vp.sqrt.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl)
; Vector-predicated square root on <8 x float>; the call sites in this file
; pass the input vector, an <8 x i1> mask, and an i32 explicit vector length.
223 declare <8 x float> @llvm.vp.sqrt.v8f32(<8 x float>, <8 x i1>, i32)
; Masked vp.sqrt on <8 x float>; %m is the lane mask and %evl the explicit
; vector length. All RUN configurations share the expected output: one masked
; vfsqrt.v at e32/m2 with a0 as the AVL.
225 define <8 x float> @vfsqrt_vv_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
226 ; CHECK-LABEL: vfsqrt_vv_v8f32:
228 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
229 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
231 %v = call <8 x float> @llvm.vp.sqrt.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
; vp.sqrt on <8 x float> with an all-true mask (splat (i1 true)). All RUN
; configurations share the expected output: one vfsqrt.v at e32/m2 with a0
; as the AVL and no v0.t operand.
235 define <8 x float> @vfsqrt_vv_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) {
236 ; CHECK-LABEL: vfsqrt_vv_v8f32_unmasked:
238 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
239 ; CHECK-NEXT: vfsqrt.v v8, v8
241 %v = call <8 x float> @llvm.vp.sqrt.v8f32(<8 x float> %va, <8 x i1> splat (i1 true), i32 %evl)
; Vector-predicated square root on <16 x float>; the call sites in this file
; pass the input vector, a <16 x i1> mask, and an i32 explicit vector length.
245 declare <16 x float> @llvm.vp.sqrt.v16f32(<16 x float>, <16 x i1>, i32)
; Masked vp.sqrt on <16 x float>; %m is the lane mask and %evl the explicit
; vector length. All RUN configurations share the expected output: one masked
; vfsqrt.v at e32/m4 with a0 as the AVL.
247 define <16 x float> @vfsqrt_vv_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
248 ; CHECK-LABEL: vfsqrt_vv_v16f32:
250 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
251 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
253 %v = call <16 x float> @llvm.vp.sqrt.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
; vp.sqrt on <16 x float> with an all-true mask (splat (i1 true)). All RUN
; configurations share the expected output: one vfsqrt.v at e32/m4 with a0
; as the AVL and no v0.t operand.
257 define <16 x float> @vfsqrt_vv_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) {
258 ; CHECK-LABEL: vfsqrt_vv_v16f32_unmasked:
260 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
261 ; CHECK-NEXT: vfsqrt.v v8, v8
263 %v = call <16 x float> @llvm.vp.sqrt.v16f32(<16 x float> %va, <16 x i1> splat (i1 true), i32 %evl)
; Vector-predicated square root on <2 x double>; the call sites in this file
; pass the input vector, a <2 x i1> mask, and an i32 explicit vector length.
267 declare <2 x double> @llvm.vp.sqrt.v2f64(<2 x double>, <2 x i1>, i32)
; Masked vp.sqrt on <2 x double>; %m is the lane mask and %evl the explicit
; vector length. All RUN configurations share the expected output: one masked
; vfsqrt.v at e64/m1 with a0 as the AVL.
269 define <2 x double> @vfsqrt_vv_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) {
270 ; CHECK-LABEL: vfsqrt_vv_v2f64:
272 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
273 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
275 %v = call <2 x double> @llvm.vp.sqrt.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
; vp.sqrt on <2 x double> with an all-true mask (splat (i1 true)). All RUN
; configurations share the expected output: one vfsqrt.v at e64/m1 with a0
; as the AVL and no v0.t operand.
279 define <2 x double> @vfsqrt_vv_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) {
280 ; CHECK-LABEL: vfsqrt_vv_v2f64_unmasked:
282 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
283 ; CHECK-NEXT: vfsqrt.v v8, v8
285 %v = call <2 x double> @llvm.vp.sqrt.v2f64(<2 x double> %va, <2 x i1> splat (i1 true), i32 %evl)
; Vector-predicated square root on <4 x double>; the call sites in this file
; pass the input vector, a <4 x i1> mask, and an i32 explicit vector length.
289 declare <4 x double> @llvm.vp.sqrt.v4f64(<4 x double>, <4 x i1>, i32)
; Masked vp.sqrt on <4 x double>; %m is the lane mask and %evl the explicit
; vector length. All RUN configurations share the expected output: one masked
; vfsqrt.v at e64/m2 with a0 as the AVL.
291 define <4 x double> @vfsqrt_vv_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
292 ; CHECK-LABEL: vfsqrt_vv_v4f64:
294 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
295 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
297 %v = call <4 x double> @llvm.vp.sqrt.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
; vp.sqrt on <4 x double> with an all-true mask (splat (i1 true)). All RUN
; configurations share the expected output: one vfsqrt.v at e64/m2 with a0
; as the AVL and no v0.t operand.
301 define <4 x double> @vfsqrt_vv_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) {
302 ; CHECK-LABEL: vfsqrt_vv_v4f64_unmasked:
304 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
305 ; CHECK-NEXT: vfsqrt.v v8, v8
307 %v = call <4 x double> @llvm.vp.sqrt.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl)
; Vector-predicated square root on <8 x double>; the call sites in this file
; pass the input vector, an <8 x i1> mask, and an i32 explicit vector length.
311 declare <8 x double> @llvm.vp.sqrt.v8f64(<8 x double>, <8 x i1>, i32)
; Masked vp.sqrt on <8 x double>; %m is the lane mask and %evl the explicit
; vector length. All RUN configurations share the expected output: one masked
; vfsqrt.v at e64/m4 with a0 as the AVL.
313 define <8 x double> @vfsqrt_vv_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
314 ; CHECK-LABEL: vfsqrt_vv_v8f64:
316 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
317 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
319 %v = call <8 x double> @llvm.vp.sqrt.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
; vp.sqrt on <8 x double> with an all-true mask (splat (i1 true)). All RUN
; configurations share the expected output: one vfsqrt.v at e64/m4 with a0
; as the AVL and no v0.t operand.
323 define <8 x double> @vfsqrt_vv_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) {
324 ; CHECK-LABEL: vfsqrt_vv_v8f64_unmasked:
326 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
327 ; CHECK-NEXT: vfsqrt.v v8, v8
329 %v = call <8 x double> @llvm.vp.sqrt.v8f64(<8 x double> %va, <8 x i1> splat (i1 true), i32 %evl)
; Vector-predicated square root on <15 x double> (a non-power-of-two element
; count); the call sites in this file pass the input vector, a <15 x i1> mask,
; and an i32 explicit vector length.
333 declare <15 x double> @llvm.vp.sqrt.v15f64(<15 x double>, <15 x i1>, i32)
; Masked vp.sqrt on <15 x double>, a non-power-of-two element count; %m is the
; lane mask and %evl the explicit vector length. The expected output is one
; masked vfsqrt.v at e64/m8 with a0 as the AVL, the same instruction sequence
; this file expects for vfsqrt_vv_v16f64.
335 define <15 x double> @vfsqrt_vv_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
336 ; CHECK-LABEL: vfsqrt_vv_v15f64:
338 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
339 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
341 %v = call <15 x double> @llvm.vp.sqrt.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
; vp.sqrt on <15 x double> (non-power-of-two element count) with an all-true
; mask (splat (i1 true)). The expected output is one vfsqrt.v at e64/m8 with
; a0 as the AVL and no v0.t operand.
345 define <15 x double> @vfsqrt_vv_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) {
346 ; CHECK-LABEL: vfsqrt_vv_v15f64_unmasked:
348 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
349 ; CHECK-NEXT: vfsqrt.v v8, v8
351 %v = call <15 x double> @llvm.vp.sqrt.v15f64(<15 x double> %va, <15 x i1> splat (i1 true), i32 %evl)
; Vector-predicated square root on <16 x double>; the call sites in this file
; pass the input vector, a <16 x i1> mask, and an i32 explicit vector length.
355 declare <16 x double> @llvm.vp.sqrt.v16f64(<16 x double>, <16 x i1>, i32)
; Masked vp.sqrt on <16 x double>; %m is the lane mask and %evl the explicit
; vector length. All RUN configurations share the expected output: one masked
; vfsqrt.v at e64/m8 with a0 as the AVL.
357 define <16 x double> @vfsqrt_vv_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
358 ; CHECK-LABEL: vfsqrt_vv_v16f64:
360 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
361 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
363 %v = call <16 x double> @llvm.vp.sqrt.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
; vp.sqrt on <16 x double> with an all-true mask (splat (i1 true)). All RUN
; configurations share the expected output: one vfsqrt.v at e64/m8 with a0
; as the AVL and no v0.t operand.
367 define <16 x double> @vfsqrt_vv_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) {
368 ; CHECK-LABEL: vfsqrt_vv_v16f64_unmasked:
370 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
371 ; CHECK-NEXT: vfsqrt.v v8, v8
373 %v = call <16 x double> @llvm.vp.sqrt.v16f64(<16 x double> %va, <16 x i1> splat (i1 true), i32 %evl)
; Vector-predicated square root on <32 x double>; the call sites in this file
; pass the input vector, a <32 x i1> mask, and an i32 explicit vector length.
; The expected code for its users in this file performs the operation as two
; e64/m8 vfsqrt.v instructions.
377 declare <32 x double> @llvm.vp.sqrt.v32f64(<32 x double>, <32 x i1>, i32)
; Masked vp.sqrt on <32 x double>; %m is the lane mask and %evl the explicit
; vector length (a0 in the expected code). The expected output performs the
; operation as two masked e64/m8 vfsqrt.v instructions:
;   - v8 group: AVL a1 = a0 if a0 < 16 (unsigned), otherwise 16
;     (the li/mv/bltu/li sequence), using the mask already in v0.
;   - v16 group: AVL = a0 - 16, forced to 0 when that subtraction wraps
;     (the addi/sltu/addi/and sequence), using the mask saved in v24.
; v24 is produced up front by sliding v0 down by 2 elements at e8, i.e. 16 mask
; bits, and is copied back into v0 with vmv1r.v before the second vfsqrt.v.
379 define <32 x double> @vfsqrt_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
380 ; CHECK-LABEL: vfsqrt_vv_v32f64:
382 ; CHECK-NEXT: li a2, 16
383 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
384 ; CHECK-NEXT: vslidedown.vi v24, v0, 2
385 ; CHECK-NEXT: mv a1, a0
386 ; CHECK-NEXT: bltu a0, a2, .LBB26_2
387 ; CHECK-NEXT: # %bb.1:
388 ; CHECK-NEXT: li a1, 16
389 ; CHECK-NEXT: .LBB26_2:
390 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
391 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
392 ; CHECK-NEXT: addi a1, a0, -16
393 ; CHECK-NEXT: sltu a0, a0, a1
394 ; CHECK-NEXT: addi a0, a0, -1
395 ; CHECK-NEXT: and a0, a0, a1
396 ; CHECK-NEXT: vmv1r.v v0, v24
397 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
398 ; CHECK-NEXT: vfsqrt.v v16, v16, v0.t
400 %v = call <32 x double> @llvm.vp.sqrt.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
; vp.sqrt on <32 x double> with an all-true mask (splat (i1 true)); %evl is the
; explicit vector length (a0 in the expected code). The expected output
; performs the operation as two unmasked e64/m8 vfsqrt.v instructions:
;   - v8 group: AVL a1 = a0 if a0 < 16 (unsigned), otherwise 16
;     (the li/mv/bltu/li sequence).
;   - v16 group: AVL = a0 - 16, forced to 0 when that subtraction wraps
;     (the addi/sltu/addi/and sequence).
; No mask slide or v0 copy is expected in this variant.
404 define <32 x double> @vfsqrt_vv_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
405 ; CHECK-LABEL: vfsqrt_vv_v32f64_unmasked:
407 ; CHECK-NEXT: li a2, 16
408 ; CHECK-NEXT: mv a1, a0
409 ; CHECK-NEXT: bltu a0, a2, .LBB27_2
410 ; CHECK-NEXT: # %bb.1:
411 ; CHECK-NEXT: li a1, 16
412 ; CHECK-NEXT: .LBB27_2:
413 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
414 ; CHECK-NEXT: vfsqrt.v v8, v8
415 ; CHECK-NEXT: addi a1, a0, -16
416 ; CHECK-NEXT: sltu a0, a0, a1
417 ; CHECK-NEXT: addi a0, a0, -1
418 ; CHECK-NEXT: and a0, a0, a1
419 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
420 ; CHECK-NEXT: vfsqrt.v v16, v16
422 %v = call <32 x double> @llvm.vp.sqrt.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl)