1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
3 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
4 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
5 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
6 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
7 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
8 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
9 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; sqrt of a <vscale x 1 x half> vector via the llvm.sqrt intrinsic.
; The +zvfh runs expect one vfsqrt.v at e16/mf4. The +zvfhmin runs expect the
; input to be widened with vfwcvt.f.f.v, the vfsqrt.v to execute at e32/mf2,
; and the result to be narrowed back to e16/mf4 with vfncvt.f.f.w.
11 declare <vscale x 1 x half> @llvm.sqrt.nxv1f16(<vscale x 1 x half>)
13 define <vscale x 1 x half> @vfsqrt_nxv1f16(<vscale x 1 x half> %v) {
14 ; ZVFH-LABEL: vfsqrt_nxv1f16:
16 ; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
17 ; ZVFH-NEXT: vfsqrt.v v8, v8
20 ; ZVFHMIN-LABEL: vfsqrt_nxv1f16:
22 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
23 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
24 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
25 ; ZVFHMIN-NEXT: vfsqrt.v v9, v9
26 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
27 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
29 %r = call <vscale x 1 x half> @llvm.sqrt.nxv1f16(<vscale x 1 x half> %v)
30 ret <vscale x 1 x half> %r
; sqrt of a <vscale x 2 x half> vector via the llvm.sqrt intrinsic.
; The +zvfh runs expect one vfsqrt.v at e16/mf2. The +zvfhmin runs expect a
; widen (vfwcvt.f.f.v), a vfsqrt.v at e32/m1, and a narrow (vfncvt.f.f.w)
; back to e16/mf2.
33 declare <vscale x 2 x half> @llvm.sqrt.nxv2f16(<vscale x 2 x half>)
35 define <vscale x 2 x half> @vfsqrt_nxv2f16(<vscale x 2 x half> %v) {
36 ; ZVFH-LABEL: vfsqrt_nxv2f16:
38 ; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
39 ; ZVFH-NEXT: vfsqrt.v v8, v8
42 ; ZVFHMIN-LABEL: vfsqrt_nxv2f16:
44 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
45 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
46 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
47 ; ZVFHMIN-NEXT: vfsqrt.v v9, v9
48 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
49 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
51 %r = call <vscale x 2 x half> @llvm.sqrt.nxv2f16(<vscale x 2 x half> %v)
52 ret <vscale x 2 x half> %r
; sqrt of a <vscale x 4 x half> vector via the llvm.sqrt intrinsic.
; The +zvfh runs expect one vfsqrt.v at e16/m1. The +zvfhmin runs expect a
; widen into the v10 group (vfwcvt.f.f.v), a vfsqrt.v at e32/m2, and a narrow
; (vfncvt.f.f.w) back to e16/m1.
55 declare <vscale x 4 x half> @llvm.sqrt.nxv4f16(<vscale x 4 x half>)
57 define <vscale x 4 x half> @vfsqrt_nxv4f16(<vscale x 4 x half> %v) {
58 ; ZVFH-LABEL: vfsqrt_nxv4f16:
60 ; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
61 ; ZVFH-NEXT: vfsqrt.v v8, v8
64 ; ZVFHMIN-LABEL: vfsqrt_nxv4f16:
66 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
67 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
68 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
69 ; ZVFHMIN-NEXT: vfsqrt.v v10, v10
70 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
71 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
73 %r = call <vscale x 4 x half> @llvm.sqrt.nxv4f16(<vscale x 4 x half> %v)
74 ret <vscale x 4 x half> %r
; sqrt of a <vscale x 8 x half> vector via the llvm.sqrt intrinsic.
; The +zvfh runs expect one vfsqrt.v at e16/m2. The +zvfhmin runs expect a
; widen into the v12 group (vfwcvt.f.f.v), a vfsqrt.v at e32/m4, and a narrow
; (vfncvt.f.f.w) back to e16/m2.
77 declare <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half>)
79 define <vscale x 8 x half> @vfsqrt_nxv8f16(<vscale x 8 x half> %v) {
80 ; ZVFH-LABEL: vfsqrt_nxv8f16:
82 ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
83 ; ZVFH-NEXT: vfsqrt.v v8, v8
86 ; ZVFHMIN-LABEL: vfsqrt_nxv8f16:
88 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
89 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
90 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
91 ; ZVFHMIN-NEXT: vfsqrt.v v12, v12
92 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
93 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
95 %r = call <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half> %v)
96 ret <vscale x 8 x half> %r
; sqrt of a <vscale x 16 x half> vector via the llvm.sqrt intrinsic.
; The +zvfh runs expect one vfsqrt.v at e16/m4. The +zvfhmin runs expect a
; widen into the v16 group (vfwcvt.f.f.v), a vfsqrt.v at e32/m8, and a narrow
; (vfncvt.f.f.w) back to e16/m4.
99 declare <vscale x 16 x half> @llvm.sqrt.nxv16f16(<vscale x 16 x half>)
101 define <vscale x 16 x half> @vfsqrt_nxv16f16(<vscale x 16 x half> %v) {
102 ; ZVFH-LABEL: vfsqrt_nxv16f16:
104 ; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
105 ; ZVFH-NEXT: vfsqrt.v v8, v8
108 ; ZVFHMIN-LABEL: vfsqrt_nxv16f16:
110 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
111 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
112 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
113 ; ZVFHMIN-NEXT: vfsqrt.v v16, v16
114 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
115 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
117 %r = call <vscale x 16 x half> @llvm.sqrt.nxv16f16(<vscale x 16 x half> %v)
118 ret <vscale x 16 x half> %r
; sqrt of a <vscale x 32 x half> vector via the llvm.sqrt intrinsic.
; The +zvfh runs expect one vfsqrt.v at e16/m8. The +zvfhmin runs expect two
; widen / vfsqrt.v / narrow sequences, each working at e16/m4 and e32/m8:
; the first reads v8 and writes its result to v8, the second reads v12 and
; writes its result to v12. Both sequences reuse v16 as the widened temporary.
121 declare <vscale x 32 x half> @llvm.sqrt.nxv32f16(<vscale x 32 x half>)
123 define <vscale x 32 x half> @vfsqrt_nxv32f16(<vscale x 32 x half> %v) {
124 ; ZVFH-LABEL: vfsqrt_nxv32f16:
126 ; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma
127 ; ZVFH-NEXT: vfsqrt.v v8, v8
130 ; ZVFHMIN-LABEL: vfsqrt_nxv32f16:
132 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
133 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
134 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
135 ; ZVFHMIN-NEXT: vfsqrt.v v16, v16
136 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
137 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
138 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
139 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
140 ; ZVFHMIN-NEXT: vfsqrt.v v16, v16
141 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
142 ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
144 %r = call <vscale x 32 x half> @llvm.sqrt.nxv32f16(<vscale x 32 x half> %v)
145 ret <vscale x 32 x half> %r
; sqrt of a <vscale x 1 x float> vector via the llvm.sqrt intrinsic.
; All four RUN configurations share the common prefix here and expect one
; vfsqrt.v at e32/mf2.
148 declare <vscale x 1 x float> @llvm.sqrt.nxv1f32(<vscale x 1 x float>)
150 define <vscale x 1 x float> @vfsqrt_nxv1f32(<vscale x 1 x float> %v) {
151 ; CHECK-LABEL: vfsqrt_nxv1f32:
153 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
154 ; CHECK-NEXT: vfsqrt.v v8, v8
156 %r = call <vscale x 1 x float> @llvm.sqrt.nxv1f32(<vscale x 1 x float> %v)
157 ret <vscale x 1 x float> %r
; sqrt of a <vscale x 2 x float> vector via the llvm.sqrt intrinsic.
; All four RUN configurations share the common prefix here and expect one
; vfsqrt.v at e32/m1.
160 declare <vscale x 2 x float> @llvm.sqrt.nxv2f32(<vscale x 2 x float>)
162 define <vscale x 2 x float> @vfsqrt_nxv2f32(<vscale x 2 x float> %v) {
163 ; CHECK-LABEL: vfsqrt_nxv2f32:
165 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
166 ; CHECK-NEXT: vfsqrt.v v8, v8
168 %r = call <vscale x 2 x float> @llvm.sqrt.nxv2f32(<vscale x 2 x float> %v)
169 ret <vscale x 2 x float> %r
; sqrt of a <vscale x 4 x float> vector via the llvm.sqrt intrinsic.
; All four RUN configurations share the common prefix here and expect one
; vfsqrt.v at e32/m2.
172 declare <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float>)
174 define <vscale x 4 x float> @vfsqrt_nxv4f32(<vscale x 4 x float> %v) {
175 ; CHECK-LABEL: vfsqrt_nxv4f32:
177 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
178 ; CHECK-NEXT: vfsqrt.v v8, v8
180 %r = call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> %v)
181 ret <vscale x 4 x float> %r
; sqrt of a <vscale x 8 x float> vector via the llvm.sqrt intrinsic.
; All four RUN configurations share the common prefix here and expect one
; vfsqrt.v at e32/m4.
184 declare <vscale x 8 x float> @llvm.sqrt.nxv8f32(<vscale x 8 x float>)
186 define <vscale x 8 x float> @vfsqrt_nxv8f32(<vscale x 8 x float> %v) {
187 ; CHECK-LABEL: vfsqrt_nxv8f32:
189 ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
190 ; CHECK-NEXT: vfsqrt.v v8, v8
192 %r = call <vscale x 8 x float> @llvm.sqrt.nxv8f32(<vscale x 8 x float> %v)
193 ret <vscale x 8 x float> %r
; sqrt of a <vscale x 16 x float> vector via the llvm.sqrt intrinsic.
; All four RUN configurations share the common prefix here and expect one
; vfsqrt.v at e32/m8.
196 declare <vscale x 16 x float> @llvm.sqrt.nxv16f32(<vscale x 16 x float>)
198 define <vscale x 16 x float> @vfsqrt_nxv16f32(<vscale x 16 x float> %v) {
199 ; CHECK-LABEL: vfsqrt_nxv16f32:
201 ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
202 ; CHECK-NEXT: vfsqrt.v v8, v8
204 %r = call <vscale x 16 x float> @llvm.sqrt.nxv16f32(<vscale x 16 x float> %v)
205 ret <vscale x 16 x float> %r
; sqrt of a <vscale x 1 x double> vector via the llvm.sqrt intrinsic.
; All four RUN configurations share the common prefix here and expect one
; vfsqrt.v at e64/m1.
208 declare <vscale x 1 x double> @llvm.sqrt.nxv1f64(<vscale x 1 x double>)
210 define <vscale x 1 x double> @vfsqrt_nxv1f64(<vscale x 1 x double> %v) {
211 ; CHECK-LABEL: vfsqrt_nxv1f64:
213 ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
214 ; CHECK-NEXT: vfsqrt.v v8, v8
216 %r = call <vscale x 1 x double> @llvm.sqrt.nxv1f64(<vscale x 1 x double> %v)
217 ret <vscale x 1 x double> %r
; sqrt of a <vscale x 2 x double> vector via the llvm.sqrt intrinsic.
; All four RUN configurations share the common prefix here and expect one
; vfsqrt.v at e64/m2.
220 declare <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double>)
222 define <vscale x 2 x double> @vfsqrt_nxv2f64(<vscale x 2 x double> %v) {
223 ; CHECK-LABEL: vfsqrt_nxv2f64:
225 ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
226 ; CHECK-NEXT: vfsqrt.v v8, v8
228 %r = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> %v)
229 ret <vscale x 2 x double> %r
; sqrt of a <vscale x 4 x double> vector via the llvm.sqrt intrinsic.
; All four RUN configurations share the common prefix here and expect one
; vfsqrt.v at e64/m4.
232 declare <vscale x 4 x double> @llvm.sqrt.nxv4f64(<vscale x 4 x double>)
234 define <vscale x 4 x double> @vfsqrt_nxv4f64(<vscale x 4 x double> %v) {
235 ; CHECK-LABEL: vfsqrt_nxv4f64:
237 ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
238 ; CHECK-NEXT: vfsqrt.v v8, v8
240 %r = call <vscale x 4 x double> @llvm.sqrt.nxv4f64(<vscale x 4 x double> %v)
241 ret <vscale x 4 x double> %r
; sqrt of a <vscale x 8 x double> vector via the llvm.sqrt intrinsic.
; All four RUN configurations share the common prefix here and expect one
; vfsqrt.v at e64/m8.
244 declare <vscale x 8 x double> @llvm.sqrt.nxv8f64(<vscale x 8 x double>)
246 define <vscale x 8 x double> @vfsqrt_nxv8f64(<vscale x 8 x double> %v) {
247 ; CHECK-LABEL: vfsqrt_nxv8f64:
249 ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
250 ; CHECK-NEXT: vfsqrt.v v8, v8
252 %r = call <vscale x 8 x double> @llvm.sqrt.nxv8f64(<vscale x 8 x double> %v)
253 ret <vscale x 8 x double> %r