; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
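
; Check fpext/fptrunc lowering for fixed-length vectors. With an LMUL limit
; of 8 a single widening or narrowing convert suffices; with a limit of 1,
; sources wider than one vector register must be split and converted piecewise.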
define void @fpext_v2f16_v2f32(ptr %x, ptr %y) {
; CHECK-LABEL: fpext_v2f16_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
; CHECK-NEXT:    vse32.v v9, (a1)
; CHECK-NEXT:    ret
  %a = load <2 x half>, ptr %x
  %d = fpext <2 x half> %a to <2 x float>
  store <2 x float> %d, ptr %y
  ret void
}
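
; f16 -> f64 takes two widening converts: f16 -> f32, then f32 -> f64.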
define void @fpext_v2f16_v2f64(ptr %x, ptr %y) {
; CHECK-LABEL: fpext_v2f16_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v8, v9
; CHECK-NEXT:    vse64.v v8, (a1)
; CHECK-NEXT:    ret
  %a = load <2 x half>, ptr %x
  %d = fpext <2 x half> %a to <2 x double>
  store <2 x double> %d, ptr %y
  ret void
}
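
; Under LMULMAX1 the <8 x float> result no longer fits in one register, so
; the source is split with vslidedown.vi and each half is widened separately.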
define void @fpext_v8f16_v8f32(ptr %x, ptr %y) {
; LMULMAX8-LABEL: fpext_v8f16_v8f32:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX8-NEXT:    vle16.v v8, (a0)
; LMULMAX8-NEXT:    vfwcvt.f.f.v v10, v8
; LMULMAX8-NEXT:    vse32.v v10, (a1)
; LMULMAX8-NEXT:    ret
;
; LMULMAX1-LABEL: fpext_v8f16_v8f32:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-NEXT:    vle16.v v8, (a0)
; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; LMULMAX1-NEXT:    vfwcvt.f.f.v v9, v8
; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
; LMULMAX1-NEXT:    vslidedown.vi v8, v8, 4
; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; LMULMAX1-NEXT:    vfwcvt.f.f.v v10, v8
; LMULMAX1-NEXT:    addi a0, a1, 16
; LMULMAX1-NEXT:    vse32.v v10, (a0)
; LMULMAX1-NEXT:    vse32.v v9, (a1)
; LMULMAX1-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %d = fpext <8 x half> %a to <8 x float>
  store <8 x float> %d, ptr %y
  ret void
}
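
; f16 -> f64 under LMULMAX1: the source is sliced into <2 x half> pieces,
; each widened twice and stored as a separate <2 x double>.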
define void @fpext_v8f16_v8f64(ptr %x, ptr %y) {
; LMULMAX8-LABEL: fpext_v8f16_v8f64:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX8-NEXT:    vle16.v v8, (a0)
; LMULMAX8-NEXT:    vfwcvt.f.f.v v10, v8
; LMULMAX8-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; LMULMAX8-NEXT:    vfwcvt.f.f.v v12, v10
; LMULMAX8-NEXT:    vse64.v v12, (a1)
; LMULMAX8-NEXT:    ret
;
; LMULMAX1-LABEL: fpext_v8f16_v8f64:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-NEXT:    vle16.v v8, (a0)
; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; LMULMAX1-NEXT:    vslidedown.vi v9, v8, 2
; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; LMULMAX1-NEXT:    vfwcvt.f.f.v v10, v9
; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; LMULMAX1-NEXT:    vfwcvt.f.f.v v9, v10
; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; LMULMAX1-NEXT:    vfwcvt.f.f.v v10, v8
; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; LMULMAX1-NEXT:    vfwcvt.f.f.v v11, v10
; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
; LMULMAX1-NEXT:    vslidedown.vi v8, v8, 4
; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; LMULMAX1-NEXT:    vfwcvt.f.f.v v10, v8
; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; LMULMAX1-NEXT:    vfwcvt.f.f.v v12, v10
; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; LMULMAX1-NEXT:    vslidedown.vi v8, v8, 2
; LMULMAX1-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; LMULMAX1-NEXT:    vfwcvt.f.f.v v10, v8
; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; LMULMAX1-NEXT:    vfwcvt.f.f.v v8, v10
; LMULMAX1-NEXT:    addi a0, a1, 48
; LMULMAX1-NEXT:    vse64.v v8, (a0)
; LMULMAX1-NEXT:    addi a0, a1, 32
; LMULMAX1-NEXT:    vse64.v v12, (a0)
; LMULMAX1-NEXT:    vse64.v v11, (a1)
; LMULMAX1-NEXT:    addi a1, a1, 16
; LMULMAX1-NEXT:    vse64.v v9, (a1)
; LMULMAX1-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %d = fpext <8 x half> %a to <8 x double>
  store <8 x double> %d, ptr %y
  ret void
}
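
; Truncating direction: f32 -> f16 is a single vfncvt.f.f.w.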
define void @fpround_v2f32_v2f16(ptr %x, ptr %y) {
; CHECK-LABEL: fpround_v2f32_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfncvt.f.f.w v9, v8
; CHECK-NEXT:    vse16.v v9, (a1)
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %d = fptrunc <2 x float> %a to <2 x half>
  store <2 x half> %d, ptr %y
  ret void
}
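
; f64 -> f16 needs two narrowing steps; the first uses vfncvt.rod.f.f.w
; (round towards odd) so the intermediate f32 does not double-round.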
define void @fpround_v2f64_v2f16(ptr %x, ptr %y) {
; CHECK-LABEL: fpround_v2f64_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfncvt.rod.f.f.w v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvt.f.f.w v8, v9
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %d = fptrunc <2 x double> %a to <2 x half>
  store <2 x half> %d, ptr %y
  ret void
}
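
; Under LMULMAX1 the two <4 x float> halves are narrowed separately and the
; results are recombined with vslideup.vi.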
define void @fpround_v8f32_v8f16(ptr %x, ptr %y) {
; LMULMAX8-LABEL: fpround_v8f32_v8f16:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX8-NEXT:    vle32.v v8, (a0)
; LMULMAX8-NEXT:    vfncvt.f.f.w v10, v8
; LMULMAX8-NEXT:    vse16.v v10, (a1)
; LMULMAX8-NEXT:    ret
;
; LMULMAX1-LABEL: fpround_v8f32_v8f16:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    addi a2, a0, 16
; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; LMULMAX1-NEXT:    vle32.v v8, (a0)
; LMULMAX1-NEXT:    vle32.v v9, (a2)
; LMULMAX1-NEXT:    vfncvt.f.f.w v10, v8
; LMULMAX1-NEXT:    vfncvt.f.f.w v8, v9
; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-NEXT:    vslideup.vi v10, v8, 4
; LMULMAX1-NEXT:    vse16.v v10, (a1)
; LMULMAX1-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %d = fptrunc <8 x float> %a to <8 x half>
  store <8 x half> %d, ptr %y
  ret void
}
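
; f64 -> f16 under LMULMAX1: each <2 x double> quarter is narrowed twice,
; then spliced into the result with vslideup.vi at offsets 2, 4 and 6.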
define void @fpround_v8f64_v8f16(ptr %x, ptr %y) {
; LMULMAX8-LABEL: fpround_v8f64_v8f16:
; LMULMAX8:       # %bb.0:
; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX8-NEXT:    vle64.v v8, (a0)
; LMULMAX8-NEXT:    vfncvt.rod.f.f.w v12, v8
; LMULMAX8-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; LMULMAX8-NEXT:    vfncvt.f.f.w v8, v12
; LMULMAX8-NEXT:    vse16.v v8, (a1)
; LMULMAX8-NEXT:    ret
;
; LMULMAX1-LABEL: fpround_v8f64_v8f16:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    addi a2, a0, 48
; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX1-NEXT:    vle64.v v8, (a2)
; LMULMAX1-NEXT:    addi a2, a0, 32
; LMULMAX1-NEXT:    vle64.v v9, (a0)
; LMULMAX1-NEXT:    vle64.v v10, (a2)
; LMULMAX1-NEXT:    addi a0, a0, 16
; LMULMAX1-NEXT:    vle64.v v11, (a0)
; LMULMAX1-NEXT:    vfncvt.rod.f.f.w v12, v9
; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; LMULMAX1-NEXT:    vfncvt.f.f.w v9, v12
; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; LMULMAX1-NEXT:    vfncvt.rod.f.f.w v12, v11
; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; LMULMAX1-NEXT:    vfncvt.f.f.w v11, v12
; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
; LMULMAX1-NEXT:    vslideup.vi v9, v11, 2
; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX1-NEXT:    vfncvt.rod.f.f.w v11, v10
; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; LMULMAX1-NEXT:    vfncvt.f.f.w v10, v11
; LMULMAX1-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
; LMULMAX1-NEXT:    vslideup.vi v9, v10, 4
; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX1-NEXT:    vfncvt.rod.f.f.w v10, v8
; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; LMULMAX1-NEXT:    vfncvt.f.f.w v8, v10
; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-NEXT:    vslideup.vi v9, v8, 6
; LMULMAX1-NEXT:    vse16.v v9, (a1)
; LMULMAX1-NEXT:    ret
  %a = load <8 x double>, ptr %x
  %d = fptrunc <8 x double> %a to <8 x half>
  store <8 x half> %d, ptr %y
  ret void
}