; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=1 | FileCheck %s --check-prefixes=NO_FOLDING,RV32
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=2 | FileCheck %s --check-prefixes=NO_FOLDING,RV32
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=3 | FileCheck %s --check-prefixes=FOLDING,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=1 | FileCheck %s --check-prefixes=NO_FOLDING,RV64
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=2 | FileCheck %s --check-prefixes=NO_FOLDING,RV64
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=3 | FileCheck %s --check-prefixes=FOLDING,RV64
; Check that the default value enables the web folding and
; that it is bigger than 3.
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=FOLDING,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=FOLDING,RV64

; Check that the scalable vector add/sub/mul operations are all promoted into their
; vw counterpart when the maximum ext web size is increased to 3.
; We need the web size to be at least 3 for the folding to happen, because
; the extended values below have multiple users.
; See https://github.com/llvm/llvm-project/pull/72340
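
; For illustration only (this sketch is commentary and is not checked by
; FileCheck), the rewrite being tested takes a widened multiply such as:
;
;   %c = sext <vscale x 2 x i8> %a to <vscale x 2 x i16>
;   %d = sext <vscale x 2 x i8> %b to <vscale x 2 x i16>
;   %e = mul <vscale x 2 x i16> %c, %d
;
; and, once the web of extends is small enough to fold, selects it as a single
; vwmul.vv on the e8 source operands instead of two vsext.vf2 copies followed
; by a full-width vmul.vv (compare the FOLDING and NO_FOLDING lines below).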
define <vscale x 2 x i16> @vwop_vscale_sext_i8i16_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_sext_i8i16_multiple_users:
; NO_FOLDING: # %bb.0:
; NO_FOLDING-NEXT: vsetvli a3, zero, e16, mf2, ta, ma
; NO_FOLDING-NEXT: vle8.v v8, (a0)
; NO_FOLDING-NEXT: vle8.v v9, (a1)
; NO_FOLDING-NEXT: vle8.v v10, (a2)
; NO_FOLDING-NEXT: vsext.vf2 v11, v8
; NO_FOLDING-NEXT: vsext.vf2 v8, v9
; NO_FOLDING-NEXT: vsext.vf2 v9, v10
; NO_FOLDING-NEXT: vmul.vv v8, v11, v8
; NO_FOLDING-NEXT: vadd.vv v10, v11, v9
; NO_FOLDING-NEXT: vsub.vv v9, v11, v9
; NO_FOLDING-NEXT: vor.vv v8, v8, v10
; NO_FOLDING-NEXT: vor.vv v8, v8, v9
; NO_FOLDING-NEXT: ret
;
; FOLDING-LABEL: vwop_vscale_sext_i8i16_multiple_users:
; FOLDING: # %bb.0:
; FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; FOLDING-NEXT: vle8.v v8, (a0)
; FOLDING-NEXT: vle8.v v9, (a1)
; FOLDING-NEXT: vle8.v v10, (a2)
; FOLDING-NEXT: vwmul.vv v11, v8, v9
; FOLDING-NEXT: vwadd.vv v9, v8, v10
; FOLDING-NEXT: vwsub.vv v12, v8, v10
; FOLDING-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; FOLDING-NEXT: vor.vv v8, v11, v9
; FOLDING-NEXT: vor.vv v8, v8, v12
; FOLDING-NEXT: ret
  %a = load <vscale x 2 x i8>, ptr %x
  %b = load <vscale x 2 x i8>, ptr %y
  %b2 = load <vscale x 2 x i8>, ptr %z
  %c = sext <vscale x 2 x i8> %a to <vscale x 2 x i16>
  %d = sext <vscale x 2 x i8> %b to <vscale x 2 x i16>
  %d2 = sext <vscale x 2 x i8> %b2 to <vscale x 2 x i16>
  %e = mul <vscale x 2 x i16> %c, %d
  %f = add <vscale x 2 x i16> %c, %d2
  %g = sub <vscale x 2 x i16> %c, %d2
  %h = or <vscale x 2 x i16> %e, %f
  %i = or <vscale x 2 x i16> %h, %g
  ret <vscale x 2 x i16> %i
}

define <vscale x 2 x i32> @vwop_vscale_sext_i16i32_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_sext_i16i32_multiple_users:
; NO_FOLDING: # %bb.0:
; NO_FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, ma
; NO_FOLDING-NEXT: vle16.v v8, (a0)
; NO_FOLDING-NEXT: vle16.v v9, (a1)
; NO_FOLDING-NEXT: vle16.v v10, (a2)
; NO_FOLDING-NEXT: vsext.vf2 v11, v8
; NO_FOLDING-NEXT: vsext.vf2 v8, v9
; NO_FOLDING-NEXT: vsext.vf2 v9, v10
; NO_FOLDING-NEXT: vmul.vv v8, v11, v8
; NO_FOLDING-NEXT: vadd.vv v10, v11, v9
; NO_FOLDING-NEXT: vsub.vv v9, v11, v9
; NO_FOLDING-NEXT: vor.vv v8, v8, v10
; NO_FOLDING-NEXT: vor.vv v8, v8, v9
; NO_FOLDING-NEXT: ret
;
; FOLDING-LABEL: vwop_vscale_sext_i16i32_multiple_users:
; FOLDING: # %bb.0:
; FOLDING-NEXT: vsetvli a3, zero, e16, mf2, ta, ma
; FOLDING-NEXT: vle16.v v8, (a0)
; FOLDING-NEXT: vle16.v v9, (a1)
; FOLDING-NEXT: vle16.v v10, (a2)
; FOLDING-NEXT: vwmul.vv v11, v8, v9
; FOLDING-NEXT: vwadd.vv v9, v8, v10
; FOLDING-NEXT: vwsub.vv v12, v8, v10
; FOLDING-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; FOLDING-NEXT: vor.vv v8, v11, v9
; FOLDING-NEXT: vor.vv v8, v8, v12
; FOLDING-NEXT: ret
  %a = load <vscale x 2 x i16>, ptr %x
  %b = load <vscale x 2 x i16>, ptr %y
  %b2 = load <vscale x 2 x i16>, ptr %z
  %c = sext <vscale x 2 x i16> %a to <vscale x 2 x i32>
  %d = sext <vscale x 2 x i16> %b to <vscale x 2 x i32>
  %d2 = sext <vscale x 2 x i16> %b2 to <vscale x 2 x i32>
  %e = mul <vscale x 2 x i32> %c, %d
  %f = add <vscale x 2 x i32> %c, %d2
  %g = sub <vscale x 2 x i32> %c, %d2
  %h = or <vscale x 2 x i32> %e, %f
  %i = or <vscale x 2 x i32> %h, %g
  ret <vscale x 2 x i32> %i
}

define <vscale x 2 x i64> @vwop_vscale_sext_i32i64_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_sext_i32i64_multiple_users:
; NO_FOLDING: # %bb.0:
; NO_FOLDING-NEXT: vl1re32.v v8, (a0)
; NO_FOLDING-NEXT: vl1re32.v v9, (a1)
; NO_FOLDING-NEXT: vl1re32.v v10, (a2)
; NO_FOLDING-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; NO_FOLDING-NEXT: vsext.vf2 v12, v8
; NO_FOLDING-NEXT: vsext.vf2 v14, v9
; NO_FOLDING-NEXT: vsext.vf2 v8, v10
; NO_FOLDING-NEXT: vmul.vv v10, v12, v14
; NO_FOLDING-NEXT: vadd.vv v14, v12, v8
; NO_FOLDING-NEXT: vsub.vv v8, v12, v8
; NO_FOLDING-NEXT: vor.vv v10, v10, v14
; NO_FOLDING-NEXT: vor.vv v8, v10, v8
; NO_FOLDING-NEXT: ret
;
; FOLDING-LABEL: vwop_vscale_sext_i32i64_multiple_users:
; FOLDING: # %bb.0:
; FOLDING-NEXT: vl1re32.v v8, (a0)
; FOLDING-NEXT: vl1re32.v v9, (a1)
; FOLDING-NEXT: vl1re32.v v10, (a2)
; FOLDING-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; FOLDING-NEXT: vwmul.vv v12, v8, v9
; FOLDING-NEXT: vwadd.vv v14, v8, v10
; FOLDING-NEXT: vwsub.vv v16, v8, v10
; FOLDING-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; FOLDING-NEXT: vor.vv v8, v12, v14
; FOLDING-NEXT: vor.vv v8, v8, v16
; FOLDING-NEXT: ret
  %a = load <vscale x 2 x i32>, ptr %x
  %b = load <vscale x 2 x i32>, ptr %y
  %b2 = load <vscale x 2 x i32>, ptr %z
  %c = sext <vscale x 2 x i32> %a to <vscale x 2 x i64>
  %d = sext <vscale x 2 x i32> %b to <vscale x 2 x i64>
  %d2 = sext <vscale x 2 x i32> %b2 to <vscale x 2 x i64>
  %e = mul <vscale x 2 x i64> %c, %d
  %f = add <vscale x 2 x i64> %c, %d2
  %g = sub <vscale x 2 x i64> %c, %d2
  %h = or <vscale x 2 x i64> %e, %f
  %i = or <vscale x 2 x i64> %h, %g
  ret <vscale x 2 x i64> %i
}

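; Note that in the i1-source tests below both prefixes produce the same code:
; the i1 extends are materialized with vmv.v.i/vmerge.vim sequences and no vw*
; widening instruction is emitted at any tested web size. The same holds for
; the zext-from-i1 tests further down.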
define <vscale x 2 x i32> @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_sext_i1i32_multiple_users:
; NO_FOLDING: # %bb.0:
; NO_FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu
; NO_FOLDING-NEXT: vlm.v v8, (a0)
; NO_FOLDING-NEXT: vlm.v v9, (a1)
; NO_FOLDING-NEXT: vlm.v v10, (a2)
; NO_FOLDING-NEXT: vmv.v.i v11, 0
; NO_FOLDING-NEXT: vmv.v.v v0, v8
; NO_FOLDING-NEXT: vmerge.vim v12, v11, -1, v0
; NO_FOLDING-NEXT: vmv.v.v v0, v9
; NO_FOLDING-NEXT: vmerge.vim v9, v11, -1, v0
; NO_FOLDING-NEXT: vmv.v.v v0, v10
; NO_FOLDING-NEXT: vmerge.vim v10, v11, -1, v0
; NO_FOLDING-NEXT: vmul.vv v9, v12, v9
; NO_FOLDING-NEXT: li a0, 1
; NO_FOLDING-NEXT: vsub.vv v11, v12, v10
; NO_FOLDING-NEXT: vmv.v.v v0, v8
; NO_FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t
; NO_FOLDING-NEXT: vor.vv v8, v9, v10
; NO_FOLDING-NEXT: vor.vv v8, v8, v11
; NO_FOLDING-NEXT: ret
;
; FOLDING-LABEL: vwop_vscale_sext_i1i32_multiple_users:
; FOLDING: # %bb.0:
; FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu
; FOLDING-NEXT: vlm.v v8, (a0)
; FOLDING-NEXT: vlm.v v9, (a1)
; FOLDING-NEXT: vlm.v v10, (a2)
; FOLDING-NEXT: vmv.v.i v11, 0
; FOLDING-NEXT: vmv.v.v v0, v8
; FOLDING-NEXT: vmerge.vim v12, v11, -1, v0
; FOLDING-NEXT: vmv.v.v v0, v9
; FOLDING-NEXT: vmerge.vim v9, v11, -1, v0
; FOLDING-NEXT: vmv.v.v v0, v10
; FOLDING-NEXT: vmerge.vim v10, v11, -1, v0
; FOLDING-NEXT: vmul.vv v9, v12, v9
; FOLDING-NEXT: li a0, 1
; FOLDING-NEXT: vsub.vv v11, v12, v10
; FOLDING-NEXT: vmv.v.v v0, v8
; FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t
; FOLDING-NEXT: vor.vv v8, v9, v10
; FOLDING-NEXT: vor.vv v8, v8, v11
; FOLDING-NEXT: ret
  %a = load <vscale x 2 x i1>, ptr %x
  %b = load <vscale x 2 x i1>, ptr %y
  %b2 = load <vscale x 2 x i1>, ptr %z
  %c = sext <vscale x 2 x i1> %a to <vscale x 2 x i32>
  %d = sext <vscale x 2 x i1> %b to <vscale x 2 x i32>
  %d2 = sext <vscale x 2 x i1> %b2 to <vscale x 2 x i32>
  %e = mul <vscale x 2 x i32> %c, %d
  %f = add <vscale x 2 x i32> %c, %d2
  %g = sub <vscale x 2 x i32> %c, %d2
  %h = or <vscale x 2 x i32> %e, %f
  %i = or <vscale x 2 x i32> %h, %g
  ret <vscale x 2 x i32> %i
}

define <vscale x 2 x i8> @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_sext_i1i8_multiple_users:
; NO_FOLDING: # %bb.0:
; NO_FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu
; NO_FOLDING-NEXT: vlm.v v8, (a0)
; NO_FOLDING-NEXT: vlm.v v9, (a1)
; NO_FOLDING-NEXT: vlm.v v10, (a2)
; NO_FOLDING-NEXT: vmv.v.i v11, 0
; NO_FOLDING-NEXT: vmv1r.v v0, v8
; NO_FOLDING-NEXT: vmerge.vim v12, v11, -1, v0
; NO_FOLDING-NEXT: vmv1r.v v0, v9
; NO_FOLDING-NEXT: vmerge.vim v9, v11, -1, v0
; NO_FOLDING-NEXT: vmv1r.v v0, v10
; NO_FOLDING-NEXT: vmerge.vim v10, v11, -1, v0
; NO_FOLDING-NEXT: vmul.vv v9, v12, v9
; NO_FOLDING-NEXT: li a0, 1
; NO_FOLDING-NEXT: vsub.vv v11, v12, v10
; NO_FOLDING-NEXT: vmv1r.v v0, v8
; NO_FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t
; NO_FOLDING-NEXT: vor.vv v8, v9, v10
; NO_FOLDING-NEXT: vor.vv v8, v8, v11
; NO_FOLDING-NEXT: ret
;
; FOLDING-LABEL: vwop_vscale_sext_i1i8_multiple_users:
; FOLDING: # %bb.0:
; FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu
; FOLDING-NEXT: vlm.v v8, (a0)
; FOLDING-NEXT: vlm.v v9, (a1)
; FOLDING-NEXT: vlm.v v10, (a2)
; FOLDING-NEXT: vmv.v.i v11, 0
; FOLDING-NEXT: vmv1r.v v0, v8
; FOLDING-NEXT: vmerge.vim v12, v11, -1, v0
; FOLDING-NEXT: vmv1r.v v0, v9
; FOLDING-NEXT: vmerge.vim v9, v11, -1, v0
; FOLDING-NEXT: vmv1r.v v0, v10
; FOLDING-NEXT: vmerge.vim v10, v11, -1, v0
; FOLDING-NEXT: vmul.vv v9, v12, v9
; FOLDING-NEXT: li a0, 1
; FOLDING-NEXT: vsub.vv v11, v12, v10
; FOLDING-NEXT: vmv1r.v v0, v8
; FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t
; FOLDING-NEXT: vor.vv v8, v9, v10
; FOLDING-NEXT: vor.vv v8, v8, v11
; FOLDING-NEXT: ret
  %a = load <vscale x 2 x i1>, ptr %x
  %b = load <vscale x 2 x i1>, ptr %y
  %b2 = load <vscale x 2 x i1>, ptr %z
  %c = sext <vscale x 2 x i1> %a to <vscale x 2 x i8>
  %d = sext <vscale x 2 x i1> %b to <vscale x 2 x i8>
  %d2 = sext <vscale x 2 x i1> %b2 to <vscale x 2 x i8>
  %e = mul <vscale x 2 x i8> %c, %d
  %f = add <vscale x 2 x i8> %c, %d2
  %g = sub <vscale x 2 x i8> %c, %d2
  %h = or <vscale x 2 x i8> %e, %f
  %i = or <vscale x 2 x i8> %h, %g
  ret <vscale x 2 x i8> %i
}

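; For the i8 -> i32 widening below, only one of the two extension steps is
; folded: FOLDING still emits vsext.vf2 to reach e16, then uses vwmul.vv,
; vwadd.vv and vwsub.vv for the final e16 -> e32 step.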
define <vscale x 2 x i32> @vwop_vscale_sext_i8i32_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_sext_i8i32_multiple_users:
; NO_FOLDING: # %bb.0:
; NO_FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, ma
; NO_FOLDING-NEXT: vle8.v v8, (a0)
; NO_FOLDING-NEXT: vle8.v v9, (a1)
; NO_FOLDING-NEXT: vle8.v v10, (a2)
; NO_FOLDING-NEXT: vsext.vf4 v11, v8
; NO_FOLDING-NEXT: vsext.vf4 v8, v9
; NO_FOLDING-NEXT: vsext.vf4 v9, v10
; NO_FOLDING-NEXT: vmul.vv v8, v11, v8
; NO_FOLDING-NEXT: vadd.vv v10, v11, v9
; NO_FOLDING-NEXT: vsub.vv v9, v11, v9
; NO_FOLDING-NEXT: vor.vv v8, v8, v10
; NO_FOLDING-NEXT: vor.vv v8, v8, v9
; NO_FOLDING-NEXT: ret
;
; FOLDING-LABEL: vwop_vscale_sext_i8i32_multiple_users:
; FOLDING: # %bb.0:
; FOLDING-NEXT: vsetvli a3, zero, e16, mf2, ta, ma
; FOLDING-NEXT: vle8.v v8, (a0)
; FOLDING-NEXT: vle8.v v9, (a1)
; FOLDING-NEXT: vle8.v v10, (a2)
; FOLDING-NEXT: vsext.vf2 v11, v8
; FOLDING-NEXT: vsext.vf2 v8, v9
; FOLDING-NEXT: vsext.vf2 v9, v10
; FOLDING-NEXT: vwmul.vv v10, v11, v8
; FOLDING-NEXT: vwadd.vv v8, v11, v9
; FOLDING-NEXT: vwsub.vv v12, v11, v9
; FOLDING-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; FOLDING-NEXT: vor.vv v8, v10, v8
; FOLDING-NEXT: vor.vv v8, v8, v12
; FOLDING-NEXT: ret
  %a = load <vscale x 2 x i8>, ptr %x
  %b = load <vscale x 2 x i8>, ptr %y
  %b2 = load <vscale x 2 x i8>, ptr %z
  %c = sext <vscale x 2 x i8> %a to <vscale x 2 x i32>
  %d = sext <vscale x 2 x i8> %b to <vscale x 2 x i32>
  %d2 = sext <vscale x 2 x i8> %b2 to <vscale x 2 x i32>
  %e = mul <vscale x 2 x i32> %c, %d
  %f = add <vscale x 2 x i32> %c, %d2
  %g = sub <vscale x 2 x i32> %c, %d2
  %h = or <vscale x 2 x i32> %e, %f
  %i = or <vscale x 2 x i32> %h, %g
  ret <vscale x 2 x i32> %i
}

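; The zext variants below mirror the sext tests above; with folding enabled
; they select the unsigned widening forms vwmulu.vv, vwaddu.vv and vwsubu.vv.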
define <vscale x 2 x i16> @vwop_vscale_zext_i8i16_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_zext_i8i16_multiple_users:
; NO_FOLDING: # %bb.0:
; NO_FOLDING-NEXT: vsetvli a3, zero, e16, mf2, ta, ma
; NO_FOLDING-NEXT: vle8.v v8, (a0)
; NO_FOLDING-NEXT: vle8.v v9, (a1)
; NO_FOLDING-NEXT: vle8.v v10, (a2)
; NO_FOLDING-NEXT: vzext.vf2 v11, v8
; NO_FOLDING-NEXT: vzext.vf2 v8, v9
; NO_FOLDING-NEXT: vzext.vf2 v9, v10
; NO_FOLDING-NEXT: vmul.vv v8, v11, v8
; NO_FOLDING-NEXT: vadd.vv v10, v11, v9
; NO_FOLDING-NEXT: vsub.vv v9, v11, v9
; NO_FOLDING-NEXT: vor.vv v8, v8, v10
; NO_FOLDING-NEXT: vor.vv v8, v8, v9
; NO_FOLDING-NEXT: ret
;
; FOLDING-LABEL: vwop_vscale_zext_i8i16_multiple_users:
; FOLDING: # %bb.0:
; FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; FOLDING-NEXT: vle8.v v8, (a0)
; FOLDING-NEXT: vle8.v v9, (a1)
; FOLDING-NEXT: vle8.v v10, (a2)
; FOLDING-NEXT: vwmulu.vv v11, v8, v9
; FOLDING-NEXT: vwaddu.vv v9, v8, v10
; FOLDING-NEXT: vwsubu.vv v12, v8, v10
; FOLDING-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; FOLDING-NEXT: vor.vv v8, v11, v9
; FOLDING-NEXT: vor.vv v8, v8, v12
; FOLDING-NEXT: ret
  %a = load <vscale x 2 x i8>, ptr %x
  %b = load <vscale x 2 x i8>, ptr %y
  %b2 = load <vscale x 2 x i8>, ptr %z
  %c = zext <vscale x 2 x i8> %a to <vscale x 2 x i16>
  %d = zext <vscale x 2 x i8> %b to <vscale x 2 x i16>
  %d2 = zext <vscale x 2 x i8> %b2 to <vscale x 2 x i16>
  %e = mul <vscale x 2 x i16> %c, %d
  %f = add <vscale x 2 x i16> %c, %d2
  %g = sub <vscale x 2 x i16> %c, %d2
  %h = or <vscale x 2 x i16> %e, %f
  %i = or <vscale x 2 x i16> %h, %g
  ret <vscale x 2 x i16> %i
}

define <vscale x 2 x i32> @vwop_vscale_zext_i16i32_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_zext_i16i32_multiple_users:
; NO_FOLDING: # %bb.0:
; NO_FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, ma
; NO_FOLDING-NEXT: vle16.v v8, (a0)
; NO_FOLDING-NEXT: vle16.v v9, (a1)
; NO_FOLDING-NEXT: vle16.v v10, (a2)
; NO_FOLDING-NEXT: vzext.vf2 v11, v8
; NO_FOLDING-NEXT: vzext.vf2 v8, v9
; NO_FOLDING-NEXT: vzext.vf2 v9, v10
; NO_FOLDING-NEXT: vmul.vv v8, v11, v8
; NO_FOLDING-NEXT: vadd.vv v10, v11, v9
; NO_FOLDING-NEXT: vsub.vv v9, v11, v9
; NO_FOLDING-NEXT: vor.vv v8, v8, v10
; NO_FOLDING-NEXT: vor.vv v8, v8, v9
; NO_FOLDING-NEXT: ret
;
; FOLDING-LABEL: vwop_vscale_zext_i16i32_multiple_users:
; FOLDING: # %bb.0:
; FOLDING-NEXT: vsetvli a3, zero, e16, mf2, ta, ma
; FOLDING-NEXT: vle16.v v8, (a0)
; FOLDING-NEXT: vle16.v v9, (a1)
; FOLDING-NEXT: vle16.v v10, (a2)
; FOLDING-NEXT: vwmulu.vv v11, v8, v9
; FOLDING-NEXT: vwaddu.vv v9, v8, v10
; FOLDING-NEXT: vwsubu.vv v12, v8, v10
; FOLDING-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; FOLDING-NEXT: vor.vv v8, v11, v9
; FOLDING-NEXT: vor.vv v8, v8, v12
; FOLDING-NEXT: ret
  %a = load <vscale x 2 x i16>, ptr %x
  %b = load <vscale x 2 x i16>, ptr %y
  %b2 = load <vscale x 2 x i16>, ptr %z
  %c = zext <vscale x 2 x i16> %a to <vscale x 2 x i32>
  %d = zext <vscale x 2 x i16> %b to <vscale x 2 x i32>
  %d2 = zext <vscale x 2 x i16> %b2 to <vscale x 2 x i32>
  %e = mul <vscale x 2 x i32> %c, %d
  %f = add <vscale x 2 x i32> %c, %d2
  %g = sub <vscale x 2 x i32> %c, %d2
  %h = or <vscale x 2 x i32> %e, %f
  %i = or <vscale x 2 x i32> %h, %g
  ret <vscale x 2 x i32> %i
}

define <vscale x 2 x i64> @vwop_vscale_zext_i32i64_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_zext_i32i64_multiple_users:
; NO_FOLDING: # %bb.0:
; NO_FOLDING-NEXT: vl1re32.v v8, (a0)
; NO_FOLDING-NEXT: vl1re32.v v9, (a1)
; NO_FOLDING-NEXT: vl1re32.v v10, (a2)
; NO_FOLDING-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; NO_FOLDING-NEXT: vzext.vf2 v12, v8
; NO_FOLDING-NEXT: vzext.vf2 v14, v9
; NO_FOLDING-NEXT: vzext.vf2 v8, v10
; NO_FOLDING-NEXT: vmul.vv v10, v12, v14
; NO_FOLDING-NEXT: vadd.vv v14, v12, v8
; NO_FOLDING-NEXT: vsub.vv v8, v12, v8
; NO_FOLDING-NEXT: vor.vv v10, v10, v14
; NO_FOLDING-NEXT: vor.vv v8, v10, v8
; NO_FOLDING-NEXT: ret
;
; FOLDING-LABEL: vwop_vscale_zext_i32i64_multiple_users:
; FOLDING: # %bb.0:
; FOLDING-NEXT: vl1re32.v v8, (a0)
; FOLDING-NEXT: vl1re32.v v9, (a1)
; FOLDING-NEXT: vl1re32.v v10, (a2)
; FOLDING-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; FOLDING-NEXT: vwmulu.vv v12, v8, v9
; FOLDING-NEXT: vwaddu.vv v14, v8, v10
; FOLDING-NEXT: vwsubu.vv v16, v8, v10
; FOLDING-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; FOLDING-NEXT: vor.vv v8, v12, v14
; FOLDING-NEXT: vor.vv v8, v8, v16
; FOLDING-NEXT: ret
  %a = load <vscale x 2 x i32>, ptr %x
  %b = load <vscale x 2 x i32>, ptr %y
  %b2 = load <vscale x 2 x i32>, ptr %z
  %c = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
  %d = zext <vscale x 2 x i32> %b to <vscale x 2 x i64>
  %d2 = zext <vscale x 2 x i32> %b2 to <vscale x 2 x i64>
  %e = mul <vscale x 2 x i64> %c, %d
  %f = add <vscale x 2 x i64> %c, %d2
  %g = sub <vscale x 2 x i64> %c, %d2
  %h = or <vscale x 2 x i64> %e, %f
  %i = or <vscale x 2 x i64> %h, %g
  ret <vscale x 2 x i64> %i
}

define <vscale x 2 x i32> @vwop_vscale_zext_i1i32_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_zext_i1i32_multiple_users:
; NO_FOLDING: # %bb.0:
; NO_FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu
; NO_FOLDING-NEXT: vlm.v v0, (a0)
; NO_FOLDING-NEXT: vlm.v v8, (a2)
; NO_FOLDING-NEXT: vlm.v v9, (a1)
; NO_FOLDING-NEXT: vmv.v.i v10, 0
; NO_FOLDING-NEXT: vmerge.vim v11, v10, 1, v0
; NO_FOLDING-NEXT: vmv.v.v v0, v8
; NO_FOLDING-NEXT: vmerge.vim v8, v10, 1, v0
; NO_FOLDING-NEXT: vadd.vv v10, v11, v8
; NO_FOLDING-NEXT: vsub.vv v8, v11, v8
; NO_FOLDING-NEXT: vmv.v.v v0, v9
; NO_FOLDING-NEXT: vor.vv v10, v10, v11, v0.t
; NO_FOLDING-NEXT: vor.vv v8, v10, v8
; NO_FOLDING-NEXT: ret
;
; FOLDING-LABEL: vwop_vscale_zext_i1i32_multiple_users:
; FOLDING: # %bb.0:
; FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu
; FOLDING-NEXT: vlm.v v0, (a0)
; FOLDING-NEXT: vlm.v v8, (a2)
; FOLDING-NEXT: vlm.v v9, (a1)
; FOLDING-NEXT: vmv.v.i v10, 0
; FOLDING-NEXT: vmerge.vim v11, v10, 1, v0
; FOLDING-NEXT: vmv.v.v v0, v8
; FOLDING-NEXT: vmerge.vim v8, v10, 1, v0
; FOLDING-NEXT: vadd.vv v10, v11, v8
; FOLDING-NEXT: vsub.vv v8, v11, v8
; FOLDING-NEXT: vmv.v.v v0, v9
; FOLDING-NEXT: vor.vv v10, v10, v11, v0.t
; FOLDING-NEXT: vor.vv v8, v10, v8
; FOLDING-NEXT: ret
  %a = load <vscale x 2 x i1>, ptr %x
  %b = load <vscale x 2 x i1>, ptr %y
  %b2 = load <vscale x 2 x i1>, ptr %z
  %c = zext <vscale x 2 x i1> %a to <vscale x 2 x i32>
  %d = zext <vscale x 2 x i1> %b to <vscale x 2 x i32>
  %d2 = zext <vscale x 2 x i1> %b2 to <vscale x 2 x i32>
  %e = mul <vscale x 2 x i32> %c, %d
  %f = add <vscale x 2 x i32> %c, %d2
  %g = sub <vscale x 2 x i32> %c, %d2
  %h = or <vscale x 2 x i32> %e, %f
  %i = or <vscale x 2 x i32> %h, %g
  ret <vscale x 2 x i32> %i
}

define <vscale x 2 x i8> @vwop_vscale_zext_i1i8_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_zext_i1i8_multiple_users:
; NO_FOLDING: # %bb.0:
; NO_FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu
; NO_FOLDING-NEXT: vlm.v v0, (a0)
; NO_FOLDING-NEXT: vlm.v v8, (a2)
; NO_FOLDING-NEXT: vlm.v v9, (a1)
; NO_FOLDING-NEXT: vmv.v.i v10, 0
; NO_FOLDING-NEXT: vmerge.vim v11, v10, 1, v0
; NO_FOLDING-NEXT: vmv1r.v v0, v8
; NO_FOLDING-NEXT: vmerge.vim v8, v10, 1, v0
; NO_FOLDING-NEXT: vadd.vv v10, v11, v8
; NO_FOLDING-NEXT: vsub.vv v8, v11, v8
; NO_FOLDING-NEXT: vmv1r.v v0, v9
; NO_FOLDING-NEXT: vor.vv v10, v10, v11, v0.t
; NO_FOLDING-NEXT: vor.vv v8, v10, v8
; NO_FOLDING-NEXT: ret
;
; FOLDING-LABEL: vwop_vscale_zext_i1i8_multiple_users:
; FOLDING: # %bb.0:
; FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu
; FOLDING-NEXT: vlm.v v0, (a0)
; FOLDING-NEXT: vlm.v v8, (a2)
; FOLDING-NEXT: vlm.v v9, (a1)
; FOLDING-NEXT: vmv.v.i v10, 0
; FOLDING-NEXT: vmerge.vim v11, v10, 1, v0
; FOLDING-NEXT: vmv1r.v v0, v8
; FOLDING-NEXT: vmerge.vim v8, v10, 1, v0
; FOLDING-NEXT: vadd.vv v10, v11, v8
; FOLDING-NEXT: vsub.vv v8, v11, v8
; FOLDING-NEXT: vmv1r.v v0, v9
; FOLDING-NEXT: vor.vv v10, v10, v11, v0.t
; FOLDING-NEXT: vor.vv v8, v10, v8
; FOLDING-NEXT: ret
  %a = load <vscale x 2 x i1>, ptr %x
  %b = load <vscale x 2 x i1>, ptr %y
  %b2 = load <vscale x 2 x i1>, ptr %z
  %c = zext <vscale x 2 x i1> %a to <vscale x 2 x i8>
  %d = zext <vscale x 2 x i1> %b to <vscale x 2 x i8>
  %d2 = zext <vscale x 2 x i1> %b2 to <vscale x 2 x i8>
  %e = mul <vscale x 2 x i8> %c, %d
  %f = add <vscale x 2 x i8> %c, %d2
  %g = sub <vscale x 2 x i8> %c, %d2
  %h = or <vscale x 2 x i8> %e, %f
  %i = or <vscale x 2 x i8> %h, %g
  ret <vscale x 2 x i8> %i
}

define <vscale x 2 x i32> @vwop_vscale_zext_i8i32_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_zext_i8i32_multiple_users:
; NO_FOLDING: # %bb.0:
; NO_FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, ma
; NO_FOLDING-NEXT: vle8.v v8, (a0)
; NO_FOLDING-NEXT: vle8.v v9, (a1)
; NO_FOLDING-NEXT: vle8.v v10, (a2)
; NO_FOLDING-NEXT: vzext.vf4 v11, v8
; NO_FOLDING-NEXT: vzext.vf4 v8, v9
; NO_FOLDING-NEXT: vzext.vf4 v9, v10
; NO_FOLDING-NEXT: vmul.vv v8, v11, v8
; NO_FOLDING-NEXT: vadd.vv v10, v11, v9
; NO_FOLDING-NEXT: vsub.vv v9, v11, v9
; NO_FOLDING-NEXT: vor.vv v8, v8, v10
; NO_FOLDING-NEXT: vor.vv v8, v8, v9
; NO_FOLDING-NEXT: ret
;
; FOLDING-LABEL: vwop_vscale_zext_i8i32_multiple_users:
; FOLDING: # %bb.0:
; FOLDING-NEXT: vsetvli a3, zero, e16, mf2, ta, ma
; FOLDING-NEXT: vle8.v v8, (a0)
; FOLDING-NEXT: vle8.v v9, (a1)
; FOLDING-NEXT: vle8.v v10, (a2)
; FOLDING-NEXT: vzext.vf2 v11, v8
; FOLDING-NEXT: vzext.vf2 v8, v9
; FOLDING-NEXT: vzext.vf2 v9, v10
; FOLDING-NEXT: vwmulu.vv v10, v11, v8
; FOLDING-NEXT: vwaddu.vv v8, v11, v9
; FOLDING-NEXT: vwsubu.vv v12, v11, v9
; FOLDING-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; FOLDING-NEXT: vor.vv v8, v10, v8
; FOLDING-NEXT: vor.vv v8, v8, v12
; FOLDING-NEXT: ret
  %a = load <vscale x 2 x i8>, ptr %x
  %b = load <vscale x 2 x i8>, ptr %y
  %b2 = load <vscale x 2 x i8>, ptr %z
  %c = zext <vscale x 2 x i8> %a to <vscale x 2 x i32>
  %d = zext <vscale x 2 x i8> %b to <vscale x 2 x i32>
  %d2 = zext <vscale x 2 x i8> %b2 to <vscale x 2 x i32>
  %e = mul <vscale x 2 x i32> %c, %d
  %f = add <vscale x 2 x i32> %c, %d2
  %g = sub <vscale x 2 x i32> %c, %d2
  %h = or <vscale x 2 x i32> %e, %f
  %i = or <vscale x 2 x i32> %h, %g
  ret <vscale x 2 x i32> %i
}

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32: {{.*}}
; RV64: {{.*}}