; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
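
; Check that a mul of a zero-extended operand by a sign-extended operand is
; selected to the RVV widening signed-unsigned multiply (vwmulsu), with the
; sign-extended value as the vs2 operand.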
define <2 x i16> @vwmulsu_v2i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = zext <2 x i8> %a to <2 x i16>
  %d = sext <2 x i8> %b to <2 x i16>
  %e = mul <2 x i16> %c, %d
  ret <2 x i16> %e
}

define <2 x i16> @vwmulsu_v2i16_swap(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v2i16_swap:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = sext <2 x i8> %a to <2 x i16>
  %d = zext <2 x i8> %b to <2 x i16>
  %e = mul <2 x i16> %c, %d
  ret <2 x i16> %e
}

define <4 x i16> @vwmulsu_v4i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = load <4 x i8>, ptr %y
  %c = zext <4 x i8> %a to <4 x i16>
  %d = sext <4 x i8> %b to <4 x i16>
  %e = mul <4 x i16> %c, %d
  ret <4 x i16> %e
}

define <2 x i32> @vwmulsu_v2i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <2 x i16>, ptr %x
  %b = load <2 x i16>, ptr %y
  %c = zext <2 x i16> %a to <2 x i32>
  %d = sext <2 x i16> %b to <2 x i32>
  %e = mul <2 x i32> %c, %d
  ret <2 x i32> %e
}

define <8 x i16> @vwmulsu_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load <8 x i8>, ptr %y
  %c = zext <8 x i8> %a to <8 x i16>
  %d = sext <8 x i8> %b to <8 x i16>
  %e = mul <8 x i16> %c, %d
  ret <8 x i16> %e
}

define <4 x i32> @vwmulsu_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load <4 x i16>, ptr %y
  %c = zext <4 x i16> %a to <4 x i32>
  %d = sext <4 x i16> %b to <4 x i32>
  %e = mul <4 x i32> %c, %d
  ret <4 x i32> %e
}

define <2 x i64> @vwmulsu_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vle32.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load <2 x i32>, ptr %y
  %c = zext <2 x i32> %a to <2 x i64>
  %d = sext <2 x i32> %b to <2 x i64>
  %e = mul <2 x i64> %c, %d
  ret <2 x i64> %e
}

define <16 x i16> @vwmulsu_v16i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vle8.v v11, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = zext <16 x i8> %a to <16 x i16>
  %d = sext <16 x i8> %b to <16 x i16>
  %e = mul <16 x i16> %c, %d
  ret <16 x i16> %e
}

define <8 x i32> @vwmulsu_v8i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vle16.v v11, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = zext <8 x i16> %a to <8 x i32>
  %d = sext <8 x i16> %b to <8 x i32>
  %e = mul <8 x i32> %c, %d
  ret <8 x i32> %e
}

define <4 x i64> @vwmulsu_v4i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vle32.v v11, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = zext <4 x i32> %a to <4 x i64>
  %d = sext <4 x i32> %b to <4 x i64>
  %e = mul <4 x i64> %c, %d
  ret <4 x i64> %e
}

define <32 x i16> @vwmulsu_v32i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    vle8.v v14, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v14, v12
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = zext <32 x i8> %a to <32 x i16>
  %d = sext <32 x i8> %b to <32 x i16>
  %e = mul <32 x i16> %c, %d
  ret <32 x i16> %e
}

define <16 x i32> @vwmulsu_v16i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vle16.v v14, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v14, v12
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = zext <16 x i16> %a to <16 x i32>
  %d = sext <16 x i16> %b to <16 x i32>
  %e = mul <16 x i32> %c, %d
  ret <16 x i32> %e
}

define <8 x i64> @vwmulsu_v8i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vle32.v v14, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v14, v12
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = zext <8 x i32> %a to <8 x i64>
  %d = sext <8 x i32> %b to <8 x i64>
  %e = mul <8 x i64> %c, %d
  ret <8 x i64> %e
}

define <64 x i16> @vwmulsu_v64i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    vle8.v v20, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v20, v16
; CHECK-NEXT:    ret
  %a = load <64 x i8>, ptr %x
  %b = load <64 x i8>, ptr %y
  %c = zext <64 x i8> %a to <64 x i16>
  %d = sext <64 x i8> %b to <64 x i16>
  %e = mul <64 x i16> %c, %d
  ret <64 x i16> %e
}

define <32 x i32> @vwmulsu_v32i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vle16.v v20, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v20, v16
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %x
  %b = load <32 x i16>, ptr %y
  %c = zext <32 x i16> %a to <32 x i32>
  %d = sext <32 x i16> %b to <32 x i32>
  %e = mul <32 x i32> %c, %d
  ret <32 x i32> %e
}

define <16 x i64> @vwmulsu_v16i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vle32.v v20, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v20, v16
; CHECK-NEXT:    ret
  %a = load <16 x i32>, ptr %x
  %b = load <16 x i32>, ptr %y
  %c = zext <16 x i32> %a to <16 x i64>
  %d = sext <16 x i32> %b to <16 x i64>
  %e = mul <16 x i64> %c, %d
  ret <16 x i64> %e
}

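; For the cases below the widened result no longer fits in a single LMUL=8
; register group, so the inputs are split with vslidedown and the product is
; computed as two vwmulsu.vv halves with vector spills around them.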
define <128 x i16> @vwmulsu_v128i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v128i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 128
; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle8.v v0, (a1)
; CHECK-NEXT:    li a0, 64
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT:    vmv4r.v v24, v8
; CHECK-NEXT:    vwmulsu.vv v8, v24, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmulsu.vv v8, v0, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
  %a = load <128 x i8>, ptr %x
  %b = load <128 x i8>, ptr %y
  %c = zext <128 x i8> %a to <128 x i16>
  %d = sext <128 x i8> %b to <128 x i16>
  %e = mul <128 x i16> %c, %d
  ret <128 x i16> %e
}

define <64 x i32> @vwmulsu_v64i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v64i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle16.v v0, (a1)
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vmv4r.v v24, v8
; CHECK-NEXT:    vwmulsu.vv v8, v24, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmulsu.vv v8, v0, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
  %a = load <64 x i16>, ptr %x
  %b = load <64 x i16>, ptr %y
  %c = zext <64 x i16> %a to <64 x i32>
  %d = sext <64 x i16> %b to <64 x i32>
  %e = mul <64 x i32> %c, %d
  ret <64 x i32> %e
}

define <32 x i64> @vwmulsu_v32i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v32i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle32.v v0, (a1)
; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v16, v8, 16
; CHECK-NEXT:    vslidedown.vi v8, v0, 16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vmv4r.v v24, v8
; CHECK-NEXT:    vwmulsu.vv v8, v24, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmulsu.vv v8, v0, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
  %a = load <32 x i32>, ptr %x
  %b = load <32 x i32>, ptr %y
  %c = zext <32 x i32> %a to <32 x i64>
  %d = sext <32 x i32> %b to <32 x i64>
  %e = mul <32 x i64> %c, %d
  ret <32 x i64> %e
}

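; When an operand needs to be widened by more than one power of two, it is
; first extended explicitly with vzext/vsext so that a single widening
; multiply covers the last doubling.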
define <2 x i32> @vwmulsu_v2i32_v2i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v2i32_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vzext.vf2 v10, v8
; CHECK-NEXT:    vsext.vf2 v11, v9
; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = zext <2 x i8> %a to <2 x i32>
  %d = sext <2 x i8> %b to <2 x i32>
  %e = mul <2 x i32> %c, %d
  ret <2 x i32> %e
}

define <4 x i32> @vwmulsu_v4i32_v4i8_v4i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v4i32_v4i8_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vzext.vf2 v10, v8
; CHECK-NEXT:    vwmulsu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = load <4 x i16>, ptr %y
  %c = zext <4 x i8> %a to <4 x i32>
  %d = sext <4 x i16> %b to <4 x i32>
  %e = mul <4 x i32> %c, %d
  ret <4 x i32> %e
}

define <4 x i64> @vwmulsu_v4i64_v4i32_v4i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v4i64_v4i32_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vsext.vf4 v11, v8
; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i8>, ptr %y
  %c = zext <4 x i32> %a to <4 x i64>
  %d = sext <4 x i8> %b to <4 x i64>
  %e = mul <4 x i64> %c, %d
  ret <4 x i64> %e
}

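; A splat of the zero-extended scalar folds into the .vx form with the scalar
; kept in a GPR; when the splat is the sign-extended operand instead, it is
; materialized with vmv.v.x and the .vv form is used.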
define <2 x i16> @vwmulsu_vx_v2i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = insertelement <2 x i8> poison, i8 %y, i32 0
  %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
  %d = sext <2 x i8> %a to <2 x i16>
  %e = zext <2 x i8> %c to <2 x i16>
  %f = mul <2 x i16> %d, %e
  ret <2 x i16> %f
}

define <2 x i16> @vwmulsu_vx_v2i16_swap(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v2i16_swap:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vmv.v.x v10, a1
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = insertelement <2 x i8> poison, i8 %y, i32 0
  %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
  %d = zext <2 x i8> %a to <2 x i16>
  %e = sext <2 x i8> %c to <2 x i16>
  %f = mul <2 x i16> %d, %e
  ret <2 x i16> %f
}

define <4 x i16> @vwmulsu_vx_v4i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = insertelement <4 x i8> poison, i8 %y, i32 0
  %c = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer
  %d = sext <4 x i8> %a to <4 x i16>
  %e = zext <4 x i8> %c to <4 x i16>
  %f = mul <4 x i16> %d, %e
  ret <4 x i16> %f
}

define <2 x i32> @vwmulsu_vx_v2i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i16>, ptr %x
  %b = insertelement <2 x i16> poison, i16 %y, i32 0
  %c = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> zeroinitializer
  %d = sext <2 x i16> %a to <2 x i32>
  %e = zext <2 x i16> %c to <2 x i32>
  %f = mul <2 x i32> %d, %e
  ret <2 x i32> %f
}

define <8 x i16> @vwmulsu_vx_v8i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = insertelement <8 x i8> poison, i8 %y, i32 0
  %c = shufflevector <8 x i8> %b, <8 x i8> poison, <8 x i32> zeroinitializer
  %d = sext <8 x i8> %a to <8 x i16>
  %e = zext <8 x i8> %c to <8 x i16>
  %f = mul <8 x i16> %d, %e
  ret <8 x i16> %f
}

define <4 x i32> @vwmulsu_vx_v4i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = insertelement <4 x i16> poison, i16 %y, i32 0
  %c = shufflevector <4 x i16> %b, <4 x i16> poison, <4 x i32> zeroinitializer
  %d = sext <4 x i16> %a to <4 x i32>
  %e = zext <4 x i16> %c to <4 x i32>
  %f = mul <4 x i32> %d, %e
  ret <4 x i32> %f
}

define <2 x i64> @vwmulsu_vx_v2i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulsu_vx_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = insertelement <2 x i32> poison, i32 %y, i64 0
  %c = shufflevector <2 x i32> %b, <2 x i32> poison, <2 x i32> zeroinitializer
  %d = sext <2 x i32> %a to <2 x i64>
  %e = zext <2 x i32> %c to <2 x i64>
  %f = mul <2 x i64> %d, %e
  ret <2 x i64> %f
}

define <16 x i16> @vwmulsu_vx_v16i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = sext <16 x i8> %a to <16 x i16>
  %e = zext <16 x i8> %c to <16 x i16>
  %f = mul <16 x i16> %d, %e
  ret <16 x i16> %f
}

define <8 x i32> @vwmulsu_vx_v8i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = sext <8 x i16> %a to <8 x i32>
  %e = zext <8 x i16> %c to <8 x i32>
  %f = mul <8 x i32> %d, %e
  ret <8 x i32> %f
}

define <4 x i64> @vwmulsu_vx_v4i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulsu_vx_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i64 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = sext <4 x i32> %a to <4 x i64>
  %e = zext <4 x i32> %c to <4 x i64>
  %f = mul <4 x i64> %d, %e
  ret <4 x i64> %f
}

define <32 x i16> @vwmulsu_vx_v32i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = insertelement <32 x i8> poison, i8 %y, i32 0
  %c = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer
  %d = sext <32 x i8> %a to <32 x i16>
  %e = zext <32 x i8> %c to <32 x i16>
  %f = mul <32 x i16> %d, %e
  ret <32 x i16> %f
}

define <16 x i32> @vwmulsu_vx_v16i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = insertelement <16 x i16> poison, i16 %y, i32 0
  %c = shufflevector <16 x i16> %b, <16 x i16> poison, <16 x i32> zeroinitializer
  %d = sext <16 x i16> %a to <16 x i32>
  %e = zext <16 x i16> %c to <16 x i32>
  %f = mul <16 x i32> %d, %e
  ret <16 x i32> %f
}

define <8 x i64> @vwmulsu_vx_v8i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulsu_vx_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = insertelement <8 x i32> poison, i32 %y, i64 0
  %c = shufflevector <8 x i32> %b, <8 x i32> poison, <8 x i32> zeroinitializer
  %d = sext <8 x i32> %a to <8 x i64>
  %e = zext <8 x i32> %c to <8 x i64>
  %f = mul <8 x i64> %d, %e
  ret <8 x i64> %f
}

define <64 x i16> @vwmulsu_vx_v64i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <64 x i8>, ptr %x
  %b = insertelement <64 x i8> poison, i8 %y, i32 0
  %c = shufflevector <64 x i8> %b, <64 x i8> poison, <64 x i32> zeroinitializer
  %d = sext <64 x i8> %a to <64 x i16>
  %e = zext <64 x i8> %c to <64 x i16>
  %f = mul <64 x i16> %d, %e
  ret <64 x i16> %f
}

define <32 x i32> @vwmulsu_vx_v32i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %x
  %b = insertelement <32 x i16> poison, i16 %y, i32 0
  %c = shufflevector <32 x i16> %b, <32 x i16> poison, <32 x i32> zeroinitializer
  %d = sext <32 x i16> %a to <32 x i32>
  %e = zext <32 x i16> %c to <32 x i32>
  %f = mul <32 x i32> %d, %e
  ret <32 x i32> %f
}

define <16 x i64> @vwmulsu_vx_v16i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulsu_vx_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <16 x i32>, ptr %x
  %b = insertelement <16 x i32> poison, i32 %y, i64 0
  %c = shufflevector <16 x i32> %b, <16 x i32> poison, <16 x i32> zeroinitializer
  %d = sext <16 x i32> %a to <16 x i64>
  %e = zext <16 x i32> %c to <16 x i64>
  %f = mul <16 x i64> %d, %e
  ret <16 x i64> %f
}

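; The same applies when the scalar comes from memory: a zero-extended narrow
; load folds as lbu/lhu/lwu feeding the .vx form (plain vwmul.vx suffices when
; the zero-extended value also fits in the narrow signed type), while a
; sign-extended scalar is splatted with a zero-strided load instead.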
define <8 x i16> @vwmulsu_vx_v8i16_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_vx_v8i16_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    lbu a0, 0(a1)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load i8, ptr %y
  %c = zext i8 %b to i16
  %d = insertelement <8 x i16> poison, i16 %c, i32 0
  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
  %f = sext <8 x i8> %a to <8 x i16>
  %g = mul <8 x i16> %e, %f
  ret <8 x i16> %g
}

define <8 x i16> @vwmulsu_vx_v8i16_i8_swap(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_vx_v8i16_i8_swap:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vlse8.v v10, (a1), zero
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load i8, ptr %y
  %c = sext i8 %b to i16
  %d = insertelement <8 x i16> poison, i16 %c, i32 0
  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
  %f = zext <8 x i8> %a to <8 x i16>
  %g = mul <8 x i16> %e, %f
  ret <8 x i16> %g
}

define <4 x i32> @vwmulsu_vx_v4i32_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_vx_v4i32_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    lbu a0, 0(a1)
; CHECK-NEXT:    vwmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i8, ptr %y
  %c = zext i8 %b to i32
  %d = insertelement <4 x i32> poison, i32 %c, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = sext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}

define <4 x i32> @vwmulsu_vx_v4i32_i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_vx_v4i32_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    lhu a0, 0(a1)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i16, ptr %y
  %c = zext i16 %b to i32
  %d = insertelement <4 x i32> poison, i32 %c, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = sext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}

define <2 x i64> @vwmulsu_vx_v2i64_i8(ptr %x, ptr %y) {
; RV32-LABEL: vwmulsu_vx_v2i64_i8:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v9, (a0), zero
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vmul.vv v8, v9, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmulsu_vx_v2i64_i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vle32.v v9, (a0)
; RV64-NEXT:    lbu a0, 0(a1)
; RV64-NEXT:    vwmul.vx v8, v9, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i8, ptr %y
  %c = zext i8 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwmulsu_vx_v2i64_i16(ptr %x, ptr %y) {
; RV32-LABEL: vwmulsu_vx_v2i64_i16:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    lhu a1, 0(a1)
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v9, (a0), zero
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vmul.vv v8, v9, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmulsu_vx_v2i64_i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vle32.v v9, (a0)
; RV64-NEXT:    lhu a0, 0(a1)
; RV64-NEXT:    vwmul.vx v8, v9, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i16, ptr %y
  %c = zext i16 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwmulsu_vx_v2i64_i32(ptr %x, ptr %y) {
; RV32-LABEL: vwmulsu_vx_v2i64_i32:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v9, (a0), zero
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vmul.vv v8, v9, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmulsu_vx_v2i64_i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vle32.v v9, (a0)
; RV64-NEXT:    lwu a0, 0(a1)
; RV64-NEXT:    vwmulsu.vx v8, v9, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i32, ptr %y
  %c = zext i32 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

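; Masking the scalar with an all-ones low mask (or an explicit zext) proves it
; unsigned, so vwmulsu.vx needs no extra masking; a partial mask such as 254
; keeps the andi but still selects the widening multiply.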
define <8 x i16> @vwmulsu_vx_v8i16_i8_and(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v8i16_i8_and:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = and i16 %y, 255
  %c = insertelement <8 x i16> poison, i16 %b, i32 0
  %d = shufflevector <8 x i16> %c, <8 x i16> poison, <8 x i32> zeroinitializer
  %e = sext <8 x i8> %a to <8 x i16>
  %f = mul <8 x i16> %d, %e
  ret <8 x i16> %f
}

define <8 x i16> @vwmulsu_vx_v8i16_i8_and1(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v8i16_i8_and1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    andi a0, a1, 254
; CHECK-NEXT:    vwmulsu.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = and i16 %y, 254
  %c = insertelement <8 x i16> poison, i16 %b, i32 0
  %d = shufflevector <8 x i16> %c, <8 x i16> poison, <8 x i32> zeroinitializer
  %e = sext <8 x i8> %a to <8 x i16>
  %f = mul <8 x i16> %d, %e
  ret <8 x i16> %f
}

define <4 x i32> @vwmulsu_vx_v4i32_i16_and(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulsu_vx_v4i32_i16_and:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = and i32 %y, 65535
  %c = insertelement <4 x i32> poison, i32 %b, i32 0
  %d = shufflevector <4 x i32> %c, <4 x i32> poison, <4 x i32> zeroinitializer
  %e = sext <4 x i16> %a to <4 x i32>
  %f = mul <4 x i32> %d, %e
  ret <4 x i32> %f
}

define <4 x i32> @vwmulsu_vx_v4i32_i16_zext(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v4i32_i16_zext:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = zext i16 %y to i32
  %c = insertelement <4 x i32> poison, i32 %b, i32 0
  %d = shufflevector <4 x i32> %c, <4 x i32> poison, <4 x i32> zeroinitializer
  %e = sext <4 x i16> %a to <4 x i32>
  %f = mul <4 x i32> %d, %e
  ret <4 x i32> %f
}