; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
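
; A mul of one zero-extended and one sign-extended operand should select to
; vwmulsu.vv, and a splat of a zero-extended scalar to vwmulsu.vx.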
define <2 x i16> @vwmulsu_v2i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = zext <2 x i8> %a to <2 x i16>
  %d = sext <2 x i8> %b to <2 x i16>
  %e = mul <2 x i16> %c, %d
  ret <2 x i16> %e
}

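; With the extends swapped, the register operands swap so that the
; sign-extended vector still supplies the signed (vs2) operand.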
define <2 x i16> @vwmulsu_v2i16_swap(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v2i16_swap:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = sext <2 x i8> %a to <2 x i16>
  %d = zext <2 x i8> %b to <2 x i16>
  %e = mul <2 x i16> %c, %d
  ret <2 x i16> %e
}

define <4 x i16> @vwmulsu_v4i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = load <4 x i8>, ptr %y
  %c = zext <4 x i8> %a to <4 x i16>
  %d = sext <4 x i8> %b to <4 x i16>
  %e = mul <4 x i16> %c, %d
  ret <4 x i16> %e
}

define <2 x i32> @vwmulsu_v2i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <2 x i16>, ptr %x
  %b = load <2 x i16>, ptr %y
  %c = zext <2 x i16> %a to <2 x i32>
  %d = sext <2 x i16> %b to <2 x i32>
  %e = mul <2 x i32> %c, %d
  ret <2 x i32> %e
}

define <8 x i16> @vwmulsu_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load <8 x i8>, ptr %y
  %c = zext <8 x i8> %a to <8 x i16>
  %d = sext <8 x i8> %b to <8 x i16>
  %e = mul <8 x i16> %c, %d
  ret <8 x i16> %e
}

define <4 x i32> @vwmulsu_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load <4 x i16>, ptr %y
  %c = zext <4 x i16> %a to <4 x i32>
  %d = sext <4 x i16> %b to <4 x i32>
  %e = mul <4 x i32> %c, %d
  ret <4 x i32> %e
}

define <2 x i64> @vwmulsu_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vle32.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load <2 x i32>, ptr %y
  %c = zext <2 x i32> %a to <2 x i64>
  %d = sext <2 x i32> %b to <2 x i64>
  %e = mul <2 x i64> %c, %d
  ret <2 x i64> %e
}

define <16 x i16> @vwmulsu_v16i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vle8.v v11, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = zext <16 x i8> %a to <16 x i16>
  %d = sext <16 x i8> %b to <16 x i16>
  %e = mul <16 x i16> %c, %d
  ret <16 x i16> %e
}

define <8 x i32> @vwmulsu_v8i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vle16.v v11, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = zext <8 x i16> %a to <8 x i32>
  %d = sext <8 x i16> %b to <8 x i32>
  %e = mul <8 x i32> %c, %d
  ret <8 x i32> %e
}

define <4 x i64> @vwmulsu_v4i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vle32.v v11, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = zext <4 x i32> %a to <4 x i64>
  %d = sext <4 x i32> %b to <4 x i64>
  %e = mul <4 x i64> %c, %d
  ret <4 x i64> %e
}

define <32 x i16> @vwmulsu_v32i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    vle8.v v14, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v14, v12
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = zext <32 x i8> %a to <32 x i16>
  %d = sext <32 x i8> %b to <32 x i16>
  %e = mul <32 x i16> %c, %d
  ret <32 x i16> %e
}

define <16 x i32> @vwmulsu_v16i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vle16.v v14, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v14, v12
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = zext <16 x i16> %a to <16 x i32>
  %d = sext <16 x i16> %b to <16 x i32>
  %e = mul <16 x i32> %c, %d
  ret <16 x i32> %e
}

define <8 x i64> @vwmulsu_v8i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vle32.v v14, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v14, v12
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = zext <8 x i32> %a to <8 x i64>
  %d = sext <8 x i32> %b to <8 x i64>
  %e = mul <8 x i64> %c, %d
  ret <8 x i64> %e
}

define <64 x i16> @vwmulsu_v64i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    vle8.v v20, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v20, v16
; CHECK-NEXT:    ret
  %a = load <64 x i8>, ptr %x
  %b = load <64 x i8>, ptr %y
  %c = zext <64 x i8> %a to <64 x i16>
  %d = sext <64 x i8> %b to <64 x i16>
  %e = mul <64 x i16> %c, %d
  ret <64 x i16> %e
}

define <32 x i32> @vwmulsu_v32i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vle16.v v20, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v20, v16
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %x
  %b = load <32 x i16>, ptr %y
  %c = zext <32 x i16> %a to <32 x i32>
  %d = sext <32 x i16> %b to <32 x i32>
  %e = mul <32 x i32> %c, %d
  ret <32 x i32> %e
}

define <16 x i64> @vwmulsu_v16i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vle32.v v20, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v20, v16
; CHECK-NEXT:    ret
  %a = load <16 x i32>, ptr %x
  %b = load <16 x i32>, ptr %y
  %c = zext <16 x i32> %a to <16 x i64>
  %d = sext <16 x i32> %b to <16 x i64>
  %e = mul <16 x i64> %c, %d
  ret <16 x i64> %e
}

define <128 x i16> @vwmulsu_v128i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v128i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 128
; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle8.v v0, (a1)
; CHECK-NEXT:    li a0, 64
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT:    vwmulsu.vv v24, v8, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmulsu.vv v8, v0, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
  %a = load <128 x i8>, ptr %x
  %b = load <128 x i8>, ptr %y
  %c = zext <128 x i8> %a to <128 x i16>
  %d = sext <128 x i8> %b to <128 x i16>
  %e = mul <128 x i16> %c, %d
  ret <128 x i16> %e
}

define <64 x i32> @vwmulsu_v64i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v64i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle16.v v0, (a1)
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vwmulsu.vv v24, v8, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmulsu.vv v8, v0, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
  %a = load <64 x i16>, ptr %x
  %b = load <64 x i16>, ptr %y
  %c = zext <64 x i16> %a to <64 x i32>
  %d = sext <64 x i16> %b to <64 x i32>
  %e = mul <64 x i32> %c, %d
  ret <64 x i32> %e
}

define <32 x i64> @vwmulsu_v32i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v32i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle32.v v0, (a1)
; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v16, v8, 16
; CHECK-NEXT:    vslidedown.vi v8, v0, 16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vwmulsu.vv v24, v8, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmulsu.vv v8, v0, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
  %a = load <32 x i32>, ptr %x
  %b = load <32 x i32>, ptr %y
  %c = zext <32 x i32> %a to <32 x i64>
  %d = sext <32 x i32> %b to <32 x i64>
  %e = mul <32 x i64> %c, %d
  ret <32 x i64> %e
}

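; The tests below widen by more than one power of two or mix source element
; widths, so the narrower operand is pre-extended with vzext/vsext and only
; the final doubling uses vwmulsu.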
define <2 x i32> @vwmulsu_v2i32_v2i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v2i32_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vzext.vf2 v10, v8
; CHECK-NEXT:    vsext.vf2 v11, v9
; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = zext <2 x i8> %a to <2 x i32>
  %d = sext <2 x i8> %b to <2 x i32>
  %e = mul <2 x i32> %c, %d
  ret <2 x i32> %e
}

define <4 x i32> @vwmulsu_v4i32_v4i8_v4i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v4i32_v4i8_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vzext.vf2 v10, v8
; CHECK-NEXT:    vwmulsu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = load <4 x i16>, ptr %y
  %c = zext <4 x i8> %a to <4 x i32>
  %d = sext <4 x i16> %b to <4 x i32>
  %e = mul <4 x i32> %c, %d
  ret <4 x i32> %e
}

define <4 x i64> @vwmulsu_v4i64_v4i32_v4i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v4i64_v4i32_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vsext.vf4 v11, v8
; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i8>, ptr %y
  %c = zext <4 x i32> %a to <4 x i64>
  %d = sext <4 x i8> %b to <4 x i64>
  %e = mul <4 x i64> %c, %d
  ret <4 x i64> %e
}

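; A splat of a zero-extended scalar can fold into the scalar operand of
; vwmulsu.vx; when the splat is the sign-extended operand instead, it has to
; be materialized with vmv.v.x and multiplied with vwmulsu.vv.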
define <2 x i16> @vwmulsu_vx_v2i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = insertelement <2 x i8> poison, i8 %y, i32 0
  %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
  %d = sext <2 x i8> %a to <2 x i16>
  %e = zext <2 x i8> %c to <2 x i16>
  %f = mul <2 x i16> %d, %e
  ret <2 x i16> %f
}

define <2 x i16> @vwmulsu_vx_v2i16_swap(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v2i16_swap:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vmv.v.x v10, a1
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = insertelement <2 x i8> poison, i8 %y, i32 0
  %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
  %d = zext <2 x i8> %a to <2 x i16>
  %e = sext <2 x i8> %c to <2 x i16>
  %f = mul <2 x i16> %d, %e
  ret <2 x i16> %f
}

define <4 x i16> @vwmulsu_vx_v4i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = insertelement <4 x i8> poison, i8 %y, i32 0
  %c = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer
  %d = sext <4 x i8> %a to <4 x i16>
  %e = zext <4 x i8> %c to <4 x i16>
  %f = mul <4 x i16> %d, %e
  ret <4 x i16> %f
}

define <2 x i32> @vwmulsu_vx_v2i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i16>, ptr %x
  %b = insertelement <2 x i16> poison, i16 %y, i32 0
  %c = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> zeroinitializer
  %d = sext <2 x i16> %a to <2 x i32>
  %e = zext <2 x i16> %c to <2 x i32>
  %f = mul <2 x i32> %d, %e
  ret <2 x i32> %f
}

define <8 x i16> @vwmulsu_vx_v8i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = insertelement <8 x i8> poison, i8 %y, i32 0
  %c = shufflevector <8 x i8> %b, <8 x i8> poison, <8 x i32> zeroinitializer
  %d = sext <8 x i8> %a to <8 x i16>
  %e = zext <8 x i8> %c to <8 x i16>
  %f = mul <8 x i16> %d, %e
  ret <8 x i16> %f
}

define <4 x i32> @vwmulsu_vx_v4i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = insertelement <4 x i16> poison, i16 %y, i32 0
  %c = shufflevector <4 x i16> %b, <4 x i16> poison, <4 x i32> zeroinitializer
  %d = sext <4 x i16> %a to <4 x i32>
  %e = zext <4 x i16> %c to <4 x i32>
  %f = mul <4 x i32> %d, %e
  ret <4 x i32> %f
}

define <2 x i64> @vwmulsu_vx_v2i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulsu_vx_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = insertelement <2 x i32> poison, i32 %y, i64 0
  %c = shufflevector <2 x i32> %b, <2 x i32> poison, <2 x i32> zeroinitializer
  %d = sext <2 x i32> %a to <2 x i64>
  %e = zext <2 x i32> %c to <2 x i64>
  %f = mul <2 x i64> %d, %e
  ret <2 x i64> %f
}

define <16 x i16> @vwmulsu_vx_v16i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = sext <16 x i8> %a to <16 x i16>
  %e = zext <16 x i8> %c to <16 x i16>
  %f = mul <16 x i16> %d, %e
  ret <16 x i16> %f
}

define <8 x i32> @vwmulsu_vx_v8i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = sext <8 x i16> %a to <8 x i32>
  %e = zext <8 x i16> %c to <8 x i32>
  %f = mul <8 x i32> %d, %e
  ret <8 x i32> %f
}

define <4 x i64> @vwmulsu_vx_v4i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulsu_vx_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i64 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = sext <4 x i32> %a to <4 x i64>
  %e = zext <4 x i32> %c to <4 x i64>
  %f = mul <4 x i64> %d, %e
  ret <4 x i64> %f
}

define <32 x i16> @vwmulsu_vx_v32i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = insertelement <32 x i8> poison, i8 %y, i32 0
  %c = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer
  %d = sext <32 x i8> %a to <32 x i16>
  %e = zext <32 x i8> %c to <32 x i16>
  %f = mul <32 x i16> %d, %e
  ret <32 x i16> %f
}

define <16 x i32> @vwmulsu_vx_v16i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = insertelement <16 x i16> poison, i16 %y, i32 0
  %c = shufflevector <16 x i16> %b, <16 x i16> poison, <16 x i32> zeroinitializer
  %d = sext <16 x i16> %a to <16 x i32>
  %e = zext <16 x i16> %c to <16 x i32>
  %f = mul <16 x i32> %d, %e
  ret <16 x i32> %f
}

define <8 x i64> @vwmulsu_vx_v8i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulsu_vx_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = insertelement <8 x i32> poison, i32 %y, i64 0
  %c = shufflevector <8 x i32> %b, <8 x i32> poison, <8 x i32> zeroinitializer
  %d = sext <8 x i32> %a to <8 x i64>
  %e = zext <8 x i32> %c to <8 x i64>
  %f = mul <8 x i64> %d, %e
  ret <8 x i64> %f
}

define <64 x i16> @vwmulsu_vx_v64i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <64 x i8>, ptr %x
  %b = insertelement <64 x i8> poison, i8 %y, i32 0
  %c = shufflevector <64 x i8> %b, <64 x i8> poison, <64 x i32> zeroinitializer
  %d = sext <64 x i8> %a to <64 x i16>
  %e = zext <64 x i8> %c to <64 x i16>
  %f = mul <64 x i16> %d, %e
  ret <64 x i16> %f
}

define <32 x i32> @vwmulsu_vx_v32i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %x
  %b = insertelement <32 x i16> poison, i16 %y, i32 0
  %c = shufflevector <32 x i16> %b, <32 x i16> poison, <32 x i32> zeroinitializer
  %d = sext <32 x i16> %a to <32 x i32>
  %e = zext <32 x i16> %c to <32 x i32>
  %f = mul <32 x i32> %d, %e
  ret <32 x i32> %f
}

define <16 x i64> @vwmulsu_vx_v16i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulsu_vx_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <16 x i32>, ptr %x
  %b = insertelement <16 x i32> poison, i32 %y, i64 0
  %c = shufflevector <16 x i32> %b, <16 x i32> poison, <16 x i32> zeroinitializer
  %d = sext <16 x i32> %a to <16 x i64>
  %e = zext <16 x i32> %c to <16 x i64>
  %f = mul <16 x i64> %d, %e
  ret <16 x i64> %f
}

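; The scalar may also come from memory: a zero-extending scalar load
; (lbu/lhu/lwu) feeds vwmulsu.vx directly, or vwmul.vx when the loaded value
; is known to fit in the signed range of the narrow element type.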
define <8 x i16> @vwmulsu_vx_v8i16_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_vx_v8i16_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    lbu a0, 0(a1)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load i8, ptr %y
  %c = zext i8 %b to i16
  %d = insertelement <8 x i16> poison, i16 %c, i32 0
  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
  %f = sext <8 x i8> %a to <8 x i16>
  %g = mul <8 x i16> %e, %f
  ret <8 x i16> %g
}

define <8 x i16> @vwmulsu_vx_v8i16_i8_swap(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_vx_v8i16_i8_swap:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lb a1, 0(a1)
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vmv.v.x v10, a1
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load i8, ptr %y
  %c = sext i8 %b to i16
  %d = insertelement <8 x i16> poison, i16 %c, i32 0
  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
  %f = zext <8 x i8> %a to <8 x i16>
  %g = mul <8 x i16> %e, %f
  ret <8 x i16> %g
}

define <4 x i32> @vwmulsu_vx_v4i32_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_vx_v4i32_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    lbu a0, 0(a1)
; CHECK-NEXT:    vwmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i8, ptr %y
  %c = zext i8 %b to i32
  %d = insertelement <4 x i32> poison, i32 %c, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = sext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}

define <4 x i32> @vwmulsu_vx_v4i32_i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_vx_v4i32_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    lhu a0, 0(a1)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i16, ptr %y
  %c = zext i16 %b to i32
  %d = insertelement <4 x i32> poison, i32 %c, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = sext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}

define <2 x i64> @vwmulsu_vx_v2i64_i8(ptr %x, ptr %y) {
; RV32-LABEL: vwmulsu_vx_v2i64_i8:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v9, (a0), zero
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vmul.vv v8, v9, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmulsu_vx_v2i64_i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vle32.v v9, (a0)
; RV64-NEXT:    lbu a0, 0(a1)
; RV64-NEXT:    vwmul.vx v8, v9, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i8, ptr %y
  %c = zext i8 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwmulsu_vx_v2i64_i16(ptr %x, ptr %y) {
; RV32-LABEL: vwmulsu_vx_v2i64_i16:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lhu a1, 0(a1)
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v9, (a0), zero
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vmul.vv v8, v9, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmulsu_vx_v2i64_i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vle32.v v9, (a0)
; RV64-NEXT:    lhu a0, 0(a1)
; RV64-NEXT:    vwmul.vx v8, v9, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i16, ptr %y
  %c = zext i16 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwmulsu_vx_v2i64_i32(ptr %x, ptr %y) {
; RV32-LABEL: vwmulsu_vx_v2i64_i32:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v9, (a0), zero
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vmul.vv v8, v9, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmulsu_vx_v2i64_i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vle32.v v9, (a0)
; RV64-NEXT:    lwu a0, 0(a1)
; RV64-NEXT:    vwmulsu.vx v8, v9, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i32, ptr %y
  %c = zext i32 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

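; An 'and' mask (or an explicit zext) that proves the splatted scalar fits in
; the narrow unsigned range also allows it to fold into vwmulsu.vx.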
define <8 x i16> @vwmulsu_vx_v8i16_i8_and(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v8i16_i8_and:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = and i16 %y, 255
  %c = insertelement <8 x i16> poison, i16 %b, i32 0
  %d = shufflevector <8 x i16> %c, <8 x i16> poison, <8 x i32> zeroinitializer
  %e = sext <8 x i8> %a to <8 x i16>
  %f = mul <8 x i16> %d, %e
  ret <8 x i16> %f
}

define <8 x i16> @vwmulsu_vx_v8i16_i8_and1(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v8i16_i8_and1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    andi a0, a1, 254
; CHECK-NEXT:    vwmulsu.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = and i16 %y, 254
  %c = insertelement <8 x i16> poison, i16 %b, i32 0
  %d = shufflevector <8 x i16> %c, <8 x i16> poison, <8 x i32> zeroinitializer
  %e = sext <8 x i8> %a to <8 x i16>
  %f = mul <8 x i16> %d, %e
  ret <8 x i16> %f
}

define <4 x i32> @vwmulsu_vx_v4i32_i16_and(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulsu_vx_v4i32_i16_and:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = and i32 %y, 65535
  %c = insertelement <4 x i32> poison, i32 %b, i32 0
  %d = shufflevector <4 x i32> %c, <4 x i32> poison, <4 x i32> zeroinitializer
  %e = sext <4 x i16> %a to <4 x i32>
  %f = mul <4 x i32> %d, %e
  ret <4 x i32> %f
}

define <4 x i32> @vwmulsu_vx_v4i32_i16_zext(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v4i32_i16_zext:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = zext i16 %y to i32
  %c = insertelement <4 x i32> poison, i32 %b, i32 0
  %d = shufflevector <4 x i32> %c, <4 x i32> poison, <4 x i32> zeroinitializer
  %e = sext <4 x i16> %a to <4 x i32>
  %f = mul <4 x i32> %d, %e
  ret <4 x i32> %f
}