; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
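
; Check that a multiply of two sign-extended narrow loads is selected as a
; single widening multiply (vwmul.vv) operating at the source element width.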
define <2 x i16> @vwmul_v2i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmul.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = sext <2 x i8> %a to <2 x i16>
  %d = sext <2 x i8> %b to <2 x i16>
  %e = mul <2 x i16> %c, %d
  ret <2 x i16> %e
}
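
; Here the sign-extended operands have multiple users; each multiply should
; still be selected as vwmul.vv.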
define <2 x i16> @vwmul_v2i16_multiple_users(ptr %x, ptr %y, ptr %z) {
; CHECK-LABEL: vwmul_v2i16_multiple_users:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vle8.v v10, (a2)
; CHECK-NEXT:    vwmul.vv v11, v8, v9
; CHECK-NEXT:    vwmul.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vor.vv v8, v11, v9
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %b2 = load <2 x i8>, ptr %z
  %c = sext <2 x i8> %a to <2 x i16>
  %d = sext <2 x i8> %b to <2 x i16>
  %d2 = sext <2 x i8> %b2 to <2 x i16>
  %e = mul <2 x i16> %c, %d
  %f = mul <2 x i16> %c, %d2
  %g = or <2 x i16> %e, %f
  ret <2 x i16> %g
}

define <4 x i16> @vwmul_v4i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmul.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = load <4 x i8>, ptr %y
  %c = sext <4 x i8> %a to <4 x i16>
  %d = sext <4 x i8> %b to <4 x i16>
  %e = mul <4 x i16> %c, %d
  ret <4 x i16> %e
}

define <2 x i32> @vwmul_v2i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vwmul.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i16>, ptr %x
  %b = load <2 x i16>, ptr %y
  %c = sext <2 x i16> %a to <2 x i32>
  %d = sext <2 x i16> %b to <2 x i32>
  %e = mul <2 x i32> %c, %d
  ret <2 x i32> %e
}

define <8 x i16> @vwmul_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmul.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load <8 x i8>, ptr %y
  %c = sext <8 x i8> %a to <8 x i16>
  %d = sext <8 x i8> %b to <8 x i16>
  %e = mul <8 x i16> %c, %d
  ret <8 x i16> %e
}

define <4 x i32> @vwmul_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vwmul.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load <4 x i16>, ptr %y
  %c = sext <4 x i16> %a to <4 x i32>
  %d = sext <4 x i16> %b to <4 x i32>
  %e = mul <4 x i32> %c, %d
  ret <4 x i32> %e
}

define <2 x i64> @vwmul_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vle32.v v10, (a1)
; CHECK-NEXT:    vwmul.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load <2 x i32>, ptr %y
  %c = sext <2 x i32> %a to <2 x i64>
  %d = sext <2 x i32> %b to <2 x i64>
  %e = mul <2 x i64> %c, %d
  ret <2 x i64> %e
}

define <16 x i16> @vwmul_v16i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vle8.v v11, (a1)
; CHECK-NEXT:    vwmul.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = sext <16 x i8> %a to <16 x i16>
  %d = sext <16 x i8> %b to <16 x i16>
  %e = mul <16 x i16> %c, %d
  ret <16 x i16> %e
}

define <8 x i32> @vwmul_v8i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vle16.v v11, (a1)
; CHECK-NEXT:    vwmul.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = sext <8 x i16> %a to <8 x i32>
  %d = sext <8 x i16> %b to <8 x i32>
  %e = mul <8 x i32> %c, %d
  ret <8 x i32> %e
}

define <4 x i64> @vwmul_v4i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vle32.v v11, (a1)
; CHECK-NEXT:    vwmul.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = sext <4 x i32> %a to <4 x i64>
  %d = sext <4 x i32> %b to <4 x i64>
  %e = mul <4 x i64> %c, %d
  ret <4 x i64> %e
}
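
; An AVL of 32 or more does not fit the 5-bit immediate of vsetivli, so it is
; first materialized into a scalar register with li.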
define <32 x i16> @vwmul_v32i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    vle8.v v14, (a1)
; CHECK-NEXT:    vwmul.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = sext <32 x i8> %a to <32 x i16>
  %d = sext <32 x i8> %b to <32 x i16>
  %e = mul <32 x i16> %c, %d
  ret <32 x i16> %e
}

define <16 x i32> @vwmul_v16i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vle16.v v14, (a1)
; CHECK-NEXT:    vwmul.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = sext <16 x i16> %a to <16 x i32>
  %d = sext <16 x i16> %b to <16 x i32>
  %e = mul <16 x i32> %c, %d
  ret <16 x i32> %e
}

define <8 x i64> @vwmul_v8i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vle32.v v14, (a1)
; CHECK-NEXT:    vwmul.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = sext <8 x i32> %a to <8 x i64>
  %d = sext <8 x i32> %b to <8 x i64>
  %e = mul <8 x i64> %c, %d
  ret <8 x i64> %e
}

define <64 x i16> @vwmul_v64i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    vle8.v v20, (a1)
; CHECK-NEXT:    vwmul.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <64 x i8>, ptr %x
  %b = load <64 x i8>, ptr %y
  %c = sext <64 x i8> %a to <64 x i16>
  %d = sext <64 x i8> %b to <64 x i16>
  %e = mul <64 x i16> %c, %d
  ret <64 x i16> %e
}

define <32 x i32> @vwmul_v32i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vle16.v v20, (a1)
; CHECK-NEXT:    vwmul.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %x
  %b = load <32 x i16>, ptr %y
  %c = sext <32 x i16> %a to <32 x i32>
  %d = sext <32 x i16> %b to <32 x i32>
  %e = mul <32 x i32> %c, %d
  ret <32 x i32> %e
}

define <16 x i64> @vwmul_v16i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vle32.v v20, (a1)
; CHECK-NEXT:    vwmul.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <16 x i32>, ptr %x
  %b = load <16 x i32>, ptr %y
  %c = sext <16 x i32> %a to <16 x i64>
  %d = sext <16 x i32> %b to <16 x i64>
  %e = mul <16 x i64> %c, %d
  ret <16 x i64> %e
}
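
; The widened result would need a register group larger than m8, so the
; operation is split in half and m8 temporaries are spilled to the stack.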
define <128 x i16> @vwmul_v128i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v128i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 128
; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle8.v v0, (a1)
; CHECK-NEXT:    li a0, 64
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT:    vmv4r.v v24, v8
; CHECK-NEXT:    vwmul.vv v8, v16, v24
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmul.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
  %a = load <128 x i8>, ptr %x
  %b = load <128 x i8>, ptr %y
  %c = sext <128 x i8> %a to <128 x i16>
  %d = sext <128 x i8> %b to <128 x i16>
  %e = mul <128 x i16> %c, %d
  ret <128 x i16> %e
}

define <64 x i32> @vwmul_v64i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v64i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle16.v v0, (a1)
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vmv4r.v v24, v8
; CHECK-NEXT:    vwmul.vv v8, v16, v24
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmul.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
  %a = load <64 x i16>, ptr %x
  %b = load <64 x i16>, ptr %y
  %c = sext <64 x i16> %a to <64 x i32>
  %d = sext <64 x i16> %b to <64 x i32>
  %e = mul <64 x i32> %c, %d
  ret <64 x i32> %e
}

define <32 x i64> @vwmul_v32i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v32i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle32.v v0, (a1)
; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v16, v8, 16
; CHECK-NEXT:    vslidedown.vi v8, v0, 16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vmv4r.v v24, v8
; CHECK-NEXT:    vwmul.vv v8, v16, v24
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmul.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
  %a = load <32 x i32>, ptr %x
  %b = load <32 x i32>, ptr %y
  %c = sext <32 x i32> %a to <32 x i64>
  %d = sext <32 x i32> %b to <32 x i64>
  %e = mul <32 x i64> %c, %d
  ret <32 x i64> %e
}
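
; Mixed source widths: operands narrower than half the result width are first
; extended with vsext so that vwmul.vv can perform the final doubling.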
define <2 x i32> @vwmul_v2i32_v2i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v2i32_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vsext.vf2 v10, v8
; CHECK-NEXT:    vsext.vf2 v11, v9
; CHECK-NEXT:    vwmul.vv v8, v11, v10
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = sext <2 x i8> %a to <2 x i32>
  %d = sext <2 x i8> %b to <2 x i32>
  %e = mul <2 x i32> %c, %d
  ret <2 x i32> %e
}

define <4 x i32> @vwmul_v4i32_v4i8_v4i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v4i32_v4i8_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsext.vf2 v10, v8
; CHECK-NEXT:    vwmul.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = load <4 x i16>, ptr %y
  %c = sext <4 x i8> %a to <4 x i32>
  %d = sext <4 x i16> %b to <4 x i32>
  %e = mul <4 x i32> %c, %d
  ret <4 x i32> %e
}

define <4 x i64> @vwmul_v4i64_v4i32_v4i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v4i64_v4i32_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vsext.vf4 v11, v8
; CHECK-NEXT:    vwmul.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i8>, ptr %y
  %c = sext <4 x i32> %a to <4 x i64>
  %d = sext <4 x i8> %b to <4 x i64>
  %e = mul <4 x i64> %c, %d
  ret <4 x i64> %e
}
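
; vx variants: a multiply by a sign-extended scalar splat should fold the
; splat into vwmul.vx.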
define <2 x i16> @vwmul_vx_v2i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmul_vx_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmul.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = insertelement <2 x i8> poison, i8 %y, i32 0
  %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
  %d = sext <2 x i8> %a to <2 x i16>
  %e = sext <2 x i8> %c to <2 x i16>
  %f = mul <2 x i16> %d, %e
  ret <2 x i16> %f
}

define <4 x i16> @vwmul_vx_v4i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmul_vx_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmul.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = insertelement <4 x i8> poison, i8 %y, i32 0
  %c = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer
  %d = sext <4 x i8> %a to <4 x i16>
  %e = sext <4 x i8> %c to <4 x i16>
  %f = mul <4 x i16> %d, %e
  ret <4 x i16> %f
}

define <2 x i32> @vwmul_vx_v2i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmul_vx_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmul.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i16>, ptr %x
  %b = insertelement <2 x i16> poison, i16 %y, i32 0
  %c = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> zeroinitializer
  %d = sext <2 x i16> %a to <2 x i32>
  %e = sext <2 x i16> %c to <2 x i32>
  %f = mul <2 x i32> %d, %e
  ret <2 x i32> %f
}

define <8 x i16> @vwmul_vx_v8i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmul_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmul.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = insertelement <8 x i8> poison, i8 %y, i32 0
  %c = shufflevector <8 x i8> %b, <8 x i8> poison, <8 x i32> zeroinitializer
  %d = sext <8 x i8> %a to <8 x i16>
  %e = sext <8 x i8> %c to <8 x i16>
  %f = mul <8 x i16> %d, %e
  ret <8 x i16> %f
}

define <4 x i32> @vwmul_vx_v4i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmul_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmul.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = insertelement <4 x i16> poison, i16 %y, i32 0
  %c = shufflevector <4 x i16> %b, <4 x i16> poison, <4 x i32> zeroinitializer
  %d = sext <4 x i16> %a to <4 x i32>
  %e = sext <4 x i16> %c to <4 x i32>
  %f = mul <4 x i32> %d, %e
  ret <4 x i32> %f
}

define <2 x i64> @vwmul_vx_v2i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmul_vx_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vwmul.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = insertelement <2 x i32> poison, i32 %y, i64 0
  %c = shufflevector <2 x i32> %b, <2 x i32> poison, <2 x i32> zeroinitializer
  %d = sext <2 x i32> %a to <2 x i64>
  %e = sext <2 x i32> %c to <2 x i64>
  %f = mul <2 x i64> %d, %e
  ret <2 x i64> %f
}

define <16 x i16> @vwmul_vx_v16i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmul_vx_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vwmul.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = sext <16 x i8> %a to <16 x i16>
  %e = sext <16 x i8> %c to <16 x i16>
  %f = mul <16 x i16> %d, %e
  ret <16 x i16> %f
}

define <8 x i32> @vwmul_vx_v8i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmul_vx_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vwmul.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = sext <8 x i16> %a to <8 x i32>
  %e = sext <8 x i16> %c to <8 x i32>
  %f = mul <8 x i32> %d, %e
  ret <8 x i32> %f
}

define <4 x i64> @vwmul_vx_v4i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmul_vx_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vwmul.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i64 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = sext <4 x i32> %a to <4 x i64>
  %e = sext <4 x i32> %c to <4 x i64>
  %f = mul <4 x i64> %d, %e
  ret <4 x i64> %f
}

define <32 x i16> @vwmul_vx_v32i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmul_vx_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    vwmul.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = insertelement <32 x i8> poison, i8 %y, i32 0
  %c = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer
  %d = sext <32 x i8> %a to <32 x i16>
  %e = sext <32 x i8> %c to <32 x i16>
  %f = mul <32 x i16> %d, %e
  ret <32 x i16> %f
}

define <16 x i32> @vwmul_vx_v16i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmul_vx_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vwmul.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = insertelement <16 x i16> poison, i16 %y, i32 0
  %c = shufflevector <16 x i16> %b, <16 x i16> poison, <16 x i32> zeroinitializer
  %d = sext <16 x i16> %a to <16 x i32>
  %e = sext <16 x i16> %c to <16 x i32>
  %f = mul <16 x i32> %d, %e
  ret <16 x i32> %f
}

define <8 x i64> @vwmul_vx_v8i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmul_vx_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vwmul.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = insertelement <8 x i32> poison, i32 %y, i64 0
  %c = shufflevector <8 x i32> %b, <8 x i32> poison, <8 x i32> zeroinitializer
  %d = sext <8 x i32> %a to <8 x i64>
  %e = sext <8 x i32> %c to <8 x i64>
  %f = mul <8 x i64> %d, %e
  ret <8 x i64> %f
}

define <64 x i16> @vwmul_vx_v64i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmul_vx_v64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    vwmul.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <64 x i8>, ptr %x
  %b = insertelement <64 x i8> poison, i8 %y, i32 0
  %c = shufflevector <64 x i8> %b, <64 x i8> poison, <64 x i32> zeroinitializer
  %d = sext <64 x i8> %a to <64 x i16>
  %e = sext <64 x i8> %c to <64 x i16>
  %f = mul <64 x i16> %d, %e
  ret <64 x i16> %f
}

define <32 x i32> @vwmul_vx_v32i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmul_vx_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vwmul.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %x
  %b = insertelement <32 x i16> poison, i16 %y, i32 0
  %c = shufflevector <32 x i16> %b, <32 x i16> poison, <32 x i32> zeroinitializer
  %d = sext <32 x i16> %a to <32 x i32>
  %e = sext <32 x i16> %c to <32 x i32>
  %f = mul <32 x i32> %d, %e
  ret <32 x i32> %f
}

define <16 x i64> @vwmul_vx_v16i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmul_vx_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vwmul.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <16 x i32>, ptr %x
  %b = insertelement <16 x i32> poison, i32 %y, i64 0
  %c = shufflevector <16 x i32> %b, <16 x i32> poison, <16 x i32> zeroinitializer
  %d = sext <16 x i32> %a to <16 x i64>
  %e = sext <16 x i32> %c to <16 x i64>
  %f = mul <16 x i64> %d, %e
  ret <16 x i64> %f
}
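
; The splatted scalar comes from a sign-extending scalar load. vwmul.vx can be
; used when the loaded type is at most half the result element width;
; otherwise the vector operand is extended and a regular vmul.vx is emitted.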
define <8 x i16> @vwmul_vx_v8i16_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v8i16_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    lb a0, 0(a1)
; CHECK-NEXT:    vwmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load i8, ptr %y
  %c = sext i8 %b to i16
  %d = insertelement <8 x i16> poison, i16 %c, i32 0
  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
  %f = sext <8 x i8> %a to <8 x i16>
  %g = mul <8 x i16> %e, %f
  ret <8 x i16> %g
}

define <8 x i16> @vwmul_vx_v8i16_i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v8i16_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    lh a0, 0(a1)
; CHECK-NEXT:    vsext.vf2 v9, v8
; CHECK-NEXT:    vmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load i16, ptr %y
  %d = insertelement <8 x i16> poison, i16 %b, i32 0
  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
  %f = sext <8 x i8> %a to <8 x i16>
  %g = mul <8 x i16> %e, %f
  ret <8 x i16> %g
}

define <4 x i32> @vwmul_vx_v4i32_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v4i32_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    lb a0, 0(a1)
; CHECK-NEXT:    vwmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i8, ptr %y
  %c = sext i8 %b to i32
  %d = insertelement <4 x i32> poison, i32 %c, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = sext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}

define <4 x i32> @vwmul_vx_v4i32_i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v4i32_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    lh a0, 0(a1)
; CHECK-NEXT:    vwmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i16, ptr %y
  %c = sext i16 %b to i32
  %d = insertelement <4 x i32> poison, i32 %c, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = sext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}

define <4 x i32> @vwmul_vx_v4i32_i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v4i32_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    lw a0, 0(a1)
; CHECK-NEXT:    vsext.vf2 v9, v8
; CHECK-NEXT:    vmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i32, ptr %y
  %d = insertelement <4 x i32> poison, i32 %b, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = sext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}
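
; With an i64 result the lowering diverges: RV64 can use vwmul.vx with the
; narrow scalar in a single GPR, while RV32 currently extends the vector with
; vsext and multiplies at e64.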
define <2 x i64> @vwmul_vx_v2i64_i8(ptr %x, ptr %y) {
; RV32-LABEL: vwmul_vx_v2i64_i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    lb a0, 0(a1)
; RV32-NEXT:    vsext.vf2 v9, v8
; RV32-NEXT:    vmul.vx v8, v9, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmul_vx_v2i64_i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vle32.v v9, (a0)
; RV64-NEXT:    lb a0, 0(a1)
; RV64-NEXT:    vwmul.vx v8, v9, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i8, ptr %y
  %c = sext i8 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwmul_vx_v2i64_i16(ptr %x, ptr %y) {
; RV32-LABEL: vwmul_vx_v2i64_i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    lh a0, 0(a1)
; RV32-NEXT:    vsext.vf2 v9, v8
; RV32-NEXT:    vmul.vx v8, v9, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmul_vx_v2i64_i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vle32.v v9, (a0)
; RV64-NEXT:    lh a0, 0(a1)
; RV64-NEXT:    vwmul.vx v8, v9, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i16, ptr %y
  %c = sext i16 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwmul_vx_v2i64_i32(ptr %x, ptr %y) {
; RV32-LABEL: vwmul_vx_v2i64_i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    lw a0, 0(a1)
; RV32-NEXT:    vsext.vf2 v9, v8
; RV32-NEXT:    vmul.vx v8, v9, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmul_vx_v2i64_i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vle32.v v9, (a0)
; RV64-NEXT:    lw a0, 0(a1)
; RV64-NEXT:    vwmul.vx v8, v9, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i32, ptr %y
  %c = sext i32 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}
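
; No widening is possible with a full i64 scalar. On RV32 the scalar is
; additionally materialized on the stack and splatted with a zero-strided
; vlse64.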
define <2 x i64> @vwmul_vx_v2i64_i64(ptr %x, ptr %y) {
; RV32-LABEL: vwmul_vx_v2i64_i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    lw a2, 4(a1)
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v9, (a0), zero
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vmul.vv v8, v9, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmul_vx_v2i64_i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle32.v v8, (a0)
; RV64-NEXT:    ld a0, 0(a1)
; RV64-NEXT:    vsext.vf2 v9, v8
; RV64-NEXT:    vmul.vx v8, v9, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i64, ptr %y
  %d = insertelement <2 x i64> poison, i64 %b, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}