1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
3 ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
; Both <2 x i8> loads are sign-extended to <2 x i16> and multiplied.
; The checks expect the two sexts and the mul to become one vwmul.vv at e8/mf8.
5 define <2 x i16> @vwmul_v2i16(ptr %x, ptr %y) {
6 ; CHECK-LABEL: vwmul_v2i16:
8 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
9 ; CHECK-NEXT: vle8.v v9, (a0)
10 ; CHECK-NEXT: vle8.v v10, (a1)
11 ; CHECK-NEXT: vwmul.vv v8, v9, v10
13 %a = load <2 x i8>, ptr %x
14 %b = load <2 x i8>, ptr %y
15 %c = sext <2 x i8> %a to <2 x i16>
16 %d = sext <2 x i8> %b to <2 x i16>
17 %e = mul <2 x i16> %c, %d
; The sign-extended %c feeds two different muls (%e and %f).
; The checks expect both muls to become vwmul.vv sharing the narrow source v8,
; followed by a vor.vv executed at the widened e16 element width.
21 define <2 x i16> @vwmul_v2i16_multiple_users(ptr %x, ptr %y, ptr %z) {
22 ; CHECK-LABEL: vwmul_v2i16_multiple_users:
24 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
25 ; CHECK-NEXT: vle8.v v8, (a0)
26 ; CHECK-NEXT: vle8.v v9, (a1)
27 ; CHECK-NEXT: vle8.v v10, (a2)
28 ; CHECK-NEXT: vwmul.vv v11, v8, v9
29 ; CHECK-NEXT: vwmul.vv v9, v8, v10
30 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
31 ; CHECK-NEXT: vor.vv v8, v11, v9
33 %a = load <2 x i8>, ptr %x
34 %b = load <2 x i8>, ptr %y
35 %b2 = load <2 x i8>, ptr %z
36 %c = sext <2 x i8> %a to <2 x i16>
37 %d = sext <2 x i8> %b to <2 x i16>
38 %d2 = sext <2 x i8> %b2 to <2 x i16>
39 %e = mul <2 x i16> %c, %d
40 %f = mul <2 x i16> %c, %d2
41 %g = or <2 x i16> %e, %f
; <4 x i8> x <4 x i8> -> <4 x i16> signed widening multiply.
; The checks expect a single vwmul.vv at e8/mf4.
45 define <4 x i16> @vwmul_v4i16(ptr %x, ptr %y) {
46 ; CHECK-LABEL: vwmul_v4i16:
48 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
49 ; CHECK-NEXT: vle8.v v9, (a0)
50 ; CHECK-NEXT: vle8.v v10, (a1)
51 ; CHECK-NEXT: vwmul.vv v8, v9, v10
53 %a = load <4 x i8>, ptr %x
54 %b = load <4 x i8>, ptr %y
55 %c = sext <4 x i8> %a to <4 x i16>
56 %d = sext <4 x i8> %b to <4 x i16>
57 %e = mul <4 x i16> %c, %d
; <2 x i16> x <2 x i16> -> <2 x i32> signed widening multiply.
; The checks expect a single vwmul.vv at e16/mf4.
61 define <2 x i32> @vwmul_v2i32(ptr %x, ptr %y) {
62 ; CHECK-LABEL: vwmul_v2i32:
64 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
65 ; CHECK-NEXT: vle16.v v9, (a0)
66 ; CHECK-NEXT: vle16.v v10, (a1)
67 ; CHECK-NEXT: vwmul.vv v8, v9, v10
69 %a = load <2 x i16>, ptr %x
70 %b = load <2 x i16>, ptr %y
71 %c = sext <2 x i16> %a to <2 x i32>
72 %d = sext <2 x i16> %b to <2 x i32>
73 %e = mul <2 x i32> %c, %d
; <8 x i8> x <8 x i8> -> <8 x i16> signed widening multiply.
; The checks expect a single vwmul.vv at e8/mf2.
77 define <8 x i16> @vwmul_v8i16(ptr %x, ptr %y) {
78 ; CHECK-LABEL: vwmul_v8i16:
80 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
81 ; CHECK-NEXT: vle8.v v9, (a0)
82 ; CHECK-NEXT: vle8.v v10, (a1)
83 ; CHECK-NEXT: vwmul.vv v8, v9, v10
85 %a = load <8 x i8>, ptr %x
86 %b = load <8 x i8>, ptr %y
87 %c = sext <8 x i8> %a to <8 x i16>
88 %d = sext <8 x i8> %b to <8 x i16>
89 %e = mul <8 x i16> %c, %d
; <4 x i16> x <4 x i16> -> <4 x i32> signed widening multiply.
; The checks expect a single vwmul.vv at e16/mf2.
93 define <4 x i32> @vwmul_v4i32(ptr %x, ptr %y) {
94 ; CHECK-LABEL: vwmul_v4i32:
96 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
97 ; CHECK-NEXT: vle16.v v9, (a0)
98 ; CHECK-NEXT: vle16.v v10, (a1)
99 ; CHECK-NEXT: vwmul.vv v8, v9, v10
101 %a = load <4 x i16>, ptr %x
102 %b = load <4 x i16>, ptr %y
103 %c = sext <4 x i16> %a to <4 x i32>
104 %d = sext <4 x i16> %b to <4 x i32>
105 %e = mul <4 x i32> %c, %d
; <2 x i32> x <2 x i32> -> <2 x i64> signed widening multiply.
; The checks expect a single vwmul.vv at e32/mf2.
109 define <2 x i64> @vwmul_v2i64(ptr %x, ptr %y) {
110 ; CHECK-LABEL: vwmul_v2i64:
112 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
113 ; CHECK-NEXT: vle32.v v9, (a0)
114 ; CHECK-NEXT: vle32.v v10, (a1)
115 ; CHECK-NEXT: vwmul.vv v8, v9, v10
117 %a = load <2 x i32>, ptr %x
118 %b = load <2 x i32>, ptr %y
119 %c = sext <2 x i32> %a to <2 x i64>
120 %d = sext <2 x i32> %b to <2 x i64>
121 %e = mul <2 x i64> %c, %d
; <16 x i8> x <16 x i8> -> <16 x i16> signed widening multiply.
; The checks expect a single vwmul.vv with the narrow sources at e8/m1.
125 define <16 x i16> @vwmul_v16i16(ptr %x, ptr %y) {
126 ; CHECK-LABEL: vwmul_v16i16:
128 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
129 ; CHECK-NEXT: vle8.v v10, (a0)
130 ; CHECK-NEXT: vle8.v v11, (a1)
131 ; CHECK-NEXT: vwmul.vv v8, v10, v11
133 %a = load <16 x i8>, ptr %x
134 %b = load <16 x i8>, ptr %y
135 %c = sext <16 x i8> %a to <16 x i16>
136 %d = sext <16 x i8> %b to <16 x i16>
137 %e = mul <16 x i16> %c, %d
; <8 x i16> x <8 x i16> -> <8 x i32> signed widening multiply.
; The checks expect a single vwmul.vv with the narrow sources at e16/m1.
141 define <8 x i32> @vwmul_v8i32(ptr %x, ptr %y) {
142 ; CHECK-LABEL: vwmul_v8i32:
144 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
145 ; CHECK-NEXT: vle16.v v10, (a0)
146 ; CHECK-NEXT: vle16.v v11, (a1)
147 ; CHECK-NEXT: vwmul.vv v8, v10, v11
149 %a = load <8 x i16>, ptr %x
150 %b = load <8 x i16>, ptr %y
151 %c = sext <8 x i16> %a to <8 x i32>
152 %d = sext <8 x i16> %b to <8 x i32>
153 %e = mul <8 x i32> %c, %d
; <4 x i32> x <4 x i32> -> <4 x i64> signed widening multiply.
; The checks expect a single vwmul.vv with the narrow sources at e32/m1.
157 define <4 x i64> @vwmul_v4i64(ptr %x, ptr %y) {
158 ; CHECK-LABEL: vwmul_v4i64:
160 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
161 ; CHECK-NEXT: vle32.v v10, (a0)
162 ; CHECK-NEXT: vle32.v v11, (a1)
163 ; CHECK-NEXT: vwmul.vv v8, v10, v11
165 %a = load <4 x i32>, ptr %x
166 %b = load <4 x i32>, ptr %y
167 %c = sext <4 x i32> %a to <4 x i64>
168 %d = sext <4 x i32> %b to <4 x i64>
169 %e = mul <4 x i64> %c, %d
; <32 x i8> x <32 x i8> -> <32 x i16> signed widening multiply.
; The checks expect the element count 32 to be materialized with li and passed
; to vsetvli (rather than vsetivli), then a single vwmul.vv at e8/m2.
173 define <32 x i16> @vwmul_v32i16(ptr %x, ptr %y) {
174 ; CHECK-LABEL: vwmul_v32i16:
176 ; CHECK-NEXT: li a2, 32
177 ; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma
178 ; CHECK-NEXT: vle8.v v12, (a0)
179 ; CHECK-NEXT: vle8.v v14, (a1)
180 ; CHECK-NEXT: vwmul.vv v8, v12, v14
182 %a = load <32 x i8>, ptr %x
183 %b = load <32 x i8>, ptr %y
184 %c = sext <32 x i8> %a to <32 x i16>
185 %d = sext <32 x i8> %b to <32 x i16>
186 %e = mul <32 x i16> %c, %d
; <16 x i16> x <16 x i16> -> <16 x i32> signed widening multiply.
; The checks expect a single vwmul.vv with the narrow sources at e16/m2.
190 define <16 x i32> @vwmul_v16i32(ptr %x, ptr %y) {
191 ; CHECK-LABEL: vwmul_v16i32:
193 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
194 ; CHECK-NEXT: vle16.v v12, (a0)
195 ; CHECK-NEXT: vle16.v v14, (a1)
196 ; CHECK-NEXT: vwmul.vv v8, v12, v14
198 %a = load <16 x i16>, ptr %x
199 %b = load <16 x i16>, ptr %y
200 %c = sext <16 x i16> %a to <16 x i32>
201 %d = sext <16 x i16> %b to <16 x i32>
202 %e = mul <16 x i32> %c, %d
; <8 x i32> x <8 x i32> -> <8 x i64> signed widening multiply.
; The checks expect a single vwmul.vv with the narrow sources at e32/m2.
206 define <8 x i64> @vwmul_v8i64(ptr %x, ptr %y) {
207 ; CHECK-LABEL: vwmul_v8i64:
209 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
210 ; CHECK-NEXT: vle32.v v12, (a0)
211 ; CHECK-NEXT: vle32.v v14, (a1)
212 ; CHECK-NEXT: vwmul.vv v8, v12, v14
214 %a = load <8 x i32>, ptr %x
215 %b = load <8 x i32>, ptr %y
216 %c = sext <8 x i32> %a to <8 x i64>
217 %d = sext <8 x i32> %b to <8 x i64>
218 %e = mul <8 x i64> %c, %d
; <64 x i8> x <64 x i8> -> <64 x i16> signed widening multiply.
; The checks expect li + vsetvli for the element count 64, then a single
; vwmul.vv with the narrow sources at e8/m4.
222 define <64 x i16> @vwmul_v64i16(ptr %x, ptr %y) {
223 ; CHECK-LABEL: vwmul_v64i16:
225 ; CHECK-NEXT: li a2, 64
226 ; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma
227 ; CHECK-NEXT: vle8.v v16, (a0)
228 ; CHECK-NEXT: vle8.v v20, (a1)
229 ; CHECK-NEXT: vwmul.vv v8, v16, v20
231 %a = load <64 x i8>, ptr %x
232 %b = load <64 x i8>, ptr %y
233 %c = sext <64 x i8> %a to <64 x i16>
234 %d = sext <64 x i8> %b to <64 x i16>
235 %e = mul <64 x i16> %c, %d
; <32 x i16> x <32 x i16> -> <32 x i32> signed widening multiply.
; The checks expect li + vsetvli for the element count 32, then a single
; vwmul.vv with the narrow sources at e16/m4.
239 define <32 x i32> @vwmul_v32i32(ptr %x, ptr %y) {
240 ; CHECK-LABEL: vwmul_v32i32:
242 ; CHECK-NEXT: li a2, 32
243 ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
244 ; CHECK-NEXT: vle16.v v16, (a0)
245 ; CHECK-NEXT: vle16.v v20, (a1)
246 ; CHECK-NEXT: vwmul.vv v8, v16, v20
248 %a = load <32 x i16>, ptr %x
249 %b = load <32 x i16>, ptr %y
250 %c = sext <32 x i16> %a to <32 x i32>
251 %d = sext <32 x i16> %b to <32 x i32>
252 %e = mul <32 x i32> %c, %d
; <16 x i32> x <16 x i32> -> <16 x i64> signed widening multiply.
; The checks expect a single vwmul.vv with the narrow sources at e32/m4.
256 define <16 x i64> @vwmul_v16i64(ptr %x, ptr %y) {
257 ; CHECK-LABEL: vwmul_v16i64:
259 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
260 ; CHECK-NEXT: vle32.v v16, (a0)
261 ; CHECK-NEXT: vle32.v v20, (a1)
262 ; CHECK-NEXT: vwmul.vv v8, v16, v20
264 %a = load <16 x i32>, ptr %x
265 %b = load <16 x i32>, ptr %y
266 %c = sext <16 x i32> %a to <16 x i64>
267 %d = sext <16 x i32> %b to <16 x i64>
268 %e = mul <16 x i64> %c, %d
; <128 x i8> x <128 x i8> -> <128 x i16> signed widening multiply.
; The checks expect the operation to be split in two: the upper 64 elements of
; each source are extracted with vslidedown.vx, and two vwmul.vv instructions at
; e8/m4 produce the two result halves. The expected code also reserves
; 16 * vlenb bytes of stack and spills/reloads whole m8 register groups
; (vs8r.v / vl8r.v) around the two multiplies.
272 define <128 x i16> @vwmul_v128i16(ptr %x, ptr %y) {
273 ; CHECK-LABEL: vwmul_v128i16:
275 ; CHECK-NEXT: addi sp, sp, -16
276 ; CHECK-NEXT: .cfi_def_cfa_offset 16
277 ; CHECK-NEXT: csrr a2, vlenb
278 ; CHECK-NEXT: slli a2, a2, 4
279 ; CHECK-NEXT: sub sp, sp, a2
280 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
281 ; CHECK-NEXT: li a2, 128
282 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
283 ; CHECK-NEXT: vle8.v v8, (a0)
284 ; CHECK-NEXT: addi a0, sp, 16
285 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
286 ; CHECK-NEXT: vle8.v v0, (a1)
287 ; CHECK-NEXT: li a0, 64
288 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
289 ; CHECK-NEXT: vslidedown.vx v16, v8, a0
290 ; CHECK-NEXT: vslidedown.vx v8, v0, a0
291 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
292 ; CHECK-NEXT: vwmul.vv v24, v16, v8
293 ; CHECK-NEXT: csrr a0, vlenb
294 ; CHECK-NEXT: slli a0, a0, 3
295 ; CHECK-NEXT: add a0, sp, a0
296 ; CHECK-NEXT: addi a0, a0, 16
297 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
298 ; CHECK-NEXT: addi a0, sp, 16
299 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
300 ; CHECK-NEXT: vwmul.vv v8, v16, v0
301 ; CHECK-NEXT: csrr a0, vlenb
302 ; CHECK-NEXT: slli a0, a0, 3
303 ; CHECK-NEXT: add a0, sp, a0
304 ; CHECK-NEXT: addi a0, a0, 16
305 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
306 ; CHECK-NEXT: csrr a0, vlenb
307 ; CHECK-NEXT: slli a0, a0, 4
308 ; CHECK-NEXT: add sp, sp, a0
309 ; CHECK-NEXT: addi sp, sp, 16
311 %a = load <128 x i8>, ptr %x
312 %b = load <128 x i8>, ptr %y
313 %c = sext <128 x i8> %a to <128 x i16>
314 %d = sext <128 x i8> %b to <128 x i16>
315 %e = mul <128 x i16> %c, %d
; <64 x i16> x <64 x i16> -> <64 x i32> signed widening multiply.
; The checks expect the operation to be split in two: the upper 32 elements of
; each source are extracted with vslidedown.vx, and two vwmul.vv instructions at
; e16/m4 produce the two result halves. The expected code also reserves
; 16 * vlenb bytes of stack and spills/reloads whole m8 register groups
; (vs8r.v / vl8r.v) around the two multiplies.
319 define <64 x i32> @vwmul_v64i32(ptr %x, ptr %y) {
320 ; CHECK-LABEL: vwmul_v64i32:
322 ; CHECK-NEXT: addi sp, sp, -16
323 ; CHECK-NEXT: .cfi_def_cfa_offset 16
324 ; CHECK-NEXT: csrr a2, vlenb
325 ; CHECK-NEXT: slli a2, a2, 4
326 ; CHECK-NEXT: sub sp, sp, a2
327 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
328 ; CHECK-NEXT: li a2, 64
329 ; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
330 ; CHECK-NEXT: vle16.v v8, (a0)
331 ; CHECK-NEXT: addi a0, sp, 16
332 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
333 ; CHECK-NEXT: vle16.v v0, (a1)
334 ; CHECK-NEXT: li a0, 32
335 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
336 ; CHECK-NEXT: vslidedown.vx v16, v8, a0
337 ; CHECK-NEXT: vslidedown.vx v8, v0, a0
338 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
339 ; CHECK-NEXT: vwmul.vv v24, v16, v8
340 ; CHECK-NEXT: csrr a0, vlenb
341 ; CHECK-NEXT: slli a0, a0, 3
342 ; CHECK-NEXT: add a0, sp, a0
343 ; CHECK-NEXT: addi a0, a0, 16
344 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
345 ; CHECK-NEXT: addi a0, sp, 16
346 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
347 ; CHECK-NEXT: vwmul.vv v8, v16, v0
348 ; CHECK-NEXT: csrr a0, vlenb
349 ; CHECK-NEXT: slli a0, a0, 3
350 ; CHECK-NEXT: add a0, sp, a0
351 ; CHECK-NEXT: addi a0, a0, 16
352 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
353 ; CHECK-NEXT: csrr a0, vlenb
354 ; CHECK-NEXT: slli a0, a0, 4
355 ; CHECK-NEXT: add sp, sp, a0
356 ; CHECK-NEXT: addi sp, sp, 16
358 %a = load <64 x i16>, ptr %x
359 %b = load <64 x i16>, ptr %y
360 %c = sext <64 x i16> %a to <64 x i32>
361 %d = sext <64 x i16> %b to <64 x i32>
362 %e = mul <64 x i32> %c, %d
; <32 x i32> x <32 x i32> -> <32 x i64> signed widening multiply.
; The checks expect the operation to be split in two: the upper 16 elements of
; each source are extracted with vslidedown.vi (immediate 16), and two vwmul.vv
; instructions at e32/m4 produce the two result halves. The expected code also
; reserves 16 * vlenb bytes of stack and spills/reloads whole m8 register groups
; (vs8r.v / vl8r.v) around the two multiplies.
366 define <32 x i64> @vwmul_v32i64(ptr %x, ptr %y) {
367 ; CHECK-LABEL: vwmul_v32i64:
369 ; CHECK-NEXT: addi sp, sp, -16
370 ; CHECK-NEXT: .cfi_def_cfa_offset 16
371 ; CHECK-NEXT: csrr a2, vlenb
372 ; CHECK-NEXT: slli a2, a2, 4
373 ; CHECK-NEXT: sub sp, sp, a2
374 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
375 ; CHECK-NEXT: li a2, 32
376 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
377 ; CHECK-NEXT: vle32.v v8, (a0)
378 ; CHECK-NEXT: addi a0, sp, 16
379 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
380 ; CHECK-NEXT: vle32.v v0, (a1)
381 ; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
382 ; CHECK-NEXT: vslidedown.vi v16, v8, 16
383 ; CHECK-NEXT: vslidedown.vi v8, v0, 16
384 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
385 ; CHECK-NEXT: vwmul.vv v24, v16, v8
386 ; CHECK-NEXT: csrr a0, vlenb
387 ; CHECK-NEXT: slli a0, a0, 3
388 ; CHECK-NEXT: add a0, sp, a0
389 ; CHECK-NEXT: addi a0, a0, 16
390 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
391 ; CHECK-NEXT: addi a0, sp, 16
392 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
393 ; CHECK-NEXT: vwmul.vv v8, v16, v0
394 ; CHECK-NEXT: csrr a0, vlenb
395 ; CHECK-NEXT: slli a0, a0, 3
396 ; CHECK-NEXT: add a0, sp, a0
397 ; CHECK-NEXT: addi a0, a0, 16
398 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
399 ; CHECK-NEXT: csrr a0, vlenb
400 ; CHECK-NEXT: slli a0, a0, 4
401 ; CHECK-NEXT: add sp, sp, a0
402 ; CHECK-NEXT: addi sp, sp, 16
404 %a = load <32 x i32>, ptr %x
405 %b = load <32 x i32>, ptr %y
406 %c = sext <32 x i32> %a to <32 x i64>
407 %d = sext <32 x i32> %b to <32 x i64>
408 %e = mul <32 x i64> %c, %d
; Both i8 sources are sign-extended by 4x (i8 -> i32) before the mul.
; The checks expect each operand to be extended to i16 with vsext.vf2 first,
; then a vwmul.vv that performs the remaining 2x widening to i32.
412 define <2 x i32> @vwmul_v2i32_v2i8(ptr %x, ptr %y) {
413 ; CHECK-LABEL: vwmul_v2i32_v2i8:
415 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
416 ; CHECK-NEXT: vle8.v v8, (a0)
417 ; CHECK-NEXT: vle8.v v9, (a1)
418 ; CHECK-NEXT: vsext.vf2 v10, v8
419 ; CHECK-NEXT: vsext.vf2 v11, v9
420 ; CHECK-NEXT: vwmul.vv v8, v10, v11
422 %a = load <2 x i8>, ptr %x
423 %b = load <2 x i8>, ptr %y
424 %c = sext <2 x i8> %a to <2 x i32>
425 %d = sext <2 x i8> %b to <2 x i32>
426 %e = mul <2 x i32> %c, %d
; Mixed source widths: %a is <4 x i8>, %b is <4 x i16>, both extended to i32.
; The checks expect only the i8 operand to be pre-extended (vsext.vf2 to i16),
; after which a single vwmul.vv widens both operands to i32.
430 define <4 x i32> @vwmul_v4i32_v4i8_v4i16(ptr %x, ptr %y) {
431 ; CHECK-LABEL: vwmul_v4i32_v4i8_v4i16:
433 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
434 ; CHECK-NEXT: vle8.v v8, (a0)
435 ; CHECK-NEXT: vle16.v v9, (a1)
436 ; CHECK-NEXT: vsext.vf2 v10, v8
437 ; CHECK-NEXT: vwmul.vv v8, v10, v9
439 %a = load <4 x i8>, ptr %x
440 %b = load <4 x i16>, ptr %y
441 %c = sext <4 x i8> %a to <4 x i32>
442 %d = sext <4 x i16> %b to <4 x i32>
443 %e = mul <4 x i32> %c, %d
; Mixed source widths: %a is <4 x i32>, %b is <4 x i8>, both extended to i64.
; The checks expect the i8 operand to be pre-extended to i32 with vsext.vf4,
; after which a single vwmul.vv widens both operands to i64.
447 define <4 x i64> @vwmul_v4i64_v4i32_v4i8(ptr %x, ptr %y) {
448 ; CHECK-LABEL: vwmul_v4i64_v4i32_v4i8:
450 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
451 ; CHECK-NEXT: vle8.v v8, (a1)
452 ; CHECK-NEXT: vle32.v v10, (a0)
453 ; CHECK-NEXT: vsext.vf4 v11, v8
454 ; CHECK-NEXT: vwmul.vv v8, v10, v11
456 %a = load <4 x i32>, ptr %x
457 %b = load <4 x i8>, ptr %y
458 %c = sext <4 x i32> %a to <4 x i64>
459 %d = sext <4 x i8> %b to <4 x i64>
460 %e = mul <4 x i64> %c, %d
; Vector-scalar form: the i8 argument %y is splatted (insertelement +
; zero-mask shufflevector) and both the splat and the loaded vector are
; sign-extended to i16. The checks expect a single vwmul.vx taking %y from a1.
464 define <2 x i16> @vwmul_vx_v2i16(ptr %x, i8 %y) {
465 ; CHECK-LABEL: vwmul_vx_v2i16:
467 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
468 ; CHECK-NEXT: vle8.v v9, (a0)
469 ; CHECK-NEXT: vwmul.vx v8, v9, a1
471 %a = load <2 x i8>, ptr %x
472 %b = insertelement <2 x i8> poison, i8 %y, i32 0
473 %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
474 %d = sext <2 x i8> %a to <2 x i16>
475 %e = sext <2 x i8> %c to <2 x i16>
476 %f = mul <2 x i16> %d, %e
; Vector-scalar form with a splatted i8 scalar, <4 x i8> -> <4 x i16>.
; The checks expect a single vwmul.vx at e8/mf4 taking the scalar from a1.
480 define <4 x i16> @vwmul_vx_v4i16(ptr %x, i8 %y) {
481 ; CHECK-LABEL: vwmul_vx_v4i16:
483 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
484 ; CHECK-NEXT: vle8.v v9, (a0)
485 ; CHECK-NEXT: vwmul.vx v8, v9, a1
487 %a = load <4 x i8>, ptr %x
488 %b = insertelement <4 x i8> poison, i8 %y, i32 0
489 %c = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer
490 %d = sext <4 x i8> %a to <4 x i16>
491 %e = sext <4 x i8> %c to <4 x i16>
492 %f = mul <4 x i16> %d, %e
; Vector-scalar form with a splatted i16 scalar, <2 x i16> -> <2 x i32>.
; The checks expect a single vwmul.vx at e16/mf4 taking the scalar from a1.
496 define <2 x i32> @vwmul_vx_v2i32(ptr %x, i16 %y) {
497 ; CHECK-LABEL: vwmul_vx_v2i32:
499 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
500 ; CHECK-NEXT: vle16.v v9, (a0)
501 ; CHECK-NEXT: vwmul.vx v8, v9, a1
503 %a = load <2 x i16>, ptr %x
504 %b = insertelement <2 x i16> poison, i16 %y, i32 0
505 %c = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> zeroinitializer
506 %d = sext <2 x i16> %a to <2 x i32>
507 %e = sext <2 x i16> %c to <2 x i32>
508 %f = mul <2 x i32> %d, %e
; Vector-scalar form with a splatted i8 scalar, <8 x i8> -> <8 x i16>.
; The checks expect a single vwmul.vx at e8/mf2 taking the scalar from a1.
512 define <8 x i16> @vwmul_vx_v8i16(ptr %x, i8 %y) {
513 ; CHECK-LABEL: vwmul_vx_v8i16:
515 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
516 ; CHECK-NEXT: vle8.v v9, (a0)
517 ; CHECK-NEXT: vwmul.vx v8, v9, a1
519 %a = load <8 x i8>, ptr %x
520 %b = insertelement <8 x i8> poison, i8 %y, i32 0
521 %c = shufflevector <8 x i8> %b, <8 x i8> poison, <8 x i32> zeroinitializer
522 %d = sext <8 x i8> %a to <8 x i16>
523 %e = sext <8 x i8> %c to <8 x i16>
524 %f = mul <8 x i16> %d, %e
; Vector-scalar form with a splatted i16 scalar, <4 x i16> -> <4 x i32>.
; The checks expect a single vwmul.vx at e16/mf2 taking the scalar from a1.
528 define <4 x i32> @vwmul_vx_v4i32(ptr %x, i16 %y) {
529 ; CHECK-LABEL: vwmul_vx_v4i32:
531 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
532 ; CHECK-NEXT: vle16.v v9, (a0)
533 ; CHECK-NEXT: vwmul.vx v8, v9, a1
535 %a = load <4 x i16>, ptr %x
536 %b = insertelement <4 x i16> poison, i16 %y, i32 0
537 %c = shufflevector <4 x i16> %b, <4 x i16> poison, <4 x i32> zeroinitializer
538 %d = sext <4 x i16> %a to <4 x i32>
539 %e = sext <4 x i16> %c to <4 x i32>
540 %f = mul <4 x i32> %d, %e
; Vector-scalar form with a splatted i32 scalar, <2 x i32> -> <2 x i64>.
; The checks (shared by RV32 and RV64) expect a single vwmul.vx at e32/mf2
; taking the scalar from a1.
544 define <2 x i64> @vwmul_vx_v2i64(ptr %x, i32 %y) {
545 ; CHECK-LABEL: vwmul_vx_v2i64:
547 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
548 ; CHECK-NEXT: vle32.v v9, (a0)
549 ; CHECK-NEXT: vwmul.vx v8, v9, a1
551 %a = load <2 x i32>, ptr %x
552 %b = insertelement <2 x i32> poison, i32 %y, i64 0
553 %c = shufflevector <2 x i32> %b, <2 x i32> poison, <2 x i32> zeroinitializer
554 %d = sext <2 x i32> %a to <2 x i64>
555 %e = sext <2 x i32> %c to <2 x i64>
556 %f = mul <2 x i64> %d, %e
; Vector-scalar form with a splatted i8 scalar, <16 x i8> -> <16 x i16>.
; The checks expect a single vwmul.vx at e8/m1 taking the scalar from a1.
560 define <16 x i16> @vwmul_vx_v16i16(ptr %x, i8 %y) {
561 ; CHECK-LABEL: vwmul_vx_v16i16:
563 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
564 ; CHECK-NEXT: vle8.v v10, (a0)
565 ; CHECK-NEXT: vwmul.vx v8, v10, a1
567 %a = load <16 x i8>, ptr %x
568 %b = insertelement <16 x i8> poison, i8 %y, i32 0
569 %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
570 %d = sext <16 x i8> %a to <16 x i16>
571 %e = sext <16 x i8> %c to <16 x i16>
572 %f = mul <16 x i16> %d, %e
; Vector-scalar form with a splatted i16 scalar, <8 x i16> -> <8 x i32>.
; The checks expect a single vwmul.vx at e16/m1 taking the scalar from a1.
576 define <8 x i32> @vwmul_vx_v8i32(ptr %x, i16 %y) {
577 ; CHECK-LABEL: vwmul_vx_v8i32:
579 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
580 ; CHECK-NEXT: vle16.v v10, (a0)
581 ; CHECK-NEXT: vwmul.vx v8, v10, a1
583 %a = load <8 x i16>, ptr %x
584 %b = insertelement <8 x i16> poison, i16 %y, i32 0
585 %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
586 %d = sext <8 x i16> %a to <8 x i32>
587 %e = sext <8 x i16> %c to <8 x i32>
588 %f = mul <8 x i32> %d, %e
; Vector-scalar form with a splatted i32 scalar, <4 x i32> -> <4 x i64>.
; The checks expect a single vwmul.vx at e32/m1 taking the scalar from a1.
592 define <4 x i64> @vwmul_vx_v4i64(ptr %x, i32 %y) {
593 ; CHECK-LABEL: vwmul_vx_v4i64:
595 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
596 ; CHECK-NEXT: vle32.v v10, (a0)
597 ; CHECK-NEXT: vwmul.vx v8, v10, a1
599 %a = load <4 x i32>, ptr %x
600 %b = insertelement <4 x i32> poison, i32 %y, i64 0
601 %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
602 %d = sext <4 x i32> %a to <4 x i64>
603 %e = sext <4 x i32> %c to <4 x i64>
604 %f = mul <4 x i64> %d, %e
; Vector-scalar form with a splatted i8 scalar, <32 x i8> -> <32 x i16>.
; The checks expect li + vsetvli for the element count 32, then a single
; vwmul.vx at e8/m2 taking the scalar from a1.
608 define <32 x i16> @vwmul_vx_v32i16(ptr %x, i8 %y) {
609 ; CHECK-LABEL: vwmul_vx_v32i16:
611 ; CHECK-NEXT: li a2, 32
612 ; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma
613 ; CHECK-NEXT: vle8.v v12, (a0)
614 ; CHECK-NEXT: vwmul.vx v8, v12, a1
616 %a = load <32 x i8>, ptr %x
617 %b = insertelement <32 x i8> poison, i8 %y, i32 0
618 %c = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer
619 %d = sext <32 x i8> %a to <32 x i16>
620 %e = sext <32 x i8> %c to <32 x i16>
621 %f = mul <32 x i16> %d, %e
; Vector-scalar form with a splatted i16 scalar, <16 x i16> -> <16 x i32>.
; The checks expect a single vwmul.vx at e16/m2 taking the scalar from a1.
625 define <16 x i32> @vwmul_vx_v16i32(ptr %x, i16 %y) {
626 ; CHECK-LABEL: vwmul_vx_v16i32:
628 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
629 ; CHECK-NEXT: vle16.v v12, (a0)
630 ; CHECK-NEXT: vwmul.vx v8, v12, a1
632 %a = load <16 x i16>, ptr %x
633 %b = insertelement <16 x i16> poison, i16 %y, i32 0
634 %c = shufflevector <16 x i16> %b, <16 x i16> poison, <16 x i32> zeroinitializer
635 %d = sext <16 x i16> %a to <16 x i32>
636 %e = sext <16 x i16> %c to <16 x i32>
637 %f = mul <16 x i32> %d, %e
; Vector-scalar form with a splatted i32 scalar, <8 x i32> -> <8 x i64>.
; The checks expect a single vwmul.vx at e32/m2 taking the scalar from a1.
641 define <8 x i64> @vwmul_vx_v8i64(ptr %x, i32 %y) {
642 ; CHECK-LABEL: vwmul_vx_v8i64:
644 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
645 ; CHECK-NEXT: vle32.v v12, (a0)
646 ; CHECK-NEXT: vwmul.vx v8, v12, a1
648 %a = load <8 x i32>, ptr %x
649 %b = insertelement <8 x i32> poison, i32 %y, i64 0
650 %c = shufflevector <8 x i32> %b, <8 x i32> poison, <8 x i32> zeroinitializer
651 %d = sext <8 x i32> %a to <8 x i64>
652 %e = sext <8 x i32> %c to <8 x i64>
653 %f = mul <8 x i64> %d, %e
; Vector-scalar form with a splatted i8 scalar, <64 x i8> -> <64 x i16>.
; The checks expect li + vsetvli for the element count 64, then a single
; vwmul.vx at e8/m4 taking the scalar from a1.
657 define <64 x i16> @vwmul_vx_v64i16(ptr %x, i8 %y) {
658 ; CHECK-LABEL: vwmul_vx_v64i16:
660 ; CHECK-NEXT: li a2, 64
661 ; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma
662 ; CHECK-NEXT: vle8.v v16, (a0)
663 ; CHECK-NEXT: vwmul.vx v8, v16, a1
665 %a = load <64 x i8>, ptr %x
666 %b = insertelement <64 x i8> poison, i8 %y, i32 0
667 %c = shufflevector <64 x i8> %b, <64 x i8> poison, <64 x i32> zeroinitializer
668 %d = sext <64 x i8> %a to <64 x i16>
669 %e = sext <64 x i8> %c to <64 x i16>
670 %f = mul <64 x i16> %d, %e
; Vector-scalar form with a splatted i16 scalar, <32 x i16> -> <32 x i32>.
; The checks expect li + vsetvli for the element count 32, then a single
; vwmul.vx at e16/m4 taking the scalar from a1.
674 define <32 x i32> @vwmul_vx_v32i32(ptr %x, i16 %y) {
675 ; CHECK-LABEL: vwmul_vx_v32i32:
677 ; CHECK-NEXT: li a2, 32
678 ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
679 ; CHECK-NEXT: vle16.v v16, (a0)
680 ; CHECK-NEXT: vwmul.vx v8, v16, a1
682 %a = load <32 x i16>, ptr %x
683 %b = insertelement <32 x i16> poison, i16 %y, i32 0
684 %c = shufflevector <32 x i16> %b, <32 x i16> poison, <32 x i32> zeroinitializer
685 %d = sext <32 x i16> %a to <32 x i32>
686 %e = sext <32 x i16> %c to <32 x i32>
687 %f = mul <32 x i32> %d, %e
; Vector-scalar form with a splatted i32 scalar, <16 x i32> -> <16 x i64>.
; The checks expect a single vwmul.vx at e32/m4 taking the scalar from a1.
691 define <16 x i64> @vwmul_vx_v16i64(ptr %x, i32 %y) {
692 ; CHECK-LABEL: vwmul_vx_v16i64:
694 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
695 ; CHECK-NEXT: vle32.v v16, (a0)
696 ; CHECK-NEXT: vwmul.vx v8, v16, a1
698 %a = load <16 x i32>, ptr %x
699 %b = insertelement <16 x i32> poison, i32 %y, i64 0
700 %c = shufflevector <16 x i32> %b, <16 x i32> poison, <16 x i32> zeroinitializer
701 %d = sext <16 x i32> %a to <16 x i64>
702 %e = sext <16 x i32> %c to <16 x i64>
703 %f = mul <16 x i64> %d, %e
; Here the scalar is sign-extended (i8 -> i16) BEFORE being splatted, so the
; splat itself is already at the wide element type. The checks expect the
; scalar to be loaded with lb and the multiply to still become vwmul.vx.
707 define <8 x i16> @vwmul_vx_v8i16_i8(ptr %x, ptr %y) {
708 ; CHECK-LABEL: vwmul_vx_v8i16_i8:
710 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
711 ; CHECK-NEXT: vle8.v v9, (a0)
712 ; CHECK-NEXT: lb a0, 0(a1)
713 ; CHECK-NEXT: vwmul.vx v8, v9, a0
715 %a = load <8 x i8>, ptr %x
; NOTE(review): %b is used below but is not defined in the visible lines;
; presumably an i8 load from %y belongs here (the checks expect
; `lb a0, 0(a1)`) - confirm against the full file.
717 %c = sext i8 %b to i16
718 %d = insertelement <8 x i16> poison, i16 %c, i32 0
719 %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
720 %f = sext <8 x i8> %a to <8 x i16>
721 %g = mul <8 x i16> %e, %f
; The scalar is loaded as a full i16 (no sext), i.e. already the result
; element width. The checks expect NO widening multiply here: the vector is
; extended with vsext.vf2 and multiplied with a plain vmul.vx.
725 define <8 x i16> @vwmul_vx_v8i16_i16(ptr %x, ptr %y) {
726 ; CHECK-LABEL: vwmul_vx_v8i16_i16:
728 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
729 ; CHECK-NEXT: vle8.v v8, (a0)
730 ; CHECK-NEXT: lh a0, 0(a1)
731 ; CHECK-NEXT: vsext.vf2 v9, v8
732 ; CHECK-NEXT: vmul.vx v8, v9, a0
734 %a = load <8 x i8>, ptr %x
735 %b = load i16, ptr %y
736 %d = insertelement <8 x i16> poison, i16 %b, i32 0
737 %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
738 %f = sext <8 x i8> %a to <8 x i16>
739 %g = mul <8 x i16> %e, %f
; The scalar is an i8 sign-extended to i32 before the splat, while the vector
; operand is <4 x i16> extended to i32. The checks expect the scalar to be
; loaded with lb and the multiply to become vwmul.vx at e16/mf2.
743 define <4 x i32> @vwmul_vx_v4i32_i8(ptr %x, ptr %y) {
744 ; CHECK-LABEL: vwmul_vx_v4i32_i8:
746 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
747 ; CHECK-NEXT: vle16.v v9, (a0)
748 ; CHECK-NEXT: lb a0, 0(a1)
749 ; CHECK-NEXT: vwmul.vx v8, v9, a0
751 %a = load <4 x i16>, ptr %x
; NOTE(review): %b is used below but is not defined in the visible lines;
; presumably an i8 load from %y belongs here (the checks expect
; `lb a0, 0(a1)`) - confirm against the full file.
753 %c = sext i8 %b to i32
754 %d = insertelement <4 x i32> poison, i32 %c, i32 0
755 %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
756 %f = sext <4 x i16> %a to <4 x i32>
757 %g = mul <4 x i32> %e, %f
; The scalar is an i16 sign-extended to i32 before the splat; the vector
; operand is <4 x i16> extended to i32. The checks expect the scalar to be
; loaded with lh and the multiply to become vwmul.vx at e16/mf2.
761 define <4 x i32> @vwmul_vx_v4i32_i16(ptr %x, ptr %y) {
762 ; CHECK-LABEL: vwmul_vx_v4i32_i16:
764 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
765 ; CHECK-NEXT: vle16.v v9, (a0)
766 ; CHECK-NEXT: lh a0, 0(a1)
767 ; CHECK-NEXT: vwmul.vx v8, v9, a0
769 %a = load <4 x i16>, ptr %x
770 %b = load i16, ptr %y
771 %c = sext i16 %b to i32
772 %d = insertelement <4 x i32> poison, i32 %c, i32 0
773 %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
774 %f = sext <4 x i16> %a to <4 x i32>
775 %g = mul <4 x i32> %e, %f
; The scalar is loaded as a full i32 (no sext), i.e. already the result
; element width. The checks expect NO widening multiply here: the vector is
; extended with vsext.vf2 and multiplied with a plain vmul.vx.
779 define <4 x i32> @vwmul_vx_v4i32_i32(ptr %x, ptr %y) {
780 ; CHECK-LABEL: vwmul_vx_v4i32_i32:
782 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
783 ; CHECK-NEXT: vle16.v v8, (a0)
784 ; CHECK-NEXT: lw a0, 0(a1)
785 ; CHECK-NEXT: vsext.vf2 v9, v8
786 ; CHECK-NEXT: vmul.vx v8, v9, a0
788 %a = load <4 x i16>, ptr %x
789 %b = load i32, ptr %y
790 %d = insertelement <4 x i32> poison, i32 %b, i32 0
791 %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
792 %f = sext <4 x i16> %a to <4 x i32>
793 %g = mul <4 x i32> %e, %f
; The scalar is an i8 sign-extended to i64 before the splat; the vector
; operand is <2 x i32> extended to i64. The checks (shared by RV32 and RV64)
; expect the scalar to be loaded with lb and the multiply to become vwmul.vx.
797 define <2 x i64> @vwmul_vx_v2i64_i8(ptr %x, ptr %y) {
798 ; CHECK-LABEL: vwmul_vx_v2i64_i8:
800 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
801 ; CHECK-NEXT: vle32.v v9, (a0)
802 ; CHECK-NEXT: lb a0, 0(a1)
803 ; CHECK-NEXT: vwmul.vx v8, v9, a0
805 %a = load <2 x i32>, ptr %x
; NOTE(review): %b is used below but is not defined in the visible lines;
; presumably an i8 load from %y belongs here (the checks expect
; `lb a0, 0(a1)`) - confirm against the full file.
807 %c = sext i8 %b to i64
808 %d = insertelement <2 x i64> poison, i64 %c, i64 0
809 %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
810 %f = sext <2 x i32> %a to <2 x i64>
811 %g = mul <2 x i64> %e, %f
; The scalar is an i16 sign-extended to i64 before the splat; the vector
; operand is <2 x i32> extended to i64. The checks (shared by RV32 and RV64)
; expect the scalar to be loaded with lh and the multiply to become vwmul.vx.
815 define <2 x i64> @vwmul_vx_v2i64_i16(ptr %x, ptr %y) {
816 ; CHECK-LABEL: vwmul_vx_v2i64_i16:
818 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
819 ; CHECK-NEXT: vle32.v v9, (a0)
820 ; CHECK-NEXT: lh a0, 0(a1)
821 ; CHECK-NEXT: vwmul.vx v8, v9, a0
823 %a = load <2 x i32>, ptr %x
824 %b = load i16, ptr %y
825 %c = sext i16 %b to i64
826 %d = insertelement <2 x i64> poison, i64 %c, i64 0
827 %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
828 %f = sext <2 x i32> %a to <2 x i64>
829 %g = mul <2 x i64> %e, %f
; The scalar is an i32 sign-extended to i64 before the splat; the vector
; operand is <2 x i32> extended to i64. The checks (shared by RV32 and RV64)
; expect the scalar to be loaded with lw and the multiply to become vwmul.vx.
833 define <2 x i64> @vwmul_vx_v2i64_i32(ptr %x, ptr %y) {
834 ; CHECK-LABEL: vwmul_vx_v2i64_i32:
836 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
837 ; CHECK-NEXT: vle32.v v9, (a0)
838 ; CHECK-NEXT: lw a0, 0(a1)
839 ; CHECK-NEXT: vwmul.vx v8, v9, a0
841 %a = load <2 x i32>, ptr %x
842 %b = load i32, ptr %y
843 %c = sext i32 %b to i64
844 %d = insertelement <2 x i64> poison, i64 %c, i64 0
845 %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
846 %f = sext <2 x i32> %a to <2 x i64>
847 %g = mul <2 x i64> %e, %f
; The scalar is loaded as a full i64 (no sext), i.e. already the result
; element width, so neither target is expected to use a widening multiply.
; The expectations differ per target, hence the RV32/RV64 prefixes:
;   RV32: the i64 is read as two 32-bit words, stored to the stack, and
;         splatted with a zero-stride vlse64.v; the multiply is vmul.vv.
;   RV64: the i64 is loaded with ld and the multiply is vmul.vx.
851 define <2 x i64> @vwmul_vx_v2i64_i64(ptr %x, ptr %y) {
852 ; RV32-LABEL: vwmul_vx_v2i64_i64:
854 ; RV32-NEXT: addi sp, sp, -16
855 ; RV32-NEXT: .cfi_def_cfa_offset 16
856 ; RV32-NEXT: lw a2, 4(a1)
857 ; RV32-NEXT: lw a1, 0(a1)
858 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
859 ; RV32-NEXT: vle32.v v8, (a0)
860 ; RV32-NEXT: sw a2, 12(sp)
861 ; RV32-NEXT: sw a1, 8(sp)
862 ; RV32-NEXT: addi a0, sp, 8
863 ; RV32-NEXT: vlse64.v v9, (a0), zero
864 ; RV32-NEXT: vsext.vf2 v10, v8
865 ; RV32-NEXT: vmul.vv v8, v9, v10
866 ; RV32-NEXT: addi sp, sp, 16
869 ; RV64-LABEL: vwmul_vx_v2i64_i64:
871 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
872 ; RV64-NEXT: vle32.v v8, (a0)
873 ; RV64-NEXT: ld a0, 0(a1)
874 ; RV64-NEXT: vsext.vf2 v9, v8
875 ; RV64-NEXT: vmul.vx v8, v9, a0
877 %a = load <2 x i32>, ptr %x
878 %b = load i64, ptr %y
879 %d = insertelement <2 x i64> poison, i64 %b, i64 0
880 %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
881 %f = sext <2 x i32> %a to <2 x i64>
882 %g = mul <2 x i64> %e, %f
; Four sign-extended <2 x i8> values with overlapping users: %ds feeds both
; muls, and %bs feeds one mul and a udiv (which is not a widening candidate).
; The checks expect NO vwmul here: all four values are extended with
; vsext.vf2 and the arithmetic is done with vmul.vv / vdivu.vv at e16.
886 define <2 x i16> @vwmul_v2i16_multiuse(ptr %x, ptr %y, ptr %z, ptr %w) {
887 ; CHECK-LABEL: vwmul_v2i16_multiuse:
889 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
890 ; CHECK-NEXT: vle8.v v8, (a0)
891 ; CHECK-NEXT: vle8.v v9, (a1)
892 ; CHECK-NEXT: vle8.v v10, (a2)
893 ; CHECK-NEXT: vle8.v v11, (a3)
894 ; CHECK-NEXT: vsext.vf2 v12, v8
895 ; CHECK-NEXT: vsext.vf2 v8, v9
896 ; CHECK-NEXT: vsext.vf2 v9, v10
897 ; CHECK-NEXT: vsext.vf2 v10, v11
898 ; CHECK-NEXT: vmul.vv v11, v12, v10
899 ; CHECK-NEXT: vmul.vv v10, v8, v10
900 ; CHECK-NEXT: vdivu.vv v8, v8, v9
901 ; CHECK-NEXT: vor.vv v9, v11, v10
902 ; CHECK-NEXT: vor.vv v8, v9, v8
904 %a = load <2 x i8>, ptr %x
905 %b = load <2 x i8>, ptr %y
906 %c = load <2 x i8>, ptr %z
907 %d = load <2 x i8>, ptr %w
909 %as = sext <2 x i8> %a to <2 x i16>
910 %bs = sext <2 x i8> %b to <2 x i16>
911 %cs = sext <2 x i8> %c to <2 x i16>
912 %ds = sext <2 x i8> %d to <2 x i16>
914 %e = mul <2 x i16> %as, %ds
915 %f = mul <2 x i16> %bs, %ds ; shares 1 use with %e
916 %g = udiv <2 x i16> %bs, %cs ; shares 1 use with %f, and no uses with %e
918 %h = or <2 x i16> %e, %f
919 %i = or <2 x i16> %h, %g