; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
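
; Check that multiplies of sign-extended operands are combined into the
; widening multiply instructions vwmul.vv and vwmul.vx for fixed-length
; vectors.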

define <2 x i16> @vwmul_v2i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vle8.v v10, (a1)
; CHECK-NEXT: vwmul.vv v8, v9, v10
; CHECK-NEXT: ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = sext <2 x i8> %a to <2 x i16>
  %d = sext <2 x i8> %b to <2 x i16>
  %e = mul <2 x i16> %c, %d
  ret <2 x i16> %e
}
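
; A sign-extended value feeding two widening multiplies should still select
; vwmul.vv for each multiply rather than materializing the extend.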

define <2 x i16> @vwmul_v2i16_multiple_users(ptr %x, ptr %y, ptr %z) {
; CHECK-LABEL: vwmul_v2i16_multiple_users:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vle8.v v9, (a1)
; CHECK-NEXT: vle8.v v10, (a2)
; CHECK-NEXT: vwmul.vv v11, v8, v9
; CHECK-NEXT: vwmul.vv v9, v8, v10
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vor.vv v8, v11, v9
; CHECK-NEXT: ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %b2 = load <2 x i8>, ptr %z
  %c = sext <2 x i8> %a to <2 x i16>
  %d = sext <2 x i8> %b to <2 x i16>
  %d2 = sext <2 x i8> %b2 to <2 x i16>
  %e = mul <2 x i16> %c, %d
  %f = mul <2 x i16> %c, %d2
  %g = or <2 x i16> %e, %f
  ret <2 x i16> %g
}

define <4 x i16> @vwmul_v4i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vle8.v v10, (a1)
; CHECK-NEXT: vwmul.vv v8, v9, v10
; CHECK-NEXT: ret
  %a = load <4 x i8>, ptr %x
  %b = load <4 x i8>, ptr %y
  %c = sext <4 x i8> %a to <4 x i16>
  %d = sext <4 x i8> %b to <4 x i16>
  %e = mul <4 x i16> %c, %d
  ret <4 x i16> %e
}

define <2 x i32> @vwmul_v2i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vle16.v v9, (a0)
; CHECK-NEXT: vle16.v v10, (a1)
; CHECK-NEXT: vwmul.vv v8, v9, v10
; CHECK-NEXT: ret
  %a = load <2 x i16>, ptr %x
  %b = load <2 x i16>, ptr %y
  %c = sext <2 x i16> %a to <2 x i32>
  %d = sext <2 x i16> %b to <2 x i32>
  %e = mul <2 x i32> %c, %d
  ret <2 x i32> %e
}

define <8 x i16> @vwmul_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vle8.v v10, (a1)
; CHECK-NEXT: vwmul.vv v8, v9, v10
; CHECK-NEXT: ret
  %a = load <8 x i8>, ptr %x
  %b = load <8 x i8>, ptr %y
  %c = sext <8 x i8> %a to <8 x i16>
  %d = sext <8 x i8> %b to <8 x i16>
  %e = mul <8 x i16> %c, %d
  ret <8 x i16> %e
}

define <4 x i32> @vwmul_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v9, (a0)
; CHECK-NEXT: vle16.v v10, (a1)
; CHECK-NEXT: vwmul.vv v8, v9, v10
; CHECK-NEXT: ret
  %a = load <4 x i16>, ptr %x
  %b = load <4 x i16>, ptr %y
  %c = sext <4 x i16> %a to <4 x i32>
  %d = sext <4 x i16> %b to <4 x i32>
  %e = mul <4 x i32> %c, %d
  ret <4 x i32> %e
}

define <2 x i64> @vwmul_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v9, (a0)
; CHECK-NEXT: vle32.v v10, (a1)
; CHECK-NEXT: vwmul.vv v8, v9, v10
; CHECK-NEXT: ret
  %a = load <2 x i32>, ptr %x
  %b = load <2 x i32>, ptr %y
  %c = sext <2 x i32> %a to <2 x i64>
  %d = sext <2 x i32> %b to <2 x i64>
  %e = mul <2 x i64> %c, %d
  ret <2 x i64> %e
}

define <16 x i16> @vwmul_v16i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v10, (a0)
; CHECK-NEXT: vle8.v v11, (a1)
; CHECK-NEXT: vwmul.vv v8, v10, v11
; CHECK-NEXT: ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = sext <16 x i8> %a to <16 x i16>
  %d = sext <16 x i8> %b to <16 x i16>
  %e = mul <16 x i16> %c, %d
  ret <16 x i16> %e
}

define <8 x i32> @vwmul_v8i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: vle16.v v11, (a1)
; CHECK-NEXT: vwmul.vv v8, v10, v11
; CHECK-NEXT: ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = sext <8 x i16> %a to <8 x i32>
  %d = sext <8 x i16> %b to <8 x i32>
  %e = mul <8 x i32> %c, %d
  ret <8 x i32> %e
}

define <4 x i64> @vwmul_v4i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v10, (a0)
; CHECK-NEXT: vle32.v v11, (a1)
; CHECK-NEXT: vwmul.vv v8, v10, v11
; CHECK-NEXT: ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = sext <4 x i32> %a to <4 x i64>
  %d = sext <4 x i32> %b to <4 x i64>
  %e = mul <4 x i64> %c, %d
  ret <4 x i64> %e
}
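
; From 32 elements upwards the VL no longer fits the vsetivli immediate, so a
; scalar VL register is materialized with li first.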

define <32 x i16> @vwmul_v32i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v32i16:
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 32
; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v12, (a0)
; CHECK-NEXT: vle8.v v14, (a1)
; CHECK-NEXT: vwmul.vv v8, v12, v14
; CHECK-NEXT: ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = sext <32 x i8> %a to <32 x i16>
  %d = sext <32 x i8> %b to <32 x i16>
  %e = mul <32 x i16> %c, %d
  ret <32 x i16> %e
}

define <16 x i32> @vwmul_v16i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v16i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: vle16.v v14, (a1)
; CHECK-NEXT: vwmul.vv v8, v12, v14
; CHECK-NEXT: ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = sext <16 x i16> %a to <16 x i32>
  %d = sext <16 x i16> %b to <16 x i32>
  %e = mul <16 x i32> %c, %d
  ret <16 x i32> %e
}

define <8 x i64> @vwmul_v8i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v12, (a0)
; CHECK-NEXT: vle32.v v14, (a1)
; CHECK-NEXT: vwmul.vv v8, v12, v14
; CHECK-NEXT: ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = sext <8 x i32> %a to <8 x i64>
  %d = sext <8 x i32> %b to <8 x i64>
  %e = mul <8 x i64> %c, %d
  ret <8 x i64> %e
}

define <64 x i16> @vwmul_v64i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v64i16:
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 64
; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v16, (a0)
; CHECK-NEXT: vle8.v v20, (a1)
; CHECK-NEXT: vwmul.vv v8, v16, v20
; CHECK-NEXT: ret
  %a = load <64 x i8>, ptr %x
  %b = load <64 x i8>, ptr %y
  %c = sext <64 x i8> %a to <64 x i16>
  %d = sext <64 x i8> %b to <64 x i16>
  %e = mul <64 x i16> %c, %d
  ret <64 x i16> %e
}

define <32 x i32> @vwmul_v32i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v32i32:
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 32
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v16, (a0)
; CHECK-NEXT: vle16.v v20, (a1)
; CHECK-NEXT: vwmul.vv v8, v16, v20
; CHECK-NEXT: ret
  %a = load <32 x i16>, ptr %x
  %b = load <32 x i16>, ptr %y
  %c = sext <32 x i16> %a to <32 x i32>
  %d = sext <32 x i16> %b to <32 x i32>
  %e = mul <32 x i32> %c, %d
  ret <32 x i32> %e
}

define <16 x i64> @vwmul_v16i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v16i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vle32.v v16, (a0)
; CHECK-NEXT: vle32.v v20, (a1)
; CHECK-NEXT: vwmul.vv v8, v16, v20
; CHECK-NEXT: ret
  %a = load <16 x i32>, ptr %x
  %b = load <16 x i32>, ptr %y
  %c = sext <16 x i32> %a to <16 x i64>
  %d = sext <16 x i32> %b to <16 x i64>
  %e = mul <16 x i64> %c, %d
  ret <16 x i64> %e
}
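
; The narrow sources below already need LMUL=8, so the multiply is split in
; half with vslidedown and the temporaries are spilled around the two
; vwmul.vv operations.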

define <128 x i16> @vwmul_v128i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v128i16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 4
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: li a2, 128
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vle8.v v0, (a1)
; CHECK-NEXT: li a0, 64
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: vslidedown.vx v16, v8, a0
; CHECK-NEXT: vslidedown.vx v8, v0, a0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT: vwmul.vv v24, v16, v8
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vwmul.vv v8, v16, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: ret
  %a = load <128 x i8>, ptr %x
  %b = load <128 x i8>, ptr %y
  %c = sext <128 x i8> %a to <128 x i16>
  %d = sext <128 x i8> %b to <128 x i16>
  %e = mul <128 x i16> %c, %d
  ret <128 x i16> %e
}

define <64 x i32> @vwmul_v64i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v64i32:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 4
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: li a2, 64
; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vle16.v v0, (a1)
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vslidedown.vx v16, v8, a0
; CHECK-NEXT: vslidedown.vx v8, v0, a0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vwmul.vv v24, v16, v8
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vwmul.vv v8, v16, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: ret
  %a = load <64 x i16>, ptr %x
  %b = load <64 x i16>, ptr %y
  %c = sext <64 x i16> %a to <64 x i32>
  %d = sext <64 x i16> %b to <64 x i32>
  %e = mul <64 x i32> %c, %d
  ret <64 x i32> %e
}

define <32 x i64> @vwmul_v32i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v32i64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 4
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: li a2, 32
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vle32.v v0, (a1)
; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT: vslidedown.vi v16, v8, 16
; CHECK-NEXT: vslidedown.vi v8, v0, 16
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vwmul.vv v24, v16, v8
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vwmul.vv v8, v16, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: ret
  %a = load <32 x i32>, ptr %x
  %b = load <32 x i32>, ptr %y
  %c = sext <32 x i32> %a to <32 x i64>
  %d = sext <32 x i32> %b to <32 x i64>
  %e = mul <32 x i64> %c, %d
  ret <32 x i64> %e
}
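
; vwmul only doubles the element width, so widening i8 sources all the way to
; i32 first sign-extends each operand with vsext.vf2.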

define <2 x i32> @vwmul_v2i32_v2i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v2i32_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vle8.v v9, (a1)
; CHECK-NEXT: vsext.vf2 v10, v8
; CHECK-NEXT: vsext.vf2 v11, v9
; CHECK-NEXT: vwmul.vv v8, v10, v11
; CHECK-NEXT: ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = sext <2 x i8> %a to <2 x i32>
  %d = sext <2 x i8> %b to <2 x i32>
  %e = mul <2 x i32> %c, %d
  ret <2 x i32> %e
}

define <4 x i32> @vwmul_v4i32_v4i8_v4i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v4i32_v4i8_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vle16.v v9, (a1)
; CHECK-NEXT: vsext.vf2 v10, v8
; CHECK-NEXT: vwmul.vv v8, v10, v9
; CHECK-NEXT: ret
  %a = load <4 x i8>, ptr %x
  %b = load <4 x i16>, ptr %y
  %c = sext <4 x i8> %a to <4 x i32>
  %d = sext <4 x i16> %b to <4 x i32>
  %e = mul <4 x i32> %c, %d
  ret <4 x i32> %e
}
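
; With mixed source widths the narrower operand is pre-extended (vsext.vf4
; here) so that both vwmul.vv inputs have the same element width.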

define <4 x i64> @vwmul_v4i64_v4i32_v4i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v4i64_v4i32_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vle32.v v10, (a0)
; CHECK-NEXT: vsext.vf4 v11, v8
; CHECK-NEXT: vwmul.vv v8, v10, v11
; CHECK-NEXT: ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i8>, ptr %y
  %c = sext <4 x i32> %a to <4 x i64>
  %d = sext <4 x i8> %b to <4 x i64>
  %e = mul <4 x i64> %c, %d
  ret <4 x i64> %e
}
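
; A multiply by a splatted sign-extended scalar should use the vwmul.vx form.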

define <2 x i16> @vwmul_vx_v2i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmul_vx_v2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vwmul.vx v8, v9, a1
; CHECK-NEXT: ret
  %a = load <2 x i8>, ptr %x
  %b = insertelement <2 x i8> poison, i8 %y, i32 0
  %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
  %d = sext <2 x i8> %a to <2 x i16>
  %e = sext <2 x i8> %c to <2 x i16>
  %f = mul <2 x i16> %d, %e
  ret <2 x i16> %f
}

define <4 x i16> @vwmul_vx_v4i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmul_vx_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vwmul.vx v8, v9, a1
; CHECK-NEXT: ret
  %a = load <4 x i8>, ptr %x
  %b = insertelement <4 x i8> poison, i8 %y, i32 0
  %c = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer
  %d = sext <4 x i8> %a to <4 x i16>
  %e = sext <4 x i8> %c to <4 x i16>
  %f = mul <4 x i16> %d, %e
  ret <4 x i16> %f
}

define <2 x i32> @vwmul_vx_v2i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmul_vx_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vle16.v v9, (a0)
; CHECK-NEXT: vwmul.vx v8, v9, a1
; CHECK-NEXT: ret
  %a = load <2 x i16>, ptr %x
  %b = insertelement <2 x i16> poison, i16 %y, i32 0
  %c = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> zeroinitializer
  %d = sext <2 x i16> %a to <2 x i32>
  %e = sext <2 x i16> %c to <2 x i32>
  %f = mul <2 x i32> %d, %e
  ret <2 x i32> %f
}

define <8 x i16> @vwmul_vx_v8i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmul_vx_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vwmul.vx v8, v9, a1
; CHECK-NEXT: ret
  %a = load <8 x i8>, ptr %x
  %b = insertelement <8 x i8> poison, i8 %y, i32 0
  %c = shufflevector <8 x i8> %b, <8 x i8> poison, <8 x i32> zeroinitializer
  %d = sext <8 x i8> %a to <8 x i16>
  %e = sext <8 x i8> %c to <8 x i16>
  %f = mul <8 x i16> %d, %e
  ret <8 x i16> %f
}

define <4 x i32> @vwmul_vx_v4i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmul_vx_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v9, (a0)
; CHECK-NEXT: vwmul.vx v8, v9, a1
; CHECK-NEXT: ret
  %a = load <4 x i16>, ptr %x
  %b = insertelement <4 x i16> poison, i16 %y, i32 0
  %c = shufflevector <4 x i16> %b, <4 x i16> poison, <4 x i32> zeroinitializer
  %d = sext <4 x i16> %a to <4 x i32>
  %e = sext <4 x i16> %c to <4 x i32>
  %f = mul <4 x i32> %d, %e
  ret <4 x i32> %f
}

define <2 x i64> @vwmul_vx_v2i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmul_vx_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v9, (a0)
; CHECK-NEXT: vwmul.vx v8, v9, a1
; CHECK-NEXT: ret
  %a = load <2 x i32>, ptr %x
  %b = insertelement <2 x i32> poison, i32 %y, i64 0
  %c = shufflevector <2 x i32> %b, <2 x i32> poison, <2 x i32> zeroinitializer
  %d = sext <2 x i32> %a to <2 x i64>
  %e = sext <2 x i32> %c to <2 x i64>
  %f = mul <2 x i64> %d, %e
  ret <2 x i64> %f
}

define <16 x i16> @vwmul_vx_v16i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmul_vx_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v10, (a0)
; CHECK-NEXT: vwmul.vx v8, v10, a1
; CHECK-NEXT: ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = sext <16 x i8> %a to <16 x i16>
  %e = sext <16 x i8> %c to <16 x i16>
  %f = mul <16 x i16> %d, %e
  ret <16 x i16> %f
}

define <8 x i32> @vwmul_vx_v8i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmul_vx_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: vwmul.vx v8, v10, a1
; CHECK-NEXT: ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = sext <8 x i16> %a to <8 x i32>
  %e = sext <8 x i16> %c to <8 x i32>
  %f = mul <8 x i32> %d, %e
  ret <8 x i32> %f
}

define <4 x i64> @vwmul_vx_v4i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmul_vx_v4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v10, (a0)
; CHECK-NEXT: vwmul.vx v8, v10, a1
; CHECK-NEXT: ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i64 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = sext <4 x i32> %a to <4 x i64>
  %e = sext <4 x i32> %c to <4 x i64>
  %f = mul <4 x i64> %d, %e
  ret <4 x i64> %f
}

define <32 x i16> @vwmul_vx_v32i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmul_vx_v32i16:
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 32
; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v12, (a0)
; CHECK-NEXT: vwmul.vx v8, v12, a1
; CHECK-NEXT: ret
  %a = load <32 x i8>, ptr %x
  %b = insertelement <32 x i8> poison, i8 %y, i32 0
  %c = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer
  %d = sext <32 x i8> %a to <32 x i16>
  %e = sext <32 x i8> %c to <32 x i16>
  %f = mul <32 x i16> %d, %e
  ret <32 x i16> %f
}

define <16 x i32> @vwmul_vx_v16i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmul_vx_v16i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: vwmul.vx v8, v12, a1
; CHECK-NEXT: ret
  %a = load <16 x i16>, ptr %x
  %b = insertelement <16 x i16> poison, i16 %y, i32 0
  %c = shufflevector <16 x i16> %b, <16 x i16> poison, <16 x i32> zeroinitializer
  %d = sext <16 x i16> %a to <16 x i32>
  %e = sext <16 x i16> %c to <16 x i32>
  %f = mul <16 x i32> %d, %e
  ret <16 x i32> %f
}

define <8 x i64> @vwmul_vx_v8i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmul_vx_v8i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v12, (a0)
; CHECK-NEXT: vwmul.vx v8, v12, a1
; CHECK-NEXT: ret
  %a = load <8 x i32>, ptr %x
  %b = insertelement <8 x i32> poison, i32 %y, i64 0
  %c = shufflevector <8 x i32> %b, <8 x i32> poison, <8 x i32> zeroinitializer
  %d = sext <8 x i32> %a to <8 x i64>
  %e = sext <8 x i32> %c to <8 x i64>
  %f = mul <8 x i64> %d, %e
  ret <8 x i64> %f
}

define <64 x i16> @vwmul_vx_v64i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmul_vx_v64i16:
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 64
; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v16, (a0)
; CHECK-NEXT: vwmul.vx v8, v16, a1
; CHECK-NEXT: ret
  %a = load <64 x i8>, ptr %x
  %b = insertelement <64 x i8> poison, i8 %y, i32 0
  %c = shufflevector <64 x i8> %b, <64 x i8> poison, <64 x i32> zeroinitializer
  %d = sext <64 x i8> %a to <64 x i16>
  %e = sext <64 x i8> %c to <64 x i16>
  %f = mul <64 x i16> %d, %e
  ret <64 x i16> %f
}

define <32 x i32> @vwmul_vx_v32i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmul_vx_v32i32:
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 32
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v16, (a0)
; CHECK-NEXT: vwmul.vx v8, v16, a1
; CHECK-NEXT: ret
  %a = load <32 x i16>, ptr %x
  %b = insertelement <32 x i16> poison, i16 %y, i32 0
  %c = shufflevector <32 x i16> %b, <32 x i16> poison, <32 x i32> zeroinitializer
  %d = sext <32 x i16> %a to <32 x i32>
  %e = sext <32 x i16> %c to <32 x i32>
  %f = mul <32 x i32> %d, %e
  ret <32 x i32> %f
}

define <16 x i64> @vwmul_vx_v16i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmul_vx_v16i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vle32.v v16, (a0)
; CHECK-NEXT: vwmul.vx v8, v16, a1
; CHECK-NEXT: ret
  %a = load <16 x i32>, ptr %x
  %b = insertelement <16 x i32> poison, i32 %y, i64 0
  %c = shufflevector <16 x i32> %b, <16 x i32> poison, <16 x i32> zeroinitializer
  %d = sext <16 x i32> %a to <16 x i64>
  %e = sext <16 x i32> %c to <16 x i64>
  %f = mul <16 x i64> %d, %e
  ret <16 x i64> %f
}
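
; The splatted scalar can also come from memory; a sign-extending scalar load
; (lb) feeds vwmul.vx directly.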

define <8 x i16> @vwmul_vx_v8i16_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v8i16_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: lb a0, 0(a1)
; CHECK-NEXT: vwmul.vx v8, v9, a0
; CHECK-NEXT: ret
  %a = load <8 x i8>, ptr %x
  %b = load i8, ptr %y
  %c = sext i8 %b to i16
  %d = insertelement <8 x i16> poison, i16 %c, i32 0
  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
  %f = sext <8 x i8> %a to <8 x i16>
  %g = mul <8 x i16> %e, %f
  ret <8 x i16> %g
}
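
; If the splatted scalar is already the wide element type, no widening
; multiply is formed; the vector operand is sign-extended and a plain vmul.vx
; is used instead.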

define <8 x i16> @vwmul_vx_v8i16_i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v8i16_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: lh a0, 0(a1)
; CHECK-NEXT: vsext.vf2 v9, v8
; CHECK-NEXT: vmul.vx v8, v9, a0
; CHECK-NEXT: ret
  %a = load <8 x i8>, ptr %x
  %b = load i16, ptr %y
  %d = insertelement <8 x i16> poison, i16 %b, i32 0
  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
  %f = sext <8 x i8> %a to <8 x i16>
  %g = mul <8 x i16> %e, %f
  ret <8 x i16> %g
}

define <4 x i32> @vwmul_vx_v4i32_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v4i32_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v9, (a0)
; CHECK-NEXT: lb a0, 0(a1)
; CHECK-NEXT: vwmul.vx v8, v9, a0
; CHECK-NEXT: ret
  %a = load <4 x i16>, ptr %x
  %b = load i8, ptr %y
  %c = sext i8 %b to i32
  %d = insertelement <4 x i32> poison, i32 %c, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = sext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}

define <4 x i32> @vwmul_vx_v4i32_i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v4i32_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v9, (a0)
; CHECK-NEXT: lh a0, 0(a1)
; CHECK-NEXT: vwmul.vx v8, v9, a0
; CHECK-NEXT: ret
  %a = load <4 x i16>, ptr %x
  %b = load i16, ptr %y
  %c = sext i16 %b to i32
  %d = insertelement <4 x i32> poison, i32 %c, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = sext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}

define <4 x i32> @vwmul_vx_v4i32_i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v4i32_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: lw a0, 0(a1)
; CHECK-NEXT: vsext.vf2 v9, v8
; CHECK-NEXT: vmul.vx v8, v9, a0
; CHECK-NEXT: ret
  %a = load <4 x i16>, ptr %x
  %b = load i32, ptr %y
  %d = insertelement <4 x i32> poison, i32 %b, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = sext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}

define <2 x i64> @vwmul_vx_v2i64_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v2i64_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v9, (a0)
; CHECK-NEXT: lb a0, 0(a1)
; CHECK-NEXT: vwmul.vx v8, v9, a0
; CHECK-NEXT: ret
  %a = load <2 x i32>, ptr %x
  %b = load i8, ptr %y
  %c = sext i8 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwmul_vx_v2i64_i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v2i64_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v9, (a0)
; CHECK-NEXT: lh a0, 0(a1)
; CHECK-NEXT: vwmul.vx v8, v9, a0
; CHECK-NEXT: ret
  %a = load <2 x i32>, ptr %x
  %b = load i16, ptr %y
  %c = sext i16 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwmul_vx_v2i64_i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v2i64_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v9, (a0)
; CHECK-NEXT: lw a0, 0(a1)
; CHECK-NEXT: vwmul.vx v8, v9, a0
; CHECK-NEXT: ret
  %a = load <2 x i32>, ptr %x
  %b = load i32, ptr %y
  %c = sext i32 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}
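
; For an i64 scalar, RV32 has to assemble the value on the stack and splat it
; with vlse64.v, while RV64 keeps it in a GPR and uses vmul.vx.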

define <2 x i64> @vwmul_vx_v2i64_i64(ptr %x, ptr %y) {
; RV32-LABEL: vwmul_vx_v2i64_i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: lw a2, 0(a1)
; RV32-NEXT: lw a1, 4(a1)
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle32.v v8, (a0)
; RV32-NEXT: sw a2, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vlse64.v v9, (a0), zero
; RV32-NEXT: vsext.vf2 v10, v8
; RV32-NEXT: vmul.vv v8, v9, v10
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vwmul_vx_v2i64_i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vle32.v v8, (a0)
; RV64-NEXT: ld a0, 0(a1)
; RV64-NEXT: vsext.vf2 v9, v8
; RV64-NEXT: vmul.vx v8, v9, a0
; RV64-NEXT: ret
  %a = load <2 x i32>, ptr %x
  %b = load i64, ptr %y
  %d = insertelement <2 x i64> poison, i64 %b, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}
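
; The extends below have an extra non-multiply user (the udiv), so they are
; kept as vsext.vf2 and the multiplies stay as plain vmul.vv.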

define <2 x i16> @vwmul_v2i16_multiuse(ptr %x, ptr %y, ptr %z, ptr %w) {
; CHECK-LABEL: vwmul_v2i16_multiuse:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vle8.v v9, (a1)
; CHECK-NEXT: vle8.v v10, (a2)
; CHECK-NEXT: vle8.v v11, (a3)
; CHECK-NEXT: vsext.vf2 v12, v8
; CHECK-NEXT: vsext.vf2 v8, v9
; CHECK-NEXT: vsext.vf2 v9, v10
; CHECK-NEXT: vsext.vf2 v10, v11
; CHECK-NEXT: vmul.vv v11, v12, v10
; CHECK-NEXT: vmul.vv v10, v8, v10
; CHECK-NEXT: vdivu.vv v8, v8, v9
; CHECK-NEXT: vor.vv v9, v11, v10
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = load <2 x i8>, ptr %z
  %d = load <2 x i8>, ptr %w

  %as = sext <2 x i8> %a to <2 x i16>
  %bs = sext <2 x i8> %b to <2 x i16>
  %cs = sext <2 x i8> %c to <2 x i16>
  %ds = sext <2 x i8> %d to <2 x i16>

  %e = mul <2 x i16> %as, %ds
  %f = mul <2 x i16> %bs, %ds ; shares 1 use with %e
  %g = udiv <2 x i16> %bs, %cs ; shares 1 use with %f, and no uses with %e

  %h = or <2 x i16> %e, %f
  %i = or <2 x i16> %h, %g
  ret <2 x i16> %i
}