; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
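
; Loads of narrow elements that are zero-extended and multiplied at twice the
; element width should be selected as a single vwmulu.vv on the narrow type.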
define <2 x i16> @vwmulu_v2i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = zext <2 x i8> %a to <2 x i16>
  %d = zext <2 x i8> %b to <2 x i16>
  %e = mul <2 x i16> %c, %d
  ret <2 x i16> %e
}

define <4 x i16> @vwmulu_v4i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = load <4 x i8>, ptr %y
  %c = zext <4 x i8> %a to <4 x i16>
  %d = zext <4 x i8> %b to <4 x i16>
  %e = mul <4 x i16> %c, %d
  ret <4 x i16> %e
}

define <2 x i32> @vwmulu_v2i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i16>, ptr %x
  %b = load <2 x i16>, ptr %y
  %c = zext <2 x i16> %a to <2 x i32>
  %d = zext <2 x i16> %b to <2 x i32>
  %e = mul <2 x i32> %c, %d
  ret <2 x i32> %e
}

define <8 x i16> @vwmulu_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load <8 x i8>, ptr %y
  %c = zext <8 x i8> %a to <8 x i16>
  %d = zext <8 x i8> %b to <8 x i16>
  %e = mul <8 x i16> %c, %d
  ret <8 x i16> %e
}

define <4 x i32> @vwmulu_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load <4 x i16>, ptr %y
  %c = zext <4 x i16> %a to <4 x i32>
  %d = zext <4 x i16> %b to <4 x i32>
  %e = mul <4 x i32> %c, %d
  ret <4 x i32> %e
}

define <2 x i64> @vwmulu_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vle32.v v10, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load <2 x i32>, ptr %y
  %c = zext <2 x i32> %a to <2 x i64>
  %d = zext <2 x i32> %b to <2 x i64>
  %e = mul <2 x i64> %c, %d
  ret <2 x i64> %e
}

define <16 x i16> @vwmulu_v16i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vle8.v v11, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = zext <16 x i8> %a to <16 x i16>
  %d = zext <16 x i8> %b to <16 x i16>
  %e = mul <16 x i16> %c, %d
  ret <16 x i16> %e
}

define <8 x i32> @vwmulu_v8i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vle16.v v11, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = zext <8 x i16> %a to <8 x i32>
  %d = zext <8 x i16> %b to <8 x i32>
  %e = mul <8 x i32> %c, %d
  ret <8 x i32> %e
}

define <4 x i64> @vwmulu_v4i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vle32.v v11, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = zext <4 x i32> %a to <4 x i64>
  %d = zext <4 x i32> %b to <4 x i64>
  %e = mul <4 x i64> %c, %d
  ret <4 x i64> %e
}

define <32 x i16> @vwmulu_v32i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    vle8.v v14, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = zext <32 x i8> %a to <32 x i16>
  %d = zext <32 x i8> %b to <32 x i16>
  %e = mul <32 x i16> %c, %d
  ret <32 x i16> %e
}

define <16 x i32> @vwmulu_v16i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vle16.v v14, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = zext <16 x i16> %a to <16 x i32>
  %d = zext <16 x i16> %b to <16 x i32>
  %e = mul <16 x i32> %c, %d
  ret <16 x i32> %e
}

define <8 x i64> @vwmulu_v8i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vle32.v v14, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = zext <8 x i32> %a to <8 x i64>
  %d = zext <8 x i32> %b to <8 x i64>
  %e = mul <8 x i64> %c, %d
  ret <8 x i64> %e
}

define <64 x i16> @vwmulu_v64i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    vle8.v v20, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <64 x i8>, ptr %x
  %b = load <64 x i8>, ptr %y
  %c = zext <64 x i8> %a to <64 x i16>
  %d = zext <64 x i8> %b to <64 x i16>
  %e = mul <64 x i16> %c, %d
  ret <64 x i16> %e
}

define <32 x i32> @vwmulu_v32i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vle16.v v20, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %x
  %b = load <32 x i16>, ptr %y
  %c = zext <32 x i16> %a to <32 x i32>
  %d = zext <32 x i16> %b to <32 x i32>
  %e = mul <32 x i32> %c, %d
  ret <32 x i32> %e
}

define <16 x i64> @vwmulu_v16i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vle32.v v20, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <16 x i32>, ptr %x
  %b = load <16 x i32>, ptr %y
  %c = zext <16 x i32> %a to <16 x i64>
  %d = zext <16 x i32> %b to <16 x i64>
  %e = mul <16 x i64> %c, %d
  ret <16 x i64> %e
}
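
; For the widest cases below the widened result would exceed LMUL 8, so the
; operands are split in half with vslidedown and two vwmulu.vv operations are
; emitted, spilling one half of the result around the second multiply.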
define <128 x i16> @vwmulu_v128i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v128i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 128
; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle8.v v0, (a1)
; CHECK-NEXT:    li a0, 64
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT:    vmv4r.v v24, v8
; CHECK-NEXT:    vwmulu.vv v8, v16, v24
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmulu.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
  %a = load <128 x i8>, ptr %x
  %b = load <128 x i8>, ptr %y
  %c = zext <128 x i8> %a to <128 x i16>
  %d = zext <128 x i8> %b to <128 x i16>
  %e = mul <128 x i16> %c, %d
  ret <128 x i16> %e
}

define <64 x i32> @vwmulu_v64i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v64i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle16.v v0, (a1)
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vmv4r.v v24, v8
; CHECK-NEXT:    vwmulu.vv v8, v16, v24
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmulu.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
  %a = load <64 x i16>, ptr %x
  %b = load <64 x i16>, ptr %y
  %c = zext <64 x i16> %a to <64 x i32>
  %d = zext <64 x i16> %b to <64 x i32>
  %e = mul <64 x i32> %c, %d
  ret <64 x i32> %e
}

define <32 x i64> @vwmulu_v32i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v32i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle32.v v0, (a1)
; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v16, v8, 16
; CHECK-NEXT:    vslidedown.vi v8, v0, 16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vmv4r.v v24, v8
; CHECK-NEXT:    vwmulu.vv v8, v16, v24
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmulu.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
  %a = load <32 x i32>, ptr %x
  %b = load <32 x i32>, ptr %y
  %c = zext <32 x i32> %a to <32 x i64>
  %d = zext <32 x i32> %b to <32 x i64>
  %e = mul <32 x i64> %c, %d
  ret <32 x i64> %e
}
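
; Mixed source widths: the narrower operand is pre-widened with vzext.vf2 or
; vzext.vf4 so the final doubling can still use vwmulu.vv.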
define <2 x i32> @vwmulu_v2i32_v2i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v2i32_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vzext.vf2 v10, v8
; CHECK-NEXT:    vzext.vf2 v11, v9
; CHECK-NEXT:    vwmulu.vv v8, v11, v10
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = zext <2 x i8> %a to <2 x i32>
  %d = zext <2 x i8> %b to <2 x i32>
  %e = mul <2 x i32> %c, %d
  ret <2 x i32> %e
}

define <4 x i32> @vwmulu_v4i32_v4i8_v4i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v4i32_v4i8_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vzext.vf2 v10, v8
; CHECK-NEXT:    vwmulu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = load <4 x i16>, ptr %y
  %c = zext <4 x i8> %a to <4 x i32>
  %d = zext <4 x i16> %b to <4 x i32>
  %e = mul <4 x i32> %c, %d
  ret <4 x i32> %e
}

define <4 x i64> @vwmulu_v4i64_v4i32_v4i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v4i64_v4i32_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vzext.vf4 v11, v8
; CHECK-NEXT:    vwmulu.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i8>, ptr %y
  %c = zext <4 x i32> %a to <4 x i64>
  %d = zext <4 x i8> %b to <4 x i64>
  %e = mul <4 x i64> %c, %d
  ret <4 x i64> %e
}
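
; A zero-extended scalar splat operand should fold into the vwmulu.vx form.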
define <2 x i16> @vwmulu_vx_v2i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulu_vx_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = insertelement <2 x i8> poison, i8 %y, i32 0
  %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
  %d = zext <2 x i8> %a to <2 x i16>
  %e = zext <2 x i8> %c to <2 x i16>
  %f = mul <2 x i16> %d, %e
  ret <2 x i16> %f
}

define <4 x i16> @vwmulu_vx_v4i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulu_vx_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = insertelement <4 x i8> poison, i8 %y, i32 0
  %c = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer
  %d = zext <4 x i8> %a to <4 x i16>
  %e = zext <4 x i8> %c to <4 x i16>
  %f = mul <4 x i16> %d, %e
  ret <4 x i16> %f
}

define <2 x i32> @vwmulu_vx_v2i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulu_vx_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i16>, ptr %x
  %b = insertelement <2 x i16> poison, i16 %y, i32 0
  %c = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> zeroinitializer
  %d = zext <2 x i16> %a to <2 x i32>
  %e = zext <2 x i16> %c to <2 x i32>
  %f = mul <2 x i32> %d, %e
  ret <2 x i32> %f
}

define <8 x i16> @vwmulu_vx_v8i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulu_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = insertelement <8 x i8> poison, i8 %y, i32 0
  %c = shufflevector <8 x i8> %b, <8 x i8> poison, <8 x i32> zeroinitializer
  %d = zext <8 x i8> %a to <8 x i16>
  %e = zext <8 x i8> %c to <8 x i16>
  %f = mul <8 x i16> %d, %e
  ret <8 x i16> %f
}

define <4 x i32> @vwmulu_vx_v4i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulu_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = insertelement <4 x i16> poison, i16 %y, i32 0
  %c = shufflevector <4 x i16> %b, <4 x i16> poison, <4 x i32> zeroinitializer
  %d = zext <4 x i16> %a to <4 x i32>
  %e = zext <4 x i16> %c to <4 x i32>
  %f = mul <4 x i32> %d, %e
  ret <4 x i32> %f
}

define <2 x i64> @vwmulu_vx_v2i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulu_vx_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = insertelement <2 x i32> poison, i32 %y, i64 0
  %c = shufflevector <2 x i32> %b, <2 x i32> poison, <2 x i32> zeroinitializer
  %d = zext <2 x i32> %a to <2 x i64>
  %e = zext <2 x i32> %c to <2 x i64>
  %f = mul <2 x i64> %d, %e
  ret <2 x i64> %f
}

define <16 x i16> @vwmulu_vx_v16i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulu_vx_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = zext <16 x i8> %a to <16 x i16>
  %e = zext <16 x i8> %c to <16 x i16>
  %f = mul <16 x i16> %d, %e
  ret <16 x i16> %f
}

define <8 x i32> @vwmulu_vx_v8i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulu_vx_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = zext <8 x i16> %a to <8 x i32>
  %e = zext <8 x i16> %c to <8 x i32>
  %f = mul <8 x i32> %d, %e
  ret <8 x i32> %f
}

define <4 x i64> @vwmulu_vx_v4i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulu_vx_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i64 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = zext <4 x i32> %a to <4 x i64>
  %e = zext <4 x i32> %c to <4 x i64>
  %f = mul <4 x i64> %d, %e
  ret <4 x i64> %f
}

define <32 x i16> @vwmulu_vx_v32i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulu_vx_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = insertelement <32 x i8> poison, i8 %y, i32 0
  %c = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer
  %d = zext <32 x i8> %a to <32 x i16>
  %e = zext <32 x i8> %c to <32 x i16>
  %f = mul <32 x i16> %d, %e
  ret <32 x i16> %f
}

define <16 x i32> @vwmulu_vx_v16i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulu_vx_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = insertelement <16 x i16> poison, i16 %y, i32 0
  %c = shufflevector <16 x i16> %b, <16 x i16> poison, <16 x i32> zeroinitializer
  %d = zext <16 x i16> %a to <16 x i32>
  %e = zext <16 x i16> %c to <16 x i32>
  %f = mul <16 x i32> %d, %e
  ret <16 x i32> %f
}

define <8 x i64> @vwmulu_vx_v8i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulu_vx_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = insertelement <8 x i32> poison, i32 %y, i64 0
  %c = shufflevector <8 x i32> %b, <8 x i32> poison, <8 x i32> zeroinitializer
  %d = zext <8 x i32> %a to <8 x i64>
  %e = zext <8 x i32> %c to <8 x i64>
  %f = mul <8 x i64> %d, %e
  ret <8 x i64> %f
}

define <64 x i16> @vwmulu_vx_v64i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulu_vx_v64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <64 x i8>, ptr %x
  %b = insertelement <64 x i8> poison, i8 %y, i32 0
  %c = shufflevector <64 x i8> %b, <64 x i8> poison, <64 x i32> zeroinitializer
  %d = zext <64 x i8> %a to <64 x i16>
  %e = zext <64 x i8> %c to <64 x i16>
  %f = mul <64 x i16> %d, %e
  ret <64 x i16> %f
}

define <32 x i32> @vwmulu_vx_v32i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulu_vx_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %x
  %b = insertelement <32 x i16> poison, i16 %y, i32 0
  %c = shufflevector <32 x i16> %b, <32 x i16> poison, <32 x i32> zeroinitializer
  %d = zext <32 x i16> %a to <32 x i32>
  %e = zext <32 x i16> %c to <32 x i32>
  %f = mul <32 x i32> %d, %e
  ret <32 x i32> %f
}

define <16 x i64> @vwmulu_vx_v16i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulu_vx_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <16 x i32>, ptr %x
  %b = insertelement <16 x i32> poison, i32 %y, i64 0
  %c = shufflevector <16 x i32> %b, <16 x i32> poison, <16 x i32> zeroinitializer
  %d = zext <16 x i32> %a to <16 x i64>
  %e = zext <16 x i32> %c to <16 x i64>
  %f = mul <16 x i64> %d, %e
  ret <16 x i64> %f
}
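
; Scalar operands loaded from memory: a scalar no wider than the narrow vector
; element zero-extends for free (lbu/lhu) and still folds into vwmulu.vx, while
; a scalar as wide as the destination element falls back to vzext.vf2 + vmul.vx.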
define <8 x i16> @vwmulu_vx_v8i16_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_vx_v8i16_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    lbu a0, 0(a1)
; CHECK-NEXT:    vwmulu.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load i8, ptr %y
  %c = zext i8 %b to i16
  %d = insertelement <8 x i16> poison, i16 %c, i32 0
  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
  %f = zext <8 x i8> %a to <8 x i16>
  %g = mul <8 x i16> %e, %f
  ret <8 x i16> %g
}

define <8 x i16> @vwmulu_vx_v8i16_i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_vx_v8i16_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    lh a0, 0(a1)
; CHECK-NEXT:    vzext.vf2 v9, v8
; CHECK-NEXT:    vmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load i16, ptr %y
  %d = insertelement <8 x i16> poison, i16 %b, i32 0
  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
  %f = zext <8 x i8> %a to <8 x i16>
  %g = mul <8 x i16> %e, %f
  ret <8 x i16> %g
}

define <4 x i32> @vwmulu_vx_v4i32_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_vx_v4i32_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    lbu a0, 0(a1)
; CHECK-NEXT:    vwmulu.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i8, ptr %y
  %c = zext i8 %b to i32
  %d = insertelement <4 x i32> poison, i32 %c, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = zext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}

define <4 x i32> @vwmulu_vx_v4i32_i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_vx_v4i32_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    lhu a0, 0(a1)
; CHECK-NEXT:    vwmulu.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i16, ptr %y
  %c = zext i16 %b to i32
  %d = insertelement <4 x i32> poison, i32 %c, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = zext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}

define <4 x i32> @vwmulu_vx_v4i32_i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_vx_v4i32_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    lw a0, 0(a1)
; CHECK-NEXT:    vzext.vf2 v9, v8
; CHECK-NEXT:    vmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i32, ptr %y
  %d = insertelement <4 x i32> poison, i32 %b, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = zext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}
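
; i64 scalar operands: the splat is materialized at e64, so RV32 has to build
; the 64-bit value on the stack and broadcast it with vlse64.v while RV64 keeps
; it in a GPR; the multiply is then done at full width after vzext.vf2 of the
; vector operand rather than with vwmulu.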
define <2 x i64> @vwmulu_vx_v2i64_i8(ptr %x, ptr %y) {
; RV32-LABEL: vwmulu_vx_v2i64_i8:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; RV32-NEXT:    lb a1, 0(a1)
; RV32-NEXT:    vle32.v v25, (a0)
; RV32-NEXT:    srai a0, a1, 31
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v26, (a0), zero
; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; RV32-NEXT:    vzext.vf2 v27, v25
; RV32-NEXT:    vmul.vv v8, v26, v27
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmulu_vx_v2i64_i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; RV64-NEXT:    vle32.v v25, (a0)
; RV64-NEXT:    lb a0, 0(a1)
; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; RV64-NEXT:    vzext.vf2 v26, v25
; RV64-NEXT:    vmul.vx v8, v26, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i8, ptr %y
  %c = zext i8 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = zext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwmulu_vx_v2i64_i16(ptr %x, ptr %y) {
; RV32-LABEL: vwmulu_vx_v2i64_i16:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; RV32-NEXT:    lh a1, 0(a1)
; RV32-NEXT:    vle32.v v25, (a0)
; RV32-NEXT:    srai a0, a1, 31
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v26, (a0), zero
; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; RV32-NEXT:    vzext.vf2 v27, v25
; RV32-NEXT:    vmul.vv v8, v26, v27
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmulu_vx_v2i64_i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; RV64-NEXT:    vle32.v v25, (a0)
; RV64-NEXT:    lh a0, 0(a1)
; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; RV64-NEXT:    vzext.vf2 v26, v25
; RV64-NEXT:    vmul.vx v8, v26, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i16, ptr %y
  %c = zext i16 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = zext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwmulu_vx_v2i64_i32(ptr %x, ptr %y) {
; RV32-LABEL: vwmulu_vx_v2i64_i32:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    vle32.v v25, (a0)
; RV32-NEXT:    srai a0, a1, 31
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v26, (a0), zero
; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; RV32-NEXT:    vzext.vf2 v27, v25
; RV32-NEXT:    vmul.vv v8, v26, v27
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmulu_vx_v2i64_i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; RV64-NEXT:    vle32.v v25, (a0)
; RV64-NEXT:    lw a0, 0(a1)
; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; RV64-NEXT:    vzext.vf2 v26, v25
; RV64-NEXT:    vmul.vx v8, v26, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i32, ptr %y
  %c = zext i32 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = zext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwmulu_vx_v2i64_i64(ptr %x, ptr %y) {
; RV32-LABEL: vwmulu_vx_v2i64_i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; RV32-NEXT:    lw a2, 4(a1)
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    vle32.v v25, (a0)
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v26, (a0), zero
; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; RV32-NEXT:    vzext.vf2 v27, v25
; RV32-NEXT:    vmul.vv v8, v26, v27
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmulu_vx_v2i64_i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; RV64-NEXT:    vle32.v v25, (a0)
; RV64-NEXT:    ld a0, 0(a1)
; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; RV64-NEXT:    vzext.vf2 v26, v25
; RV64-NEXT:    vmul.vx v8, v26, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i64, ptr %y
  %d = insertelement <2 x i64> poison, i64 %b, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = zext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}